# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later

import click

from clapper.click import verbosity_option
from clapper.logging import setup

logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")


def _load(data):
    """Load prediction.csv files.

    Parameters
    ----------

    data : dict
        A dict in which keys are the names of the systems and the values are
        paths to ``predictions.csv`` style files.


    Returns
    -------

    data : dict
        A dict in which keys are the names of the systems and the values are
        dictionaries that contain two keys:

        * ``df``: A :py:class:`pandas.DataFrame` with the predictions data
          loaded to
    """
    import re

    import numpy
    import pandas

    def _to_double_tensor(col):
        """Converts a column in a dataframe to a tensor array."""
        pattern = re.compile(" +")
        return col.apply(lambda cell: numpy.array(eval(pattern.sub(",", cell))))

    # loads all data
    retval = {}
    for name, predictions_path in data.items():
        # Load predictions
        logger.info(f"Loading predictions from {predictions_path}...")
        pred_data = pandas.read_csv(predictions_path)
        pred_data["likelihood"] = _to_double_tensor(pred_data["likelihood"])
        pred_data["ground_truth"] = _to_double_tensor(pred_data["ground_truth"])
        retval[name] = dict(df=pred_data)

    return retval


@click.command(
    epilog="""Examples:

\b
    1. Convert predictions of radiological signs to a JSON dataset file:

       .. code:: sh

          ptbench predtojson -vv train path/to/train/predictions.csv test path/to/test/predictions.csv
""",
)
@click.argument(
    "label_path",
    nargs=-1,
)
@click.option(
    "--output-folder",
    "-f",
    help="Path where to store the json file (created if does not exist)",
    required=False,
    default=None,
    type=click.Path(dir_okay=True, file_okay=False),
)
@verbosity_option(logger=logger, expose_value=False)
def predtojson(label_path, output_folder) -> None:
    """Convert predictions to dataset."""
    import os
    import shutil

    import click
    import torch

    # hack to get a dictionary from arguments passed to input
    if len(label_path) % 2 != 0:
        raise click.ClickException(
            "Input label-paths should be doubles"
            " composed of name-path entries"
        )
    data = dict(zip(label_path[::2], label_path[1::2]))

    # load all data measures
    data = _load(data)

    logger.info(f"Output folder: {output_folder}")
    os.makedirs(output_folder, exist_ok=True)

    output_file = os.path.join(output_folder, "dataset.json")
    if os.path.exists(output_file):
        backup = output_file + "~"
        if os.path.exists(backup):
            os.unlink(backup)
        shutil.move(output_file, backup)

    logger.info("Saving JSON file...")
    with open(output_file, "a+", newline="") as f:
        f.write("{")
        for i, (name, value) in enumerate(data.items()):
            if i > 0:
                f.write(",")

            df = value["df"]
            f.write('"' + name + '": [')
            for index, row in df.iterrows():
                if index > 0:
                    f.write(",")
                f.write('["' + row["filename"] + '", ')
                f.write(str(row["ground_truth"][0].item()))
                f.write(",")
                f.write(
                    str(
                        [
                            format(x, ".20f")
                            for x in torch.tensor(row["likelihood"]).tolist()
                        ]
                    ).replace("'", "")
                )
                f.write("]")
            f.write("]")
        f.write("}")