# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later

import click

from clapper.click import verbosity_option
from clapper.logging import setup

logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")


def _load(data: dict) -> dict:
    """Load prediction.csv files.

    Parameters
    ----------
    data : dict
        A dict in which keys are the names of the systems and the values are
        paths to ``predictions.csv`` style files.

    Returns
    -------
    data : dict
        A dict in which keys are the names of the systems and the values are
        dictionaries that contain one key:

        * ``df``: A :py:class:`pandas.DataFrame` with the predictions data
          loaded from the CSV file.  Its ``likelihood`` and ``ground_truth``
          columns are converted from their textual form to
          :py:class:`numpy.ndarray` objects.
    """
    # Heavy dependencies are imported lazily so that loading the CLI module
    # itself stays cheap.
    import re

    import numpy
    import pandas

    def _to_double_tensor(col):
        """Convert a column of stringified arrays into numpy arrays."""
        # Runs of spaces separate the values inside each cell; turning them
        # into commas makes the cell parseable as a Python list literal.
        pattern = re.compile(" +")
        # NOTE(review): ``eval`` executes whatever text is stored in the CSV
        # cell.  This is only acceptable for trusted prediction files;
        # consider ``ast.literal_eval`` if files may come from elsewhere.
        return col.apply(
            lambda cell: numpy.array(eval(pattern.sub(",", cell)))
        )

    # loads all data
    retval = {}
    for name, predictions_path in data.items():
        logger.info(f"Loading predictions from {predictions_path}...")
        pred_data = pandas.read_csv(predictions_path)
        pred_data["likelihood"] = _to_double_tensor(pred_data["likelihood"])
        pred_data["ground_truth"] = _to_double_tensor(
            pred_data["ground_truth"]
        )
        retval[name] = dict(df=pred_data)

    return retval


def _sample_to_json(row) -> str:
    """Serialize one prediction row as ``["filename", truth,[likelihoods]]``."""
    import json

    import torch

    # json.dumps escapes quotes, backslashes and control characters so the
    # resulting document stays valid JSON whatever the filename contains.
    filename = json.dumps(str(row["filename"]))
    # Only the first ground-truth entry is exported.
    ground_truth = str(row["ground_truth"][0].item())
    # Fixed-point notation with 20 decimals, never scientific notation.
    likelihoods = ", ".join(
        format(x, ".20f") for x in torch.tensor(row["likelihood"]).tolist()
    )
    return f"[{filename}, {ground_truth},[{likelihoods}]]"


@click.command(
    epilog="""Examples:

\b
    1. Convert predictions of radiological signs to a JSON dataset file:

       .. code:: sh

          ptbench predtojson -vv train path/to/train/predictions.csv test path/to/test/predictions.csv
""",
)
@click.argument(
    "label_path",
    nargs=-1,
)
@click.option(
    "--output-folder",
    "-f",
    help="Path where to store the json file (created if does not exist)",
    required=False,
    default=None,
    type=click.Path(dir_okay=True, file_okay=False),
)
@verbosity_option(logger=logger, expose_value=False)
def predtojson(label_path, output_folder) -> None:
    """Convert predictions to dataset."""
    # NOTE: the docstring above is displayed by click as the command help, so
    # implementation notes live in comments instead.
    import json
    import os
    import shutil

    # hack to get a dictionary from arguments passed to input
    if len(label_path) % 2 != 0:
        raise click.ClickException(
            "Input label-paths should be doubles"
            " composed of name-path entries"
        )

    # The option defaults to None; fail with a clear message instead of
    # letting os.makedirs(None) raise an opaque TypeError after all the
    # prediction files have already been loaded.
    if output_folder is None:
        raise click.UsageError(
            "Missing option '--output-folder' / '-f': a destination folder "
            "is required to store the JSON file"
        )

    data = dict(zip(label_path[::2], label_path[1::2]))

    # load all data measures
    data = _load(data)

    logger.info(f"Output folder: {output_folder}")
    os.makedirs(output_folder, exist_ok=True)

    output_file = os.path.join(output_folder, "dataset.json")
    if os.path.exists(output_file):
        # Keep exactly one backup of the previous dataset file.
        backup = output_file + "~"
        if os.path.exists(backup):
            os.unlink(backup)
        shutil.move(output_file, backup)

    logger.info("Saving JSON file...")
    # Any previous file was moved away above, so plain write mode is enough.
    # The output is pure ASCII (json.dumps escapes non-ASCII characters).
    with open(output_file, "w", newline="", encoding="utf-8") as f:
        f.write("{")
        for i, (name, value) in enumerate(data.items()):
            if i > 0:
                f.write(",")
            f.write(json.dumps(name) + ": [")
            # Join on row position rather than on the DataFrame index label,
            # so separators stay correct whatever the index looks like.
            f.write(
                ",".join(
                    _sample_to_json(row) for _, row in value["df"].iterrows()
                )
            )
            f.write("]")
        f.write("}")