From 01ae2cbbb2e324d549d9402085a27f914f091c9f Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Fri, 18 Aug 2023 08:51:27 +0200 Subject: [PATCH] [scripts] Remove outdated aggregpred and predtojson scripts and associated tests --- src/ptbench/scripts/aggregpred.py | 95 --------------------- src/ptbench/scripts/cli.py | 4 - src/ptbench/scripts/predtojson.py | 135 ------------------------------ tests/test_cli.py | 93 -------------------- 4 files changed, 327 deletions(-) delete mode 100644 src/ptbench/scripts/aggregpred.py delete mode 100644 src/ptbench/scripts/predtojson.py diff --git a/src/ptbench/scripts/aggregpred.py b/src/ptbench/scripts/aggregpred.py deleted file mode 100644 index ca174636..00000000 --- a/src/ptbench/scripts/aggregpred.py +++ /dev/null @@ -1,95 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -import click - -from clapper.click import verbosity_option -from clapper.logging import setup - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -@click.command( - epilog="""Examples: - -\b - 1. Aggregate multiple predictions csv files into one: - - .. code:: sh - - ptbench aggregpred -vv path/to/train/predictions.csv path/to/test/predictions.csv -""", -) -@click.argument( - "label_path", - nargs=-1, -) -@click.option( - "--output-folder", - "-f", - help="Path where to store the aggregated csv file (created if necessary)", - required=False, - default=None, - type=click.Path(dir_okay=True, file_okay=False), -) -@verbosity_option(logger=logger, expose_value=False) -def aggregpred(label_path, output_folder) -> None: - """Aggregate multiple predictions csv files into one.""" - import os - import re - import shutil - - import pandas - import torch - - # loads all data - series = [] - for predictions_path in label_path: - # Load predictions - logger.info(f"Loading predictions from {predictions_path}...") - pred_data = pandas.read_csv(predictions_path) - pred = ( - torch.Tensor( - [ - eval( - re.sub(" +", " ", x.replace("\n", "")).replace(" ", ",") - ) - for x in pred_data["likelihood"].values - ] - ) - .double() - .flatten() - ) - gt = ( - torch.Tensor( - [ - eval( - re.sub(" +", " ", x.replace("\n", "")).replace(" ", ",") - ) - for x in pred_data["ground_truth"].values - ] - ) - .double() - .flatten() - ) - - pred_data["likelihood"] = pred - pred_data["ground_truth"] = gt - - series.append(pred_data) - - df = pandas.concat([s for s in series]) - - logger.info(f"Output folder: {output_folder}") - os.makedirs(output_folder, exist_ok=True) - - output_file = os.path.join(output_folder, "aggregpred.csv") - if os.path.exists(output_file): - backup = output_file + "~" - if os.path.exists(backup): - os.unlink(backup) - shutil.move(output_file, backup) - - logger.info("Saving aggregated CSV file...") - df.to_csv(output_file, index=False, header=True) diff --git a/src/ptbench/scripts/cli.py b/src/ptbench/scripts/cli.py index a84c9d5b..9f27124b 100644 --- a/src/ptbench/scripts/cli.py +++ b/src/ptbench/scripts/cli.py @@ -7,14 +7,12 @@ import click from clapper.click import AliasedGroup from . import ( - aggregpred, compare, config, database, evaluate, experiment, predict, - predtojson, train, train_analysis, ) @@ -29,13 +27,11 @@ def cli(): pass -cli.add_command(aggregpred.aggregpred) cli.add_command(compare.compare) cli.add_command(config.config) cli.add_command(database.database) cli.add_command(evaluate.evaluate) cli.add_command(experiment.experiment) cli.add_command(predict.predict) -cli.add_command(predtojson.predtojson) cli.add_command(train.train) cli.add_command(train_analysis.train_analysis) diff --git a/src/ptbench/scripts/predtojson.py b/src/ptbench/scripts/predtojson.py deleted file mode 100644 index 2b782291..00000000 --- a/src/ptbench/scripts/predtojson.py +++ /dev/null @@ -1,135 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -import click - -from clapper.click import verbosity_option -from clapper.logging import setup - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -def _load(data): - """Load prediction.csv files. - - Parameters - ---------- - - data : dict - A dict in which keys are the names of the systems and the values are - paths to ``predictions.csv`` style files. - - - Returns - ------- - - data : dict - A dict in which keys are the names of the systems and the values are - dictionaries that contain two keys: - - * ``df``: A :py:class:`pandas.DataFrame` with the predictions data - loaded to - """ - import re - - import numpy - import pandas - - def _to_double_tensor(col): - """Converts a column in a dataframe to a tensor array.""" - pattern = re.compile(" +") - return col.apply(lambda cell: numpy.array(eval(pattern.sub(",", cell)))) - - # loads all data - retval = {} - for name, predictions_path in data.items(): - # Load predictions - logger.info(f"Loading predictions from {predictions_path}...") - pred_data = pandas.read_csv(predictions_path) - pred_data["likelihood"] = _to_double_tensor(pred_data["likelihood"]) - pred_data["ground_truth"] = _to_double_tensor(pred_data["ground_truth"]) - retval[name] = dict(df=pred_data) - - return retval - - -@click.command( - epilog="""Examples: - -\b - 1. Convert predictions of radiological signs to a JSON dataset file: - - .. code:: sh - - ptbench predtojson -vv train path/to/train/predictions.csv test path/to/test/predictions.csv -""", -) -@click.argument( - "label_path", - nargs=-1, -) -@click.option( - "--output-folder", - "-f", - help="Path where to store the json file (created if does not exist)", - required=False, - default=None, - type=click.Path(dir_okay=True, file_okay=False), -) -@verbosity_option(logger=logger, expose_value=False) -def predtojson(label_path, output_folder) -> None: - """Convert predictions to dataset.""" - import os - import shutil - - import click - import torch - - # hack to get a dictionary from arguments passed to input - if len(label_path) % 2 != 0: - raise click.ClickException( - "Input label-paths should be doubles" - " composed of name-path entries" - ) - data = dict(zip(label_path[::2], label_path[1::2])) - - # load all data measures - data = _load(data) - - logger.info(f"Output folder: {output_folder}") - os.makedirs(output_folder, exist_ok=True) - - output_file = os.path.join(output_folder, "dataset.json") - if os.path.exists(output_file): - backup = output_file + "~" - if os.path.exists(backup): - os.unlink(backup) - shutil.move(output_file, backup) - - logger.info("Saving JSON file...") - with open(output_file, "a+", newline="") as f: - f.write("{") - for i, (name, value) in enumerate(data.items()): - if i > 0: - f.write(",") - - df = value["df"] - f.write('"' + name + '": [') - for index, row in df.iterrows(): - if index > 0: - f.write(",") - f.write('["' + row["filename"] + '", ') - f.write(str(row["ground_truth"][0].item())) - f.write(",") - f.write( - str( - [ - format(x, ".20f") - for x in torch.tensor(row["likelihood"]).tolist() - ] - ).replace("'", "") - ) - f.write("]") - f.write("]") - f.write("}") diff --git a/tests/test_cli.py b/tests/test_cli.py index e1a53b12..7b58b03d 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -148,18 +148,6 @@ def test_predict_help(): _check_help(predict) -def test_predtojson_help(): - from ptbench.scripts.predtojson import predtojson - - _check_help(predtojson) - - -def test_aggregpred_help(): - from ptbench.scripts.aggregpred import aggregpred - - _check_help(aggregpred) - - def test_evaluate_help(): from ptbench.scripts.evaluate import evaluate @@ -378,47 +366,6 @@ def test_predict_pasa_montgomery(temporary_basedir, datadir): ) -@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery") -def test_predtojson(datadir, temporary_basedir): - from ptbench.scripts.predtojson import predtojson - - runner = CliRunner() - - with stdout_logging() as buf: - predictions = str(datadir / "test_predictions.csv") - output_folder = str(temporary_basedir / "pred_to_json") - result = runner.invoke( - predtojson, - [ - "-vv", - "train", - f"{predictions}", - "test", - f"{predictions}", - f"--output-folder={output_folder}", - ], - ) - _assert_exit_0(result) - - # check json file is there - assert os.path.exists(os.path.join(output_folder, "dataset.json")) - - keywords = { - f"Output folder: {output_folder}": 1, - r"Saving JSON file...": 1, - r"^Loading predictions from.*$": 2, - } - buf.seek(0) - logging_output = buf.read() - - for k, v in keywords.items(): - assert _str_counter(k, logging_output) == v, ( - f"Count for string '{k}' appeared " - f"({_str_counter(k, logging_output)}) " - f"instead of the expected {v}:\nOutput:\n{logging_output}" - ) - - @pytest.mark.skip(reason="Test need to be updated") @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery") def test_evaluate_pasa_montgomery(temporary_basedir): @@ -708,46 +655,6 @@ def test_predict_logreg_montgomery_rs(temporary_basedir, datadir): ) -@pytest.mark.skip(reason="Test need to be updated") -@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery") -def test_aggregpred(temporary_basedir): - from ptbench.scripts.aggregpred import aggregpred - - runner = CliRunner() - - with stdout_logging() as buf: - predictions = str(temporary_basedir / "predictions" / "test.csv") - output_folder = str(temporary_basedir / "aggregpred") - result = runner.invoke( - aggregpred, - [ - "-vv", - f"{predictions}", - f"{predictions}", - f"--output-folder={output_folder}", - ], - ) - _assert_exit_0(result) - - # check csv file is there - assert os.path.exists(os.path.join(output_folder, "aggregpred.csv")) - - keywords = { - f"Output folder: {output_folder}": 1, - r"Saving aggregated CSV file...": 1, - r"^Loading predictions from.*$": 2, - } - buf.seek(0) - logging_output = buf.read() - - for k, v in keywords.items(): - assert _str_counter(k, logging_output) == v, ( - f"Count for string '{k}' appeared " - f"({_str_counter(k, logging_output)}) " - f"instead of the expected {v}:\nOutput:\n{logging_output}" - ) - - # Not enough RAM available to do this test # @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery") # def test_predict_densenetrs_montgomery(temporary_basedir, datadir): -- GitLab