Commit 01ae2cbb authored by André Anjos

[scripts] Remove outdated aggregpred and predtojson scripts and associated tests

parent 222e8515
1 merge request: !6 Making use of LightningDataModule and simplification of data loading
Pipeline #77153 passed
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later

import click

from clapper.click import verbosity_option
from clapper.logging import setup

logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")


@click.command(
    epilog="""Examples:

\b
    1. Aggregate multiple predictions csv files into one:

       .. code:: sh

          ptbench aggregpred -vv path/to/train/predictions.csv path/to/test/predictions.csv
""",
)
@click.argument(
    "label_path",
    nargs=-1,
)
@click.option(
    "--output-folder",
    "-f",
    help="Path where to store the aggregated csv file (created if necessary)",
    required=False,
    default=None,
    type=click.Path(dir_okay=True, file_okay=False),
)
@verbosity_option(logger=logger, expose_value=False)
def aggregpred(label_path, output_folder) -> None:
    """Aggregate multiple predictions csv files into one."""
    import os
    import re
    import shutil

    import pandas
    import torch

    # loads all data
    series = []
    for predictions_path in label_path:
        # load predictions
        logger.info(f"Loading predictions from {predictions_path}...")
        pred_data = pandas.read_csv(predictions_path)
        pred = (
            torch.Tensor(
                [
                    eval(
                        re.sub(" +", " ", x.replace("\n", "")).replace(" ", ",")
                    )
                    for x in pred_data["likelihood"].values
                ]
            )
            .double()
            .flatten()
        )
        gt = (
            torch.Tensor(
                [
                    eval(
                        re.sub(" +", " ", x.replace("\n", "")).replace(" ", ",")
                    )
                    for x in pred_data["ground_truth"].values
                ]
            )
            .double()
            .flatten()
        )

        pred_data["likelihood"] = pred
        pred_data["ground_truth"] = gt
        series.append(pred_data)

    df = pandas.concat([s for s in series])

    logger.info(f"Output folder: {output_folder}")
    os.makedirs(output_folder, exist_ok=True)

    output_file = os.path.join(output_folder, "aggregpred.csv")
    if os.path.exists(output_file):
        backup = output_file + "~"
        if os.path.exists(backup):
            os.unlink(backup)
        shutil.move(output_file, backup)

    logger.info("Saving aggregated CSV file...")
    df.to_csv(output_file, index=False, header=True)
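
For context, the removed aggregpred command relied on the "likelihood" and "ground_truth" CSV columns being stored as whitespace-separated array strings. The following is a minimal sketch (not part of the repository) of that parsing step, assuming a hypothetical predictions.csv whose cells look like numpy array reprs such as "[0.1 0.9]":

    # Minimal sketch: reproduces how the removed command turned array-like
    # string cells into a flat double tensor.  Path and file contents are
    # hypothetical assumptions, not repository fixtures.
    import re

    import pandas
    import torch

    pred_data = pandas.read_csv("path/to/predictions.csv")  # hypothetical path


    def _cell_to_values(cell: str):
        # "[0.1 0.9]" -> "[0.1,0.9]" -> Python list, via eval (as the original did)
        return eval(re.sub(" +", " ", cell.replace("\n", "")).replace(" ", ","))


    likelihood = (
        torch.Tensor([_cell_to_values(x) for x in pred_data["likelihood"].values])
        .double()
        .flatten()
    )
    print(likelihood.shape)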
@@ -7,14 +7,12 @@ import click
 from clapper.click import AliasedGroup

 from . import (
-    aggregpred,
     compare,
     config,
     database,
     evaluate,
     experiment,
     predict,
-    predtojson,
     train,
     train_analysis,
 )
@@ -29,13 +27,11 @@ def cli():
     pass


-cli.add_command(aggregpred.aggregpred)
 cli.add_command(compare.compare)
 cli.add_command(config.config)
 cli.add_command(database.database)
 cli.add_command(evaluate.evaluate)
 cli.add_command(experiment.experiment)
 cli.add_command(predict.predict)
-cli.add_command(predtojson.predtojson)
 cli.add_command(train.train)
 cli.add_command(train_analysis.train_analysis)
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later

import click

from clapper.click import verbosity_option
from clapper.logging import setup

logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")


def _load(data):
    """Load prediction.csv files.

    Parameters
    ----------

    data : dict
        A dict in which keys are the names of the systems and the values are
        paths to ``predictions.csv`` style files.


    Returns
    -------

    data : dict
        A dict in which keys are the names of the systems and the values are
        dictionaries that contain two keys:

        * ``df``: A :py:class:`pandas.DataFrame` with the predictions data
          loaded to
    """
    import re

    import numpy
    import pandas

    def _to_double_tensor(col):
        """Converts a column in a dataframe to a tensor array."""
        pattern = re.compile(" +")
        return col.apply(lambda cell: numpy.array(eval(pattern.sub(",", cell))))

    # loads all data
    retval = {}
    for name, predictions_path in data.items():
        # load predictions
        logger.info(f"Loading predictions from {predictions_path}...")
        pred_data = pandas.read_csv(predictions_path)
        pred_data["likelihood"] = _to_double_tensor(pred_data["likelihood"])
        pred_data["ground_truth"] = _to_double_tensor(pred_data["ground_truth"])
        retval[name] = dict(df=pred_data)

    return retval


@click.command(
    epilog="""Examples:

\b
    1. Convert predictions of radiological signs to a JSON dataset file:

       .. code:: sh

          ptbench predtojson -vv train path/to/train/predictions.csv test path/to/test/predictions.csv
""",
)
@click.argument(
    "label_path",
    nargs=-1,
)
@click.option(
    "--output-folder",
    "-f",
    help="Path where to store the json file (created if does not exist)",
    required=False,
    default=None,
    type=click.Path(dir_okay=True, file_okay=False),
)
@verbosity_option(logger=logger, expose_value=False)
def predtojson(label_path, output_folder) -> None:
    """Convert predictions to dataset."""
    import os
    import shutil

    import click
    import torch

    # hack to get a dictionary from arguments passed to input
    if len(label_path) % 2 != 0:
        raise click.ClickException(
            "Input label-paths should be doubles"
            " composed of name-path entries"
        )
    data = dict(zip(label_path[::2], label_path[1::2]))

    # load all data measures
    data = _load(data)

    logger.info(f"Output folder: {output_folder}")
    os.makedirs(output_folder, exist_ok=True)

    output_file = os.path.join(output_folder, "dataset.json")
    if os.path.exists(output_file):
        backup = output_file + "~"
        if os.path.exists(backup):
            os.unlink(backup)
        shutil.move(output_file, backup)

    logger.info("Saving JSON file...")
    with open(output_file, "a+", newline="") as f:
        f.write("{")
        for i, (name, value) in enumerate(data.items()):
            if i > 0:
                f.write(",")
            df = value["df"]
            f.write('"' + name + '": [')
            for index, row in df.iterrows():
                if index > 0:
                    f.write(",")
                f.write('["' + row["filename"] + '", ')
                f.write(str(row["ground_truth"][0].item()))
                f.write(",")
                f.write(
                    str(
                        [
                            format(x, ".20f")
                            for x in torch.tensor(row["likelihood"]).tolist()
                        ]
                    ).replace("'", "")
                )
                f.write("]")
            f.write("]")
        f.write("}")
@@ -148,18 +148,6 @@ def test_predict_help():
     _check_help(predict)


-def test_predtojson_help():
-    from ptbench.scripts.predtojson import predtojson
-
-    _check_help(predtojson)
-
-
-def test_aggregpred_help():
-    from ptbench.scripts.aggregpred import aggregpred
-
-    _check_help(aggregpred)
-
-
 def test_evaluate_help():
     from ptbench.scripts.evaluate import evaluate
@@ -378,47 +366,6 @@ def test_predict_pasa_montgomery(temporary_basedir, datadir):
     )


-@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_predtojson(datadir, temporary_basedir):
-    from ptbench.scripts.predtojson import predtojson
-
-    runner = CliRunner()
-
-    with stdout_logging() as buf:
-        predictions = str(datadir / "test_predictions.csv")
-        output_folder = str(temporary_basedir / "pred_to_json")
-        result = runner.invoke(
-            predtojson,
-            [
-                "-vv",
-                "train",
-                f"{predictions}",
-                "test",
-                f"{predictions}",
-                f"--output-folder={output_folder}",
-            ],
-        )
-        _assert_exit_0(result)
-
-        # check json file is there
-        assert os.path.exists(os.path.join(output_folder, "dataset.json"))
-
-        keywords = {
-            f"Output folder: {output_folder}": 1,
-            r"Saving JSON file...": 1,
-            r"^Loading predictions from.*$": 2,
-        }
-        buf.seek(0)
-        logging_output = buf.read()
-
-        for k, v in keywords.items():
-            assert _str_counter(k, logging_output) == v, (
-                f"Count for string '{k}' appeared "
-                f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
-            )
-
-
 @pytest.mark.skip(reason="Test need to be updated")
 @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 def test_evaluate_pasa_montgomery(temporary_basedir):
@@ -708,46 +655,6 @@ def test_predict_logreg_montgomery_rs(temporary_basedir, datadir):
     )


-@pytest.mark.skip(reason="Test need to be updated")
-@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_aggregpred(temporary_basedir):
-    from ptbench.scripts.aggregpred import aggregpred
-
-    runner = CliRunner()
-
-    with stdout_logging() as buf:
-        predictions = str(temporary_basedir / "predictions" / "test.csv")
-        output_folder = str(temporary_basedir / "aggregpred")
-        result = runner.invoke(
-            aggregpred,
-            [
-                "-vv",
-                f"{predictions}",
-                f"{predictions}",
-                f"--output-folder={output_folder}",
-            ],
-        )
-        _assert_exit_0(result)
-
-        # check csv file is there
-        assert os.path.exists(os.path.join(output_folder, "aggregpred.csv"))
-
-        keywords = {
-            f"Output folder: {output_folder}": 1,
-            r"Saving aggregated CSV file...": 1,
-            r"^Loading predictions from.*$": 2,
-        }
-        buf.seek(0)
-        logging_output = buf.read()
-
-        for k, v in keywords.items():
-            assert _str_counter(k, logging_output) == v, (
-                f"Count for string '{k}' appeared "
-                f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
-            )
-
-
 # Not enough RAM available to do this test
 # @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 # def test_predict_densenetrs_montgomery(temporary_basedir, datadir):