Commit bd4d1f75 authored by André Anjos
[tests] Separate test output folders

parent c2483554
1 merge request: !46 Create common library
@@ -137,8 +137,19 @@ def evaluate(
    # register metadata
    json_data: dict[str, typing.Any] = execution_metadata()
+   json_data.update(
+       dict(
+           predictions=str(predictions),
+           output_folder=str(output_folder),
+           threshold=threshold,
+           binning=binning,
+           plot=plot,
+       ),
+   )
    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
-   save_json_with_backup(evaluation_file.with_suffix(".meta.json"), json_data)
+   evaluation_meta = evaluation_file.with_suffix(".meta.json")
+   logger.info(f"Saving evaluation metadata at `{str(evaluation_meta)}`...")
+   save_json_with_backup(evaluation_meta, json_data)
    if threshold in predict_data:
        # it is the name of a split
@@ -161,7 +172,7 @@ def evaluate(
    results: dict[str, dict[str, typing.Any]] = dict()
    for k, v in predict_data.items():
-       logger.info(f"Analyzing split `{k}`...")
+       logger.info(f"Computing performance on split `{k}`...")
        results[k] = run_binary(
            name=k,
            predictions=v,
@@ -170,7 +181,7 @@ def evaluate(
        )
    # records full result analysis to a JSON file
-   logger.info(f"Saving evaluation results at `{evaluation_file}`...")
+   logger.info(f"Saving evaluation results at `{str(evaluation_file)}`...")
    with evaluation_file.open("w") as f:
        json.dump(results, f, indent=2, cls=NumpyJSONEncoder)
@@ -190,11 +201,10 @@ def evaluate(
    with table_path.open("w") as f:
        f.write(table)
-   # dump evaluation plots in file
-   figure_path = evaluation_file.with_suffix(".pdf")
-   logger.info(f"Saving evaluation figures at `{figure_path}`...")
    if plot:
+       figure_path = evaluation_file.with_suffix(".pdf")
+       logger.info(f"Saving evaluation figures at `{str(figure_path)}`...")
        with PdfPages(figure_path) as pdf:
            pr_curves = {k: v["curves"]["precision_recall"] for k, v in results.items()}
            pr_fig = aggregate_pr(pr_curves)
...
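Editorial note: the hunk above funnels several figures into a single PDF only when plotting is requested. The pattern (one PdfPages handle collecting pages) is standard matplotlib; here is a minimal, self-contained sketch of it, with all file names illustrative rather than taken from this repository:

# Minimal sketch: several figures written into one PDF, as the change above
# does for evaluation plots.  Names are illustrative, not from mednet.
import matplotlib

matplotlib.use("agg")  # headless backend, no X11 display required

import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

with PdfPages("evaluation.pdf") as pdf:
    for title in ("precision-recall", "roc"):
        fig, ax = plt.subplots()
        ax.set_title(title)
        ax.plot([0.0, 0.5, 1.0], [1.0, 0.8, 0.0], label="test")
        ax.legend(loc="best")
        pdf.savefig(fig)  # appends one page per figure
        plt.close(fig)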
@@ -61,8 +61,8 @@ def experiment(
    \b
    └─ <output-folder>/
       ├── model/  # the generated model will be here
-      ├── predictions.json  # the prediction outputs for the sets
-      └── evaluation/  # the outputs of the evaluations for the sets
+      ├── predictions.json  # the prediction outputs
+      └── evaluation.json  # the evaluation outputs
    """
    experiment_start_timestamp = datetime.now()
@@ -112,11 +112,9 @@ def experiment(
    from .predict import predict
-   predictions_output = output_folder / "predictions"
    ctx.invoke(
        predict,
-       output_folder=predictions_output,
+       output_folder=output_folder,
        model=model,
        datamodule=datamodule,
        device=device,
@@ -134,9 +132,9 @@ def experiment(
    from .evaluate import evaluate
-   predictions_file = predictions_output / "predictions.json"
-   with (predictions_output / "predictions.json").open() as pf:
+   predictions_file = output_folder / "predictions.json"
+   with (output_folder / "predictions.json").open() as pf:
        splits = json.load(pf).keys()
    if "validation" in splits:
@@ -159,56 +157,6 @@ def experiment(
        f"Prediction runtime: {evaluation_stop_timestamp-evaluation_start_timestamp}"
    )
-   saliency_map_generation_start_timestamp = datetime.now()
-   logger.info(
-       f"Started saliency map generation at {saliency_map_generation_start_timestamp}"
-   )
-   from .saliency.generate import generate
-   saliencies_gen_folder = output_folder / "gradcam" / "saliencies"
-   ctx.invoke(
-       generate,
-       model=model,
-       datamodule=datamodule,
-       weight=train_output_folder,
-       output_folder=saliencies_gen_folder,
-   )
-   saliency_map_generation_stop_timestamp = datetime.now()
-   logger.info(
-       f"Ended saliency map generation at {saliency_map_generation_stop_timestamp}"
-   )
-   logger.info(
-       f"Saliency map generation runtime: {saliency_map_generation_stop_timestamp-saliency_map_generation_start_timestamp}"
-   )
-   saliency_images_generation_start_timestamp = datetime.now()
-   logger.info(
-       f"Started generating saliency images at {saliency_images_generation_start_timestamp}"
-   )
-   from .saliency.view import view
-   saliencies_view_folder = output_folder / "gradcam" / "visualizations"
-   ctx.invoke(
-       view,
-       model=model,
-       datamodule=datamodule,
-       input_folder=saliencies_gen_folder,
-       output_folder=saliencies_view_folder,
-   )
-   saliency_images_generation_stop_timestamp = datetime.now()
-   logger.info(
-       f"Ended saliency images generation at {saliency_images_generation_stop_timestamp}"
-   )
-   logger.info(
-       f"Saliency images generation runtime: {saliency_images_generation_stop_timestamp-saliency_images_generation_start_timestamp}"
-   )
    experiment_stop_timestamp = datetime.now()
    logger.info(
        f"Total experiment runtime: {experiment_stop_timestamp-experiment_start_timestamp}"
...
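Editorial note: the experiment wrapper above chains the predict and evaluate commands through click's Context.invoke, now passing the same output folder to both steps. A minimal sketch of that chaining pattern follows; the command bodies are hypothetical placeholders, not the repository's code:

# Hypothetical sketch of chaining click commands with Context.invoke,
# mirroring how `experiment` calls `predict` and then `evaluate`.
import pathlib

import click


@click.command()
@click.option("--output-folder", type=click.Path(path_type=pathlib.Path))
def predict(output_folder: pathlib.Path) -> None:
    output_folder.mkdir(parents=True, exist_ok=True)
    (output_folder / "predictions.json").write_text("{}")


@click.command()
@click.option("--predictions", type=click.Path(path_type=pathlib.Path))
def evaluate(predictions: pathlib.Path) -> None:
    click.echo(f"evaluating {predictions}")


@click.command()
@click.option("--output-folder", type=click.Path(path_type=pathlib.Path))
@click.pass_context
def experiment(ctx: click.Context, output_folder: pathlib.Path) -> None:
    # both steps write into the same folder, as in the change above
    ctx.invoke(predict, output_folder=output_folder)
    ctx.invoke(evaluate, predictions=output_folder / "predictions.json")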
@@ -14,6 +14,9 @@ from mednet.libs.segmentation.engine.evaluator import SUPPORTED_METRIC_TYPE
logger = setup("mednet")
+# avoids X11/graphical desktop requirement when creating plots
+__import__("matplotlib").use("agg")
def validate_threshold(threshold: float | str, splits: list[str]):
    """Validate the user threshold selection and returns parsed threshold.
@@ -89,17 +92,6 @@ def validate_threshold(threshold: float | str, splits: list[str]):
    default="results",
    cls=ResourceOption,
)
-# @click.option(
-#     "--second-annotator",
-#     "-a",
-#     help="""A datamodule containing annotations from another annotator, that
-#     will be compared to the ground-truth (reference annotator) in each
-#     sample.""",
-#     required=False,
-#     default=None,
-#     cls=ResourceOption,
-#     show_default=True,
-# )
@click.option(
    "--threshold",
    "-t",
@@ -138,14 +130,43 @@ def validate_threshold(threshold: float | str, splits: list[str]):
    required=True,
    cls=ResourceOption,
)
+@click.option(
+    "--compare-annotator",
+    "-a",
+    help="""Path to a JSON file as produced by the CLI ``dump-annotations``,
+    containing splits and sample lists with associated HDF5 files where we can
+    find pre-processed annotation masks.  These annotations will be compared
+    with the target annotations on the main predictions.  In this case, a row
+    is added for each available split in the evaluation table.""",
+    required=False,
+    default=None,
+    type=click.Path(
+        file_okay=True,
+        dir_okay=False,
+        writable=False,
+        path_type=pathlib.Path,
+    ),
+    cls=ResourceOption,
+)
+@click.option(
+    "--plot/--no-plot",
+    "-P",
+    help="""If set, then also produces figures containing the plots of
+    performance curves and score histograms.""",
+    required=True,
+    show_default=True,
+    default=True,
+    cls=ResourceOption,
+)
@verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
def evaluate(
    predictions: pathlib.Path,
    output_folder: pathlib.Path,
    threshold: str | float,
    metric: str,
-   # second_annotator,
    steps: int,
+   compare_annotator: pathlib.Path,
+   plot: bool,
    **_,  # ignored
):  # numpydoc ignore=PR01
    """Evaluate predictions (from a model) on a segmentation task."""
@@ -185,10 +206,14 @@ def evaluate(
            threshold=threshold,
            metric=metric,
            steps=steps,
+           compare_annotator=str(compare_annotator),
+           plot=plot,
        ),
    )
    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
-   save_json_with_backup(evaluation_file.with_suffix(".meta.json"), json_data)
+   evaluation_meta = evaluation_file.with_suffix(".meta.json")
+   logger.info(f"Saving evaluation metadata at `{str(evaluation_meta)}`...")
+   save_json_with_backup(evaluation_meta, json_data)
    threshold = validate_threshold(threshold, predict_data)
    threshold_list = numpy.arange(
@@ -211,7 +236,7 @@ def evaluate(
    if isinstance(threshold, str):
        # Compute threshold on specified split, if required
-       logger.info(f"Evaluating threshold on `{threshold}` split using " f"`{metric}`")
+       logger.info(f"Evaluating threshold on split `{threshold}` using " f"`{metric}`")
        metric_list = compute_metric(
            eval_json_data[threshold]["counts"].values(),
            name2metric(typing.cast(SUPPORTED_METRIC_TYPE, metric)),
@@ -230,14 +255,12 @@ def evaluate(
        threshold_index = (numpy.abs(threshold_list - threshold)).argmin()
        logger.info(f"Set --threshold={threshold_list[threshold_index]:.4f}")
-   logger.info("Tabulating performance summary...")
-   table_format = "rst"
-   output_table = output_folder / "evaluation.rst"
    metrics_available = list(typing.get_args(SUPPORTED_METRIC_TYPE))
    table_headers = ["Dataset", "threshold"] + metrics_available + ["auroc", "avgprec"]
    table_data = []
    for split_name in predict_data.keys():
+       logger.info("Computing performance on split `{split_name}`...")
        counts = list(eval_json_data[split_name]["counts"].values())
        base_metrics = all_metrics(*counts[threshold_index])
        table_data.append([split_name, threshold_list[threshold_index]] + base_metrics)
@@ -266,10 +289,11 @@ def evaluate(
    # records full result analysis to a JSON file
    evaluation_file = output_folder / "evaluation.json"
-   logger.info(f"Saving evaluation results at `{evaluation_file}`...")
+   logger.info(f"Saving evaluation results at `{str(evaluation_file)}`...")
    with evaluation_file.open("w") as f:
        json.dump(eval_json_data, f, indent=2, cls=NumpyJSONEncoder)
+   table_format = "rst"
    table = tabulate.tabulate(
        table_data,
        table_headers,
@@ -278,41 +302,44 @@ def evaluate(
        stralign="right",
    )
    click.echo(table)
-   logger.info(f"Saving table at {output_table}...")
+   output_table = output_folder / "evaluation.rst"
+   logger.info(f"Saving tabulated performance summary at `{str(output_table)}`...")
    output_table.parent.mkdir(parents=True, exist_ok=True)
    with output_table.open("w") as f:
        f.write(table)
-   logger.info("Plotting performance curves...")
-   output_figure = output_folder / "evaluation.pdf"
-   logger.info(f"Saving figures at {output_figure}...")
-   with matplotlib.backends.backend_pdf.PdfPages(output_figure) as pdf:
-       with credible.plot.tight_layout(
-           ("False Positive Rate", "True Positive Rate"), "ROC"
-       ) as (
-           fig,
-           ax,
-       ):
-           for split_name, data in eval_json_data.items():
-               ax.plot(
-                   data["curves"]["roc"]["fpr"],
-                   data["curves"]["roc"]["tpr"],
-                   label=f"{split_name} (AUC: {data['auc_score']:.2f})",
-               )
-           ax.legend(loc="best", fancybox=True, framealpha=0.7)
-           pdf.savefig(fig)
-       with credible.plot.tight_layout_f1iso(
-           ("Recall", "Precision"), "Precison-Recall"
-       ) as (
-           fig,
-           ax,
-       ):
-           for split_name, data in eval_json_data.items():
-               ax.plot(
-                   data["curves"]["precision_recall"]["precision"],
-                   data["curves"]["precision_recall"]["recall"],
-                   label=f"{split_name} (AP: {data['average_precision_score']:.2f})",
-               )
-           ax.legend(loc="best", fancybox=True, framealpha=0.7)
-           pdf.savefig(fig)
+   if plot:
+       figure_path = evaluation_file.with_suffix(".pdf")
+       logger.info(f"Saving evaluation figures at `{str(figure_path)}`...")
+       with matplotlib.backends.backend_pdf.PdfPages(figure_path) as pdf:
+           with credible.plot.tight_layout(
+               ("False Positive Rate", "True Positive Rate"), "ROC"
+           ) as (
+               fig,
+               ax,
+           ):
+               for split_name, data in eval_json_data.items():
+                   ax.plot(
+                       data["curves"]["roc"]["fpr"],
+                       data["curves"]["roc"]["tpr"],
+                       label=f"{split_name} (AUC: {data['auc_score']:.2f})",
+                   )
+               ax.legend(loc="best", fancybox=True, framealpha=0.7)
+               pdf.savefig(fig)
+           with credible.plot.tight_layout_f1iso(
+               ("Recall", "Precision"), "Precison-Recall"
+           ) as (
+               fig,
+               ax,
+           ):
+               for split_name, data in eval_json_data.items():
+                   ax.plot(
+                       data["curves"]["precision_recall"]["precision"],
+                       data["curves"]["precision_recall"]["recall"],
+                       label=f"{split_name} (AP: {data['average_precision_score']:.2f})",
+                   )
+               ax.legend(loc="best", fancybox=True, framealpha=0.7)
+               pdf.savefig(fig)
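Editorial note: two small patterns carry this hunk: selecting matplotlib's "agg" backend at import time so figures can be rendered without an X11 display, and a click on/off flag (--plot/--no-plot) that gates figure generation. A condensed sketch of both, with the command body as a placeholder only:

# Sketch of a headless-plotting setup plus a boolean click flag, mirroring
# the --plot/--no-plot option added above.  The command body is illustrative.
import matplotlib

matplotlib.use("agg")  # must run before pyplot creates a figure

import click


@click.command()
@click.option(
    "--plot/--no-plot",
    "-P",
    default=True,
    show_default=True,
    help="Also produce PDF figures with performance curves.",
)
def evaluate(plot: bool) -> None:
    if plot:
        click.echo("would save evaluation.pdf")
    else:
        click.echo("skipping figure generation")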
@@ -60,8 +60,8 @@ def experiment(
    \b
    └─ <output-folder>/
       ├── model/  # the generated model will be here
-      ├── predictions  # the prediction outputs for the sets
-      └── evaluation/  # the outputs of the evaluations for the sets
+      ├── predictions.json  # the prediction outputs
+      └── evaluation.json  # the evaluation outputs
    """
    experiment_start_timestamp = datetime.now()
@@ -110,11 +110,9 @@ def experiment(
    from .predict import predict
-   predictions_output = output_folder / "predictions"
    ctx.invoke(
        predict,
-       output_folder=predictions_output,
+       output_folder=output_folder,
        model=model,
        datamodule=datamodule,
        device=device,
@@ -132,11 +130,9 @@ def experiment(
    from .evaluate import evaluate
-   evaluation_output = output_folder / "evaluation"
-   predictions_file = predictions_output / "predictions.json"
-   with (predictions_output / "predictions.json").open() as pf:
+   predictions_file = output_folder / "predictions.json"
+   with (predictions_file).open() as pf:
        splits = json.load(pf).keys()
    if "validation" in splits:
@@ -149,7 +145,7 @@ def experiment(
    ctx.invoke(
        evaluate,
        predictions=predictions_file,
-       output_folder=evaluation_output,
+       output_folder=output_folder,
        threshold=evaluation_threshold,
        # metric="f1",
        # steps=100,
...
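Editorial note: both experiment wrappers now write predictions.json and the evaluation outputs side by side in a single output folder, and the evaluation scripts derive the sibling file names from one base path with pathlib. A small illustration of that derivation (the folder name is hypothetical):

# Illustration of deriving sibling output files from one base path with
# pathlib, as done for evaluation.json / evaluation.meta.json / evaluation.pdf.
import pathlib

evaluation_file = pathlib.Path("results") / "evaluation.json"

print(evaluation_file.with_suffix(".meta.json"))  # results/evaluation.meta.json
print(evaluation_file.with_suffix(".pdf"))        # results/evaluation.pdf
print(evaluation_file.parent / "evaluation.rst")  # results/evaluation.rst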
@@ -185,7 +185,7 @@ def test_upload_help():
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_train_pasa_montgomery(temporary_basedir):
+def test_train_pasa_montgomery(session_tmp_path):
    from mednet.libs.classification.scripts.train import train
    from mednet.libs.common.utils.checkpointer import (
        CHECKPOINT_EXTENSION,
@@ -195,7 +195,7 @@ def test_train_pasa_montgomery(temporary_basedir):
    runner = CliRunner()
    with stdout_logging() as buf:
-       output_folder = temporary_basedir / "classification" / "results"
+       output_folder = session_tmp_path / "classification-standalone"
        result = runner.invoke(
            train,
            [
@@ -241,8 +241,8 @@ def test_train_pasa_montgomery(temporary_basedir):
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_train_pasa_montgomery_from_checkpoint(temporary_basedir):
-   from mednet.libs.classification.scripts.train import train
+def test_predict_pasa_montgomery(session_tmp_path):
+   from mednet.libs.classification.scripts.predict import predict
    from mednet.libs.common.utils.checkpointer import (
        CHECKPOINT_EXTENSION,
        _get_checkpoint_from_alias,
@@ -250,62 +250,36 @@ def test_train_pasa_montgomery_from_checkpoint(temporary_basedir):
    runner = CliRunner()
-   output_folder = temporary_basedir / "classification" / "results" / "pasa_checkpoint"
-   result0 = runner.invoke(
-       train,
-       [
-           "pasa",
-           "montgomery",
-           "-vv",
-           "--epochs=1",
-           "--batch-size=1",
-           f"--output-folder={str(output_folder)}",
-       ],
-   )
-   _assert_exit_0(result0)
-   # asserts checkpoints are there, or raises FileNotFoundError
-   last = _get_checkpoint_from_alias(output_folder, "periodic")
-   assert last.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
-   best = _get_checkpoint_from_alias(output_folder, "best")
-   assert best.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
-   assert (output_folder / "meta.json").exists()
-   assert len(list((output_folder / "logs").glob("events.out.tfevents.*"))) == 1
    with stdout_logging() as buf:
+       output_folder = session_tmp_path / "classification-standalone"
+       last = _get_checkpoint_from_alias(output_folder, "periodic")
+       assert last.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
        result = runner.invoke(
-           train,
+           predict,
            [
                "pasa",
                "montgomery",
                "-vv",
-               "--epochs=2",
                "--batch-size=1",
-               f"--output-folder={output_folder}",
+               f"--weight={str(last)}",
+               f"--output-folder={str(output_folder)}",
            ],
        )
        _assert_exit_0(result)
-       # asserts checkpoints are there, or raises FileNotFoundError
-       last = _get_checkpoint_from_alias(output_folder, "periodic")
-       assert last.name.endswith("epoch=1" + CHECKPOINT_EXTENSION)
-       best = _get_checkpoint_from_alias(output_folder, "best")
-       assert (output_folder / "meta.json").exists()
-       assert len(list((output_folder / "logs").glob("events.out.tfevents.*"))) == 2
+       assert (output_folder / "predictions.meta.json").exists()
+       assert (output_folder / "predictions.json").exists()
        keywords = {
-           r"^Loading dataset:`train` without caching. Trade-off: CPU RAM usage: less | Disk I/O: more.$": 1,
-           r"^Loading dataset:`validation` without caching. Trade-off: CPU RAM usage: less | Disk I/O: more.$": 1,
-           r"^Applying train/valid loss balancing...$": 1,
-           r"^Training for at most 2 epochs.$": 1,
-           r"^Resuming from epoch 0 \(checkpoint file: .*$": 1,
-           r"^Writing run metadata at.*$": 1,
-           r"^Dataset `train` is already setup. Not re-instantiating it.$": 1,
-           r"^Dataset `validation` is already setup. Not re-instantiating it.$": 1,
+           r"^Loading dataset: * without caching. Trade-off: CPU RAM usage: less | Disk I/O: more$": 3,
+           r"^Loading checkpoint from .*$": 1,
            r"^Restoring normalizer from checkpoint.$": 1,
+           r"^Running prediction on `train` split...$": 1,
+           r"^Running prediction on `validation` split...$": 1,
+           r"^Running prediction on `test` split...$": 1,
+           r"^Predictions saved to .*$": 1,
        }
        buf.seek(0)
        logging_output = buf.read()
@@ -319,47 +293,37 @@ def test_train_pasa_montgomery_from_checkpoint(temporary_basedir):
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_predict_pasa_montgomery(temporary_basedir, datadir):
-   from mednet.libs.classification.scripts.predict import predict
-   from mednet.libs.common.utils.checkpointer import (
-       CHECKPOINT_EXTENSION,
-       _get_checkpoint_from_alias,
-   )
+def test_evaluate_pasa_montgomery(session_tmp_path):
+   from mednet.libs.classification.scripts.evaluate import evaluate
    runner = CliRunner()
    with stdout_logging() as buf:
-       output = temporary_basedir / "classification" / "predictions"
-       last = _get_checkpoint_from_alias(
-           temporary_basedir / "classification" / "results",
-           "periodic",
-       )
-       assert last.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
+       output_folder = session_tmp_path / "classification-standalone"
        result = runner.invoke(
-           predict,
+           evaluate,
            [
-               "pasa",
-               "montgomery",
                "-vv",
-               "--batch-size=1",
-               f"--weight={str(last)}",
-               f"--output-folder={str(output)}",
+               f"--predictions={str(output_folder / 'predictions.json')}",
+               f"--output-folder={str(output_folder)}",
+               "--threshold=test",
            ],
        )
        _assert_exit_0(result)
-       assert output.exists()
+       assert (output_folder / "evaluation.json").exists()
+       assert (output_folder / "evaluation.meta.json").exists()
+       assert (output_folder / "evaluation.rst").exists()
+       assert (output_folder / "evaluation.pdf").exists()
        keywords = {
-           r"^Loading dataset: * without caching. Trade-off: CPU RAM usage: less | Disk I/O: more$": 3,
-           r"^Loading checkpoint from .*$": 1,
-           r"^Restoring normalizer from checkpoint.$": 1,
-           r"^Running prediction on `train` split...$": 1,
-           r"^Running prediction on `validation` split...$": 1,
-           r"^Running prediction on `test` split...$": 1,
-           r"^Predictions saved to .*$": 1,
+           r"^Saving evaluation metadata at .*$": 1,
+           r"^Setting --threshold=.*$": 1,
+           r"^Computing performance on split .*...$": 3,
+           r"^Saving evaluation results at .*$": 1,
+           r"^Saving evaluation results in table format at .*$": 1,
+           r"^Saving evaluation figures at .*$": 1,
        }
        buf.seek(0)
        logging_output = buf.read()
@@ -373,39 +337,69 @@ def test_predict_pasa_montgomery(temporary_basedir, datadir):
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_evaluate_pasa_montgomery(temporary_basedir):
-   from mednet.libs.classification.scripts.evaluate import evaluate
+def test_train_pasa_montgomery_from_checkpoint(tmp_path):
+   from mednet.libs.classification.scripts.train import train
+   from mednet.libs.common.utils.checkpointer import (
+       CHECKPOINT_EXTENSION,
+       _get_checkpoint_from_alias,
+   )
    runner = CliRunner()
-   with stdout_logging() as buf:
-       prediction_path = temporary_basedir / "classification" / "predictions"
-       predictions_file = prediction_path / "predictions.json"
-       evaluation_path = temporary_basedir / "classification" / "evaluations"
+   result0 = runner.invoke(
+       train,
+       [
+           "pasa",
+           "montgomery",
+           "-vv",
+           "--epochs=1",
+           "--batch-size=1",
+           f"--output-folder={str(tmp_path)}",
+       ],
+   )
+   _assert_exit_0(result0)
+   # asserts checkpoints are there, or raises FileNotFoundError
+   last = _get_checkpoint_from_alias(tmp_path, "periodic")
+   assert last.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
+   best = _get_checkpoint_from_alias(tmp_path, "best")
+   assert best.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
+   assert (tmp_path / "meta.json").exists()
+   assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 1
+   with stdout_logging() as buf:
        result = runner.invoke(
-           evaluate,
+           train,
            [
+               "pasa",
+               "montgomery",
                "-vv",
-               "montgomery",
-               f"--predictions={predictions_file}",
-               f"--output-folder={evaluation_path}",
-               "--threshold=test",
+               "--epochs=2",
+               "--batch-size=1",
+               f"--output-folder={tmp_path}",
            ],
        )
        _assert_exit_0(result)
-       assert (evaluation_path / "evaluation.json").exists()
-       assert (evaluation_path / "evaluation.meta.json").exists()
-       assert (evaluation_path / "evaluation.pdf").exists()
-       assert (evaluation_path / "evaluation.rst").exists()
+       # asserts checkpoints are there, or raises FileNotFoundError
+       last = _get_checkpoint_from_alias(tmp_path, "periodic")
+       assert last.name.endswith("epoch=1" + CHECKPOINT_EXTENSION)
+       best = _get_checkpoint_from_alias(tmp_path, "best")
+       assert (tmp_path / "meta.json").exists()
+       assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 2
        keywords = {
-           r"^Setting --threshold=.*$": 1,
-           r"^Analyzing split `train`...$": 1,
-           r"^Analyzing split `validation`...$": 1,
-           r"^Analyzing split `test`...$": 1,
-           r"^Saving evaluation results .*$": 2,
-           r"^Saving evaluation figures at .*$": 1,
+           r"^Loading dataset:`train` without caching. Trade-off: CPU RAM usage: less | Disk I/O: more.$": 1,
+           r"^Loading dataset:`validation` without caching. Trade-off: CPU RAM usage: less | Disk I/O: more.$": 1,
+           r"^Applying train/valid loss balancing...$": 1,
+           r"^Training for at most 2 epochs.$": 1,
+           r"^Resuming from epoch 0 \(checkpoint file: .*$": 1,
+           r"^Writing run metadata at.*$": 1,
+           r"^Dataset `train` is already setup. Not re-instantiating it.$": 1,
+           r"^Dataset `validation` is already setup. Not re-instantiating it.$": 1,
+           r"^Restoring normalizer from checkpoint.$": 1,
        }
        buf.seek(0)
        logging_output = buf.read()
@@ -420,12 +414,11 @@ def test_evaluate_pasa_montgomery(temporary_basedir):
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_experiment(temporary_basedir):
+def test_experiment(tmp_path):
    from mednet.libs.classification.scripts.experiment import experiment
    runner = CliRunner()
-   output_folder = temporary_basedir / "classification" / "experiment"
    num_epochs = 2
    result = runner.invoke(
        experiment,
@@ -434,61 +427,39 @@ def test_experiment(temporary_basedir):
            "pasa",
            "montgomery",
            f"--epochs={num_epochs}",
-           f"--output-folder={str(output_folder)}",
+           f"--output-folder={str(tmp_path)}",
        ],
    )
    _assert_exit_0(result)
-   assert (output_folder / "model" / "meta.json").exists()
-   assert (output_folder / "model" / f"model-at-epoch={num_epochs-1}.ckpt").exists()
-   assert (output_folder / "predictions" / "predictions.json").exists()
-   assert (output_folder / "predictions" / "predictions.meta.json").exists()
+   assert (tmp_path / "model" / "meta.json").exists()
+   assert (tmp_path / "model" / f"model-at-epoch={num_epochs-1}.ckpt").exists()
+   assert (tmp_path / "predictions.json").exists()
+   assert (tmp_path / "predictions.meta.json").exists()
    # Need to glob because we cannot be sure of the checkpoint with lowest validation loss
    assert (
        len(
            list(
-               (output_folder / "model").glob(
-                   "model-at-lowest-validation-loss-epoch=*.ckpt",
-               ),
-           ),
+               (tmp_path / "model").glob(
+                   "model-at-lowest-validation-loss-epoch=*.ckpt"
+               )
+           )
        )
        == 1
    )
-   assert (output_folder / "model" / "trainlog.pdf").exists()
+   assert (tmp_path / "model" / "trainlog.pdf").exists()
    assert (
        len(
            list(
-               (output_folder / "model" / "logs").glob(
+               (tmp_path / "model" / "logs").glob(
                    "events.out.tfevents.*",
                ),
            ),
        )
        == 1
    )
-   assert (output_folder / "evaluation.json").exists()
-   assert (output_folder / "evaluation.meta.json").exists()
-   assert (output_folder / "evaluation.rst").exists()
-   assert (output_folder / "evaluation.pdf").exists()
-   assert (output_folder / "gradcam" / "saliencies").exists()
-   assert (
-       len(
-           list(
-               (output_folder / "gradcam" / "saliencies" / "CXR_png").glob(
-                   "MCUCXR_*.npy",
-               ),
-           ),
-       )
-       == 138
-   )
-   assert (output_folder / "gradcam" / "visualizations").exists()
-   assert (
-       len(
-           list(
-               (output_folder / "gradcam" / "visualizations" / "CXR_png").glob(
-                   "MCUCXR_*.png",
-               ),
-           ),
-       )
-       == 58
-   )
+   assert (tmp_path / "evaluation.json").exists()
+   assert (tmp_path / "evaluation.meta.json").exists()
+   assert (tmp_path / "evaluation.rst").exists()
+   assert (tmp_path / "evaluation.pdf").exists()
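Editorial note: the keywords dictionaries in these tests map a regular expression to the number of lines of captured log output that must match it. The repository's actual checking helper is not shown in this diff; the following is only an assumed sketch of what such an assertion could look like:

# Assumed sketch of checking a `keywords` dictionary against captured log
# output; mednet's real helper may differ.
import re


def assert_log_counts(logging_output: str, keywords: dict[str, int]) -> None:
    for pattern, expected in keywords.items():
        hits = sum(
            1 for line in logging_output.splitlines() if re.match(pattern, line)
        )
        assert hits == expected, (
            f"pattern {pattern!r} matched {hits} lines, expected {expected}"
        )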
@@ -95,7 +95,7 @@ def rc_variable_set(name):
@pytest.fixture(scope="session")
-def temporary_basedir(tmp_path_factory):
+def session_tmp_path(tmp_path_factory):
    return tmp_path_factory.mktemp("test-cli")
...
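Editorial note: renaming the fixture to session_tmp_path underlines that it is created once per test session, so the predict and evaluate tests can reuse artifacts written by the preceding train test, while self-contained tests (such as checkpoint resumption) keep using pytest's per-test tmp_path. A condensed sketch of the sharing pattern, with placeholder test bodies rather than the real mednet tests:

# Condensed sketch of session-shared vs. per-test output folders in pytest.
import pytest


@pytest.fixture(scope="session")
def session_tmp_path(tmp_path_factory):
    # one folder shared by all tests in the session
    return tmp_path_factory.mktemp("test-cli")


def test_train(session_tmp_path):
    (session_tmp_path / "model.ckpt").write_text("weights")


def test_predict(session_tmp_path):
    # relies on test_train having run earlier in the same session
    assert (session_tmp_path / "model.ckpt").exists()


def test_resume_from_checkpoint(tmp_path):
    # unrelated state goes into an isolated, per-test folder instead
    assert tmp_path.exists()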
@@ -153,7 +153,7 @@ def test_evaluate_help():
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.drive")
-def test_train_lwnet_drive(temporary_basedir):
+def test_train_lwnet_drive(session_tmp_path):
    from mednet.libs.common.utils.checkpointer import (
        CHECKPOINT_EXTENSION,
        _get_checkpoint_from_alias,
@@ -163,7 +163,7 @@ def test_train_lwnet_drive(temporary_basedir):
    runner = CliRunner()
    with stdout_logging() as buf:
-       output_folder = temporary_basedir / "segmentation" / "results"
+       output_folder = session_tmp_path / "segmentation-standalone"
        result = runner.invoke(
            train,
            [
@@ -206,68 +206,44 @@ def test_train_lwnet_drive(temporary_basedir):
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.drive")
-def test_train_lwnet_drive_from_checkpoint(temporary_basedir):
+def test_predict_lwnet_drive(session_tmp_path):
    from mednet.libs.common.utils.checkpointer import (
        CHECKPOINT_EXTENSION,
        _get_checkpoint_from_alias,
    )
-   from mednet.libs.segmentation.scripts.train import train
+   from mednet.libs.segmentation.scripts.predict import predict
    runner = CliRunner()
-   output_folder = temporary_basedir / "segmentation" / "results" / "lwnet_checkpoint"
-   result0 = runner.invoke(
-       train,
-       [
-           "lwnet",
-           "drive",
-           "-vv",
-           "--epochs=1",
-           "--batch-size=1",
-           f"--output-folder={str(output_folder)}",
-       ],
-   )
-   _assert_exit_0(result0)
-   # asserts checkpoints are there, or raises FileNotFoundError
-   last = _get_checkpoint_from_alias(output_folder, "periodic")
-   assert last.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
-   best = _get_checkpoint_from_alias(output_folder, "best")
-   assert best.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
-   assert (output_folder / "meta.json").exists()
-   assert len(list((output_folder / "logs").glob("events.out.tfevents.*"))) == 1
    with stdout_logging() as buf:
+       output_folder = session_tmp_path / "segmentation-standalone"
+       last_ckpt = _get_checkpoint_from_alias(output_folder, "periodic")
+       assert last_ckpt.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
        result = runner.invoke(
-           train,
+           predict,
            [
                "lwnet",
                "drive",
                "-vv",
-               "--epochs=2",
                "--batch-size=1",
-               f"--output-folder={output_folder}",
+               f"--weight={str(last_ckpt)}",
+               f"--output-folder={str(output_folder)}",
            ],
        )
        _assert_exit_0(result)
-       # asserts checkpoints are there, or raises FileNotFoundError
-       last = _get_checkpoint_from_alias(output_folder, "periodic")
-       assert last.name.endswith("epoch=1" + CHECKPOINT_EXTENSION)
-       best = _get_checkpoint_from_alias(output_folder, "best")
-       assert (output_folder / "meta.json").exists()
-       assert len(list((output_folder / "logs").glob("events.out.tfevents.*"))) == 2
+       assert (output_folder / "predictions.meta.json").exists()
+       assert (output_folder / "predictions.json").exists()
        keywords = {
-           r"^Loading dataset:`train` without caching. Trade-off: CPU RAM usage: less | Disk I/O: more.$": 1,
-           r"^Training for at most 2 epochs.$": 1,
-           r"^Resuming from epoch 0 \(checkpoint file: .*$": 1,
-           r"^Writing run metadata at.*$": 1,
-           r"^Dataset `train` is already setup. Not re-instantiating it.$": 3,
+           r"^Loading dataset: * without caching. Trade-off: CPU RAM usage: less | Disk I/O: more$": 2,
+           r"^Loading checkpoint from .*$": 1,
            r"^Restoring normalizer from checkpoint.$": 1,
+           r"^Running prediction on `train` split...$": 1,
+           r"^Running prediction on `test` split...$": 1,
+           r"^Predictions saved to .*$": 1,
        }
        buf.seek(0)
        logging_output = buf.read()
@@ -281,46 +257,38 @@ def test_train_lwnet_drive_from_checkpoint(temporary_basedir):
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.drive")
-def test_predict_lwnet_drive(temporary_basedir, datadir):
-   from mednet.libs.common.utils.checkpointer import (
-       CHECKPOINT_EXTENSION,
-       _get_checkpoint_from_alias,
-   )
-   from mednet.libs.segmentation.scripts.predict import predict
+def test_evaluate_lwnet_drive(session_tmp_path):
+   from mednet.libs.segmentation.scripts.evaluate import evaluate
    runner = CliRunner()
    with stdout_logging() as buf:
-       output = temporary_basedir / "segmentation" / "predictions"
-       last = _get_checkpoint_from_alias(
-           temporary_basedir / "segmentation" / "results",
-           "periodic",
-       )
-       assert last.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
+       output_folder = session_tmp_path / "segmentation-standalone"
        result = runner.invoke(
-           predict,
+           evaluate,
            [
-               "lwnet",
-               "drive",
                "-vv",
-               "--batch-size=1",
-               f"--weight={str(last)}",
-               f"--output-folder={str(output)}",
+               f"--predictions={str(output_folder / 'predictions.json')}",
+               f"--output-folder={str(output_folder)}",
+               "--threshold=test",
            ],
        )
        _assert_exit_0(result)
-       assert output.exists()
+       assert (output_folder / "evaluation.json").exists()
+       assert (output_folder / "evaluation.meta.json").exists()
+       assert (output_folder / "evaluation.rst").exists()
+       assert (output_folder / "evaluation.pdf").exists()
        keywords = {
-           r"^Loading dataset: * without caching. Trade-off: CPU RAM usage: less | Disk I/O: more$": 2,
-           r"^Loading checkpoint from .*$": 1,
-           r"^Restoring normalizer from checkpoint.$": 1,
-           r"^Running prediction on `train` split...$": 1,
-           r"^Running prediction on `test` split...$": 1,
-           r"^Predictions saved to .*$": 1,
+           r"^Saving evaluation metadata at .*$": 1,
+           r"^Counting true/false positive/negatives at split.*$": 2,
+           r"^Evaluating threshold on split .*$": 1,
+           r"^Computing performance on split .*...$": 2,
+           r"^Saving evaluation results at .*$": 1,
+           r"^Saving tabulated performance summary at .*$": 1,
+           r"^Saving evaluation figures at .*$": 1,
        }
        buf.seek(0)
        logging_output = buf.read()
@@ -334,41 +302,66 @@ def test_predict_lwnet_drive(temporary_basedir, datadir):
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.drive")
-def test_evaluate_lwnet_drive(temporary_basedir):
-   from mednet.libs.segmentation.scripts.evaluate import evaluate
+def test_train_lwnet_drive_from_checkpoint(tmp_path):
+   from mednet.libs.common.utils.checkpointer import (
+       CHECKPOINT_EXTENSION,
+       _get_checkpoint_from_alias,
+   )
+   from mednet.libs.segmentation.scripts.train import train
    runner = CliRunner()
+   result0 = runner.invoke(
+       train,
+       [
+           "lwnet",
+           "drive",
+           "-vv",
+           "--epochs=1",
+           "--batch-size=1",
+           f"--output-folder={str(tmp_path)}",
+       ],
+   )
+   _assert_exit_0(result0)
+   # asserts checkpoints are there, or raises FileNotFoundError
+   last = _get_checkpoint_from_alias(tmp_path, "periodic")
+   assert last.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
+   best = _get_checkpoint_from_alias(tmp_path, "best")
+   assert best.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
+   assert (tmp_path / "meta.json").exists()
+   assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 1
    with stdout_logging() as buf:
-       prediction_path = temporary_basedir / "segmentation" / "predictions"
-       predictions_file = prediction_path / "predictions.json"
-       evaluation_path = temporary_basedir / "segmentation" / "evaluations"
        result = runner.invoke(
-           evaluate,
+           train,
            [
-               "-vv",
+               "lwnet",
                "drive",
-               f"--predictions={predictions_file}",
-               f"--output-folder={evaluation_path}",
-               "--threshold=test",
+               "-vv",
+               "--epochs=2",
+               "--batch-size=1",
+               f"--output-folder={tmp_path}",
            ],
        )
        _assert_exit_0(result)
-       assert (evaluation_path / "evaluation.json").exists()
-       assert (evaluation_path / "evaluation.meta.json").exists()
-       assert (evaluation_path / "evaluation.pdf").exists()
-       assert (evaluation_path / "evaluation.rst").exists()
+       # asserts checkpoints are there, or raises FileNotFoundError
+       last = _get_checkpoint_from_alias(tmp_path, "periodic")
+       assert last.name.endswith("epoch=1" + CHECKPOINT_EXTENSION)
+       best = _get_checkpoint_from_alias(tmp_path, "best")
+       assert (tmp_path / "meta.json").exists()
+       assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 2
        keywords = {
-           r"^Writing run metadata at.*$": 1,
-           r"^Counting true/false positive/negatives at split.*$": 2,
-           r"^Evaluating threshold on.*$": 1,
-           r"^Tabulating performance summary...": 1,
-           r"^Saving evaluation results at.*$": 1,
-           r"^Saving table at .*$": 1,
-           r"^Plotting performance curves...": 1,
-           r"^Saving figures at .*$": 1,
+           r"^Loading dataset:`train` without caching. Trade-off: CPU RAM usage: less | Disk I/O: more.$": 1,
+           r"^Training for at most 2 epochs.$": 1,
+           r"^Resuming from epoch 0 \(checkpoint file: .*$": 1,
+           r"^Writing run metadata at.*$": 1,
+           r"^Dataset `train` is already setup. Not re-instantiating it.$": 3,
+           r"^Restoring normalizer from checkpoint.$": 1,
        }
        buf.seek(0)
        logging_output = buf.read()
@@ -383,12 +376,11 @@ def test_evaluate_lwnet_drive(temporary_basedir):
@pytest.mark.slow
@pytest.mark.skip_if_rc_var_not_set("datadir.drive")
-def test_experiment(temporary_basedir):
+def test_experiment(tmp_path):
    from mednet.libs.segmentation.scripts.experiment import experiment
    runner = CliRunner()
-   output_folder = temporary_basedir / "segmentation" / "experiment"
    num_epochs = 2
    result = runner.invoke(
        experiment,
@@ -397,39 +389,40 @@ def test_experiment(temporary_basedir):
            "lwnet",
            "drive",
            f"--epochs={num_epochs}",
-           f"--output-folder={str(output_folder)}",
+           f"--output-folder={str(tmp_path)}",
        ],
    )
    _assert_exit_0(result)
-   assert (output_folder / "model" / "meta.json").exists()
-   assert (output_folder / "model" / f"model-at-epoch={num_epochs-1}.ckpt").exists()
-   assert (output_folder / "predictions" / "predictions.json").exists()
-   assert (output_folder / "predictions" / "predictions.meta.json").exists()
+   assert (tmp_path / "model" / "meta.json").exists()
+   assert (tmp_path / "model" / f"model-at-epoch={num_epochs-1}.ckpt").exists()
    # Need to glob because we cannot be sure of the checkpoint with lowest validation loss
    assert (
        len(
            list(
-               (output_folder / "model").glob(
+               (tmp_path / "model").glob(
                    "model-at-lowest-validation-loss-epoch=*.ckpt",
                ),
            ),
        )
        == 1
    )
-   assert (output_folder / "model" / "trainlog.pdf").exists()
+   assert (tmp_path / "model" / "trainlog.pdf").exists()
    assert (
        len(
            list(
-               (output_folder / "model" / "logs").glob(
+               (tmp_path / "model" / "logs").glob(
                    "events.out.tfevents.*",
                ),
            ),
        )
        == 1
    )
-   assert (output_folder / "evaluation" / "evaluation.json").exists()
-   assert (output_folder / "evaluation" / "evaluation.meta.json").exists()
-   assert (output_folder / "evaluation" / "evaluation.pdf").exists()
-   assert (output_folder / "evaluation" / "evaluation.rst").exists()
+   assert (tmp_path / "predictions.json").exists()
+   assert (tmp_path / "predictions.meta.json").exists()
+   assert (tmp_path / "evaluation.json").exists()
+   assert (tmp_path / "evaluation.meta.json").exists()
+   assert (tmp_path / "evaluation.pdf").exists()
+   assert (tmp_path / "evaluation.rst").exists()