From bd6da859599007d39ed50a0d4e8710c93cfa806f Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Thu, 23 Feb 2023 09:56:59 +0100 Subject: [PATCH] [tests] Improve access to datadir.montgomery rc variable throughout the test units --- doc/catalog.json | 4 +- src/ptbench/data/hivtb/__init__.py | 4 +- src/ptbench/data/indian/__init__.py | 4 +- src/ptbench/data/montgomery/__init__.py | 12 +- src/ptbench/data/nih_cxr14_re/__init__.py | 6 +- src/ptbench/data/padchest/__init__.py | 4 +- src/ptbench/data/shenzhen/__init__.py | 4 +- src/ptbench/data/tbpoc/__init__.py | 4 +- tests/conftest.py | 75 ++- tests/test_cli.py | 681 ++++++++++------------ tests/test_config.py | 94 +-- 11 files changed, 411 insertions(+), 481 deletions(-) diff --git a/doc/catalog.json b/doc/catalog.json index b3af4a0c..0a78c0e3 100644 --- a/doc/catalog.json +++ b/doc/catalog.json @@ -1,8 +1,8 @@ { "clapp": { "versions": { - "stable": "https://www.idiap.ch/software/biosignal/docs/software/clapp/stable/sphinx/", - "latest": "https://www.idiap.ch/software/biosignal/docs/software/clapp/main/sphinx/" + "stable": "https://clapp.readthedocs.io/en/stable/", + "latest": "https://clapp.readthedocs.io/en/latest/" }, "sources": {} } diff --git a/src/ptbench/data/hivtb/__init__.py b/src/ptbench/data/hivtb/__init__.py index d743968d..dd87f9a4 100644 --- a/src/ptbench/data/hivtb/__init__.py +++ b/src/ptbench/data/hivtb/__init__.py @@ -34,12 +34,12 @@ _protocols = [ importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), ] -_root_path = load_rc().get("datadir.hivtb", os.path.realpath(os.curdir)) +_datadir = load_rc().get("datadir.hivtb", os.path.realpath(os.curdir)) def _raw_data_loader(sample): return dict( - data=load_pil_baw(os.path.join(_root_path, sample["data"])), + data=load_pil_baw(os.path.join(_datadir, sample["data"])), label=sample["label"], ) diff --git a/src/ptbench/data/indian/__init__.py b/src/ptbench/data/indian/__init__.py index 80e50dbc..dc1b9d99 100644 --- a/src/ptbench/data/indian/__init__.py +++ b/src/ptbench/data/indian/__init__.py @@ -34,12 +34,12 @@ _protocols = [ importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), ] -_root_path = load_rc().get("datadir.indian", os.path.realpath(os.curdir)) +_datadir = load_rc().get("datadir.indian", os.path.realpath(os.curdir)) def _raw_data_loader(sample): return dict( - data=load_pil_baw(os.path.join(_root_path, sample["data"])), + data=load_pil_baw(os.path.join(_datadir, sample["data"])), label=sample["label"], ) diff --git a/src/ptbench/data/montgomery/__init__.py b/src/ptbench/data/montgomery/__init__.py index fd65f24d..719941f9 100644 --- a/src/ptbench/data/montgomery/__init__.py +++ b/src/ptbench/data/montgomery/__init__.py @@ -40,25 +40,19 @@ _protocols = [ importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), ] -_root_path = None +_datadir = load_rc().get("datadir.montgomery", os.path.realpath(os.curdir)) def _raw_data_loader(sample): - # hack to allow tests to change "datadir.montgomery" - global _root_path - _root_path = _root_path or load_rc().get( - "datadir.montgomery", os.path.realpath(os.curdir) - ) - return dict( - data=load_pil_baw(os.path.join(_root_path, sample["data"])), # type: ignore + data=load_pil_baw(os.path.join(_datadir, sample["data"])), # type: ignore label=sample["label"], ) def _loader(context, sample): # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once + # we return delayed samples to avoid loading all images at once return make_delayed(sample, _raw_data_loader) diff --git a/src/ptbench/data/nih_cxr14_re/__init__.py b/src/ptbench/data/nih_cxr14_re/__init__.py index 5cefea42..8dc3f223 100644 --- a/src/ptbench/data/nih_cxr14_re/__init__.py +++ b/src/ptbench/data/nih_cxr14_re/__init__.py @@ -38,7 +38,7 @@ _protocols = [ importlib.resources.files(__name__).joinpath("cardiomegaly.json.bz2"), ] -_root_path = load_rc().get("datadir.nih_cxr14_re", os.path.realpath(os.curdir)) +_datadir = load_rc().get("datadir.nih_cxr14_re", os.path.realpath(os.curdir)) _idiap_folders = load_rc().get("nih_cxr14_re.idiap_folder_structure", False) @@ -51,7 +51,7 @@ def _raw_data_loader(sample): return dict( data=load_pil_rgb( os.path.join( - _root_path, + _datadir, os.path.dirname(sample["data"]), basename[:5], basename, @@ -61,7 +61,7 @@ def _raw_data_loader(sample): ) else: return dict( - data=load_pil_rgb(os.path.join(_root_path, sample["data"])), + data=load_pil_rgb(os.path.join(_datadir, sample["data"])), label=sample["label"], ) diff --git a/src/ptbench/data/padchest/__init__.py b/src/ptbench/data/padchest/__init__.py index eb4ccfe4..e52b6dd6 100644 --- a/src/ptbench/data/padchest/__init__.py +++ b/src/ptbench/data/padchest/__init__.py @@ -236,12 +236,12 @@ _protocols = [ importlib.resources.files(__name__).joinpath("cardiomegaly_idiap.json.bz2"), ] -_root_path = load_rc().get("datadir.padchest", os.path.realpath(os.curdir)) +_datadir = load_rc().get("datadir.padchest", os.path.realpath(os.curdir)) def _raw_data_loader(sample): return dict( - data=load_pil(os.path.join(_root_path, sample["data"])), + data=load_pil(os.path.join(_datadir, sample["data"])), label=sample["label"], ) diff --git a/src/ptbench/data/shenzhen/__init__.py b/src/ptbench/data/shenzhen/__init__.py index a8bcaa3a..c1488160 100644 --- a/src/ptbench/data/shenzhen/__init__.py +++ b/src/ptbench/data/shenzhen/__init__.py @@ -41,12 +41,12 @@ _protocols = [ importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), ] -_root_path = load_rc().get("datadir.shenzhen", os.path.realpath(os.curdir)) +_datadir = load_rc().get("datadir.shenzhen", os.path.realpath(os.curdir)) def _raw_data_loader(sample): return dict( - data=load_pil_baw(os.path.join(_root_path, sample["data"])), + data=load_pil_baw(os.path.join(_datadir, sample["data"])), label=sample["label"], ) diff --git a/src/ptbench/data/tbpoc/__init__.py b/src/ptbench/data/tbpoc/__init__.py index 7ee817cf..0eba11a3 100644 --- a/src/ptbench/data/tbpoc/__init__.py +++ b/src/ptbench/data/tbpoc/__init__.py @@ -34,12 +34,12 @@ _protocols = [ importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), ] -_root_path = load_rc().get("datadir.tbpoc", os.path.realpath(os.curdir)) +_datadir = load_rc().get("datadir.tbpoc", os.path.realpath(os.curdir)) def _raw_data_loader(sample): return dict( - data=load_pil_baw(os.path.join(_root_path, sample["data"])), + data=load_pil_baw(os.path.join(_datadir, sample["data"])), label=sample["label"], ) diff --git a/tests/conftest.py b/tests/conftest.py index f1aa8c92..86a852b5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -2,9 +2,12 @@ # # SPDX-License-Identifier: GPL-3.0-or-later +import os import pathlib +import typing import pytest +import tomli_w @pytest.fixture @@ -67,30 +70,56 @@ def temporary_basedir(tmp_path_factory): return tmp_path_factory.mktemp("test-cli") -@pytest.fixture(scope="session") -def montgomery_datadir(tmp_path_factory) -> pathlib.Path: - from ptbench.utils.rc import load_rc - - database_dir = load_rc().get("datadir.montgomery") - if database_dir is not None: - return pathlib.Path(database_dir) - - # else, we must extract the LFS component - archive = ( - pathlib.Path(__file__).parents[0] / "data" / "lfs" / "test-database.zip" - ) - assert archive.exists(), ( - f"Neither datadir.montgomery is set on the global configuration, " - f"(typically ~/.config/ptbench.toml), or it is possible to detect " - f"the presence of {archive}' (did you git submodule init --update " - f"this submodule?)" - ) - - database_dir = tmp_path_factory.mktemp("montgomery_datadir") +@pytest.fixture(scope="session", autouse=True) +def ensure_montgomery( + tmp_path_factory, +) -> typing.Generator[None, None, None]: + """A pytest fixture that ensures that datadir.montgomery is always + available.""" + import tempfile import zipfile - with zipfile.ZipFile(archive) as zf: - zf.extractall(database_dir) + from ptbench.utils.rc import load_rc + + rc = load_rc() - return database_dir + database_dir = rc.get("datadir.montgomery") + if database_dir is not None: + # if the user downloaded it, use that copy + yield + + else: + # else, we must extract the LFS component (we are likely on the CI) + archive = ( + pathlib.Path(__file__).parents[0] + / "data" + / "lfs" + / "test-database.zip" + ) + assert archive.exists(), ( + f"Neither datadir.montgomery is set on the global configuration, " + f"(typically ~/.config/ptbench.toml), or it is possible to detect " + f"the presence of {archive}' (did you git submodule init --update " + f"this submodule?)" + ) + + database_dir = tmp_path_factory.mktemp("montgomery_datadir") + rc.setdefault("datadir.montgomery", str(database_dir)) + + with zipfile.ZipFile(archive) as zf: + zf.extractall(database_dir) + + with tempfile.TemporaryDirectory() as tmpdir: + config_filename = "ptbench.toml" + with open(os.path.join(tmpdir, config_filename), "wb") as f: + tomli_w.dump(rc.data, f) + f.flush() + + old_config_home = os.environ.get("XDG_CONFIG_HOME") + os.environ["XDG_CONFIG_HOME"] = tmpdir + yield + if old_config_home is None: + del os.environ["XDG_CONFIG_HOME"] + else: + os.environ["XDG_CONFIG_HOME"] = old_config_home diff --git a/tests/test_cli.py b/tests/test_cli.py index 6507e3d0..3ed16d08 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -7,29 +7,10 @@ import contextlib import os import re -import tempfile - -import tomli_w from click.testing import CliRunner -@contextlib.contextmanager -def rc_context(**new_config): - with tempfile.TemporaryDirectory() as tmpdir: - config_filename = "ptbench.toml" - with open(os.path.join(tmpdir, config_filename), "wb") as f: - tomli_w.dump(new_config, f) - f.flush() - old_config_home = os.environ.get("XDG_CONFIG_HOME") - os.environ["XDG_CONFIG_HOME"] = tmpdir - yield - if old_config_home is None: - del os.environ["XDG_CONFIG_HOME"] - else: - os.environ["XDG_CONFIG_HOME"] = old_config_home - - @contextlib.contextmanager def stdout_logging(): # copy logging messages to std out @@ -187,261 +168,236 @@ def test_compare_help(): _check_help(compare) -def test_train_pasa_montgomery(temporary_basedir, montgomery_datadir): - # Temporarily modifies Montgomery datadir if need be - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.scripts.train import train - - runner = CliRunner() - - with stdout_logging() as buf: - output_folder = str(temporary_basedir / "results") - result = runner.invoke( - train, - [ - "pasa", - "montgomery", - "-vv", - "--epochs=1", - "--batch-size=1", - "--normalization=current", - f"--output-folder={output_folder}", - ], - ) - _assert_exit_0(result) +def test_train_pasa_montgomery(temporary_basedir): + from ptbench.scripts.train import train - assert os.path.exists( - os.path.join(output_folder, "model_final_epoch.pth") - ) - assert os.path.exists( - os.path.join(output_folder, "model_lowest_valid_loss.pth") - ) - assert os.path.exists( - os.path.join(output_folder, "last_checkpoint") - ) - assert os.path.exists(os.path.join(output_folder, "constants.csv")) - assert os.path.exists(os.path.join(output_folder, "trainlog.csv")) - assert os.path.exists( - os.path.join(output_folder, "model_summary.txt") - ) + runner = CliRunner() - keywords = { - r"^Found \(dedicated\) '__train__' set for training$": 1, - r"^Found \(dedicated\) '__valid__' set for validation$": 1, - r"^Continuing from epoch 0$": 1, - r"^Saving model summary at.*$": 1, - r"^Model has.*$": 1, - r"^Saving checkpoint": 2, - r"^Total training time:": 1, - r"^Z-normalization with mean": 1, - } - buf.seek(0) - logging_output = buf.read() - - for k, v in keywords.items(): - assert _str_counter(k, logging_output) == v, ( - f"Count for string '{k}' appeared " - f"({_str_counter(k, logging_output)}) " - f"instead of the expected {v}:\nOutput:\n{logging_output}" - ) - - -def test_predict_pasa_montgomery( - temporary_basedir, montgomery_datadir, datadir -): - # Temporarily modifies Montgomery datadir if need be - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.scripts.predict import predict - - runner = CliRunner() - - with stdout_logging() as buf: - output_folder = str(temporary_basedir / "predictions") - result = runner.invoke( - predict, - [ - "pasa", - "montgomery", - "-vv", - "--batch-size=1", - "--relevance-analysis", - f"--weight={str(datadir / 'lfs' / 'models' / 'pasa.pth')}", - f"--output-folder={output_folder}", - ], - ) - _assert_exit_0(result) + with stdout_logging() as buf: + output_folder = str(temporary_basedir / "results") + result = runner.invoke( + train, + [ + "pasa", + "montgomery", + "-vv", + "--epochs=1", + "--batch-size=1", + "--normalization=current", + f"--output-folder={output_folder}", + ], + ) + _assert_exit_0(result) - # check predictions are there - predictions_file1 = os.path.join( - output_folder, "train/predictions.csv" - ) - predictions_file2 = os.path.join( - output_folder, "validation/predictions.csv" - ) - predictions_file3 = os.path.join( - output_folder, "test/predictions.csv" - ) - assert os.path.exists(predictions_file1) - assert os.path.exists(predictions_file2) - assert os.path.exists(predictions_file3) - - keywords = { - r"^Loading checkpoint from.*$": 1, - r"^Total time:.*$": 3, - r"^Relevance analysis.*$": 3, - } - buf.seek(0) - logging_output = buf.read() - - for k, v in keywords.items(): - assert _str_counter(k, logging_output) == v, ( - f"Count for string '{k}' appeared " - f"({_str_counter(k, logging_output)}) " - f"instead of the expected {v}:\nOutput:\n{logging_output}" - ) - - -def test_predtojson(datadir, temporary_basedir, montgomery_datadir): - # Temporarily modify Montgomery datadir if need be - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.scripts.predtojson import predtojson - - runner = CliRunner() - - with stdout_logging() as buf: - predictions = str(datadir / "test_predictions.csv") - output_folder = str(temporary_basedir / "pred_to_json") - result = runner.invoke( - predtojson, - [ - "-vv", - "train", - f"{predictions}", - "test", - f"{predictions}", - f"--output-folder={output_folder}", - ], - ) - _assert_exit_0(result) - - # check json file is there - assert os.path.exists(os.path.join(output_folder, "dataset.json")) - - keywords = { - f"Output folder: {output_folder}": 1, - r"Saving JSON file...": 1, - r"^Loading predictions from.*$": 2, - } - buf.seek(0) - logging_output = buf.read() - - for k, v in keywords.items(): - assert _str_counter(k, logging_output) == v, ( - f"Count for string '{k}' appeared " - f"({_str_counter(k, logging_output)}) " - f"instead of the expected {v}:\nOutput:\n{logging_output}" - ) - - -def test_evaluate_pasa_montgomery(temporary_basedir, montgomery_datadir): - # Temporarily modify Montgomery datadir if need be - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.scripts.evaluate import evaluate - - runner = CliRunner() - - with stdout_logging() as buf: - prediction_folder = str(temporary_basedir / "predictions") - output_folder = str(temporary_basedir / "evaluations") - result = runner.invoke( - evaluate, - [ - "-vv", - "montgomery", - f"--predictions-folder={prediction_folder}", - f"--output-folder={output_folder}", - "--threshold=train", - "--steps=2000", - ], + assert os.path.exists( + os.path.join(output_folder, "model_final_epoch.pth") + ) + assert os.path.exists( + os.path.join(output_folder, "model_lowest_valid_loss.pth") + ) + assert os.path.exists(os.path.join(output_folder, "last_checkpoint")) + assert os.path.exists(os.path.join(output_folder, "constants.csv")) + assert os.path.exists(os.path.join(output_folder, "trainlog.csv")) + assert os.path.exists(os.path.join(output_folder, "model_summary.txt")) + + keywords = { + r"^Found \(dedicated\) '__train__' set for training$": 1, + r"^Found \(dedicated\) '__valid__' set for validation$": 1, + r"^Continuing from epoch 0$": 1, + r"^Saving model summary at.*$": 1, + r"^Model has.*$": 1, + r"^Saving checkpoint": 2, + r"^Total training time:": 1, + r"^Z-normalization with mean": 1, + } + buf.seek(0) + logging_output = buf.read() + + for k, v in keywords.items(): + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" ) - _assert_exit_0(result) - # check evaluations are there - assert os.path.exists(os.path.join(output_folder, "test.csv")) - assert os.path.exists(os.path.join(output_folder, "train.csv")) - assert os.path.exists( - os.path.join(output_folder, "test_score_table.pdf") + +def test_predict_pasa_montgomery(temporary_basedir, datadir): + from ptbench.scripts.predict import predict + + runner = CliRunner() + + with stdout_logging() as buf: + output_folder = str(temporary_basedir / "predictions") + result = runner.invoke( + predict, + [ + "pasa", + "montgomery", + "-vv", + "--batch-size=1", + "--relevance-analysis", + f"--weight={str(datadir / 'lfs' / 'models' / 'pasa.pth')}", + f"--output-folder={output_folder}", + ], + ) + _assert_exit_0(result) + + # check predictions are there + predictions_file1 = os.path.join(output_folder, "train/predictions.csv") + predictions_file2 = os.path.join( + output_folder, "validation/predictions.csv" + ) + predictions_file3 = os.path.join(output_folder, "test/predictions.csv") + assert os.path.exists(predictions_file1) + assert os.path.exists(predictions_file2) + assert os.path.exists(predictions_file3) + + keywords = { + r"^Loading checkpoint from.*$": 1, + r"^Total time:.*$": 3, + r"^Relevance analysis.*$": 3, + } + buf.seek(0) + logging_output = buf.read() + + for k, v in keywords.items(): + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" ) - assert os.path.exists( - os.path.join(output_folder, "train_score_table.pdf") + + +def test_predtojson(datadir, temporary_basedir): + from ptbench.scripts.predtojson import predtojson + + runner = CliRunner() + + with stdout_logging() as buf: + predictions = str(datadir / "test_predictions.csv") + output_folder = str(temporary_basedir / "pred_to_json") + result = runner.invoke( + predtojson, + [ + "-vv", + "train", + f"{predictions}", + "test", + f"{predictions}", + f"--output-folder={output_folder}", + ], + ) + _assert_exit_0(result) + + # check json file is there + assert os.path.exists(os.path.join(output_folder, "dataset.json")) + + keywords = { + f"Output folder: {output_folder}": 1, + r"Saving JSON file...": 1, + r"^Loading predictions from.*$": 2, + } + buf.seek(0) + logging_output = buf.read() + + for k, v in keywords.items(): + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" ) - keywords = { - r"^Skipping dataset '__train__'": 1, - r"^Evaluating threshold on.*$": 1, - r"^Maximum F1-score of.*$": 4, - r"^Set --f1_threshold=.*$": 1, - r"^Set --eer_threshold=.*$": 1, - } - buf.seek(0) - logging_output = buf.read() - - for k, v in keywords.items(): - assert _str_counter(k, logging_output) == v, ( - f"Count for string '{k}' appeared " - f"({_str_counter(k, logging_output)}) " - f"instead of the expected {v}:\nOutput:\n{logging_output}" - ) - - -def test_compare_pasa_montgomery(temporary_basedir, montgomery_datadir): - # Temporarily modify Montgomery datadir if need be - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.scripts.compare import compare - - runner = CliRunner() - - with stdout_logging() as buf: - predictions_folder = str(temporary_basedir / "predictions") - output_folder = str(temporary_basedir / "comparisons") - result = runner.invoke( - compare, - [ - "-vv", - "train", - f"{predictions_folder}/train/predictions.csv", - "test", - f"{predictions_folder}/test/predictions.csv", - f"--output-figure={output_folder}/compare.pdf", - f"--output-table={output_folder}/table.txt", - "--threshold=0.5", - ], + +def test_evaluate_pasa_montgomery(temporary_basedir): + from ptbench.scripts.evaluate import evaluate + + runner = CliRunner() + + with stdout_logging() as buf: + prediction_folder = str(temporary_basedir / "predictions") + output_folder = str(temporary_basedir / "evaluations") + result = runner.invoke( + evaluate, + [ + "-vv", + "montgomery", + f"--predictions-folder={prediction_folder}", + f"--output-folder={output_folder}", + "--threshold=train", + "--steps=2000", + ], + ) + _assert_exit_0(result) + + # check evaluations are there + assert os.path.exists(os.path.join(output_folder, "test.csv")) + assert os.path.exists(os.path.join(output_folder, "train.csv")) + assert os.path.exists( + os.path.join(output_folder, "test_score_table.pdf") + ) + assert os.path.exists( + os.path.join(output_folder, "train_score_table.pdf") + ) + + keywords = { + r"^Skipping dataset '__train__'": 1, + r"^Evaluating threshold on.*$": 1, + r"^Maximum F1-score of.*$": 4, + r"^Set --f1_threshold=.*$": 1, + r"^Set --eer_threshold=.*$": 1, + } + buf.seek(0) + logging_output = buf.read() + + for k, v in keywords.items(): + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" ) - _assert_exit_0(result) - # check comparisons are there - assert os.path.exists(os.path.join(output_folder, "compare.pdf")) - assert os.path.exists(os.path.join(output_folder, "table.txt")) - keywords = { - r"^Dataset '\*': threshold =.*$": 1, - r"^Loading predictions from.*$": 2, - r"^Tabulating performance summary...": 1, - } - buf.seek(0) - logging_output = buf.read() +def test_compare_pasa_montgomery(temporary_basedir): + from ptbench.scripts.compare import compare + + runner = CliRunner() + + with stdout_logging() as buf: + predictions_folder = str(temporary_basedir / "predictions") + output_folder = str(temporary_basedir / "comparisons") + result = runner.invoke( + compare, + [ + "-vv", + "train", + f"{predictions_folder}/train/predictions.csv", + "test", + f"{predictions_folder}/test/predictions.csv", + f"--output-figure={output_folder}/compare.pdf", + f"--output-table={output_folder}/table.txt", + "--threshold=0.5", + ], + ) + _assert_exit_0(result) + + # check comparisons are there + assert os.path.exists(os.path.join(output_folder, "compare.pdf")) + assert os.path.exists(os.path.join(output_folder, "table.txt")) + + keywords = { + r"^Dataset '\*': threshold =.*$": 1, + r"^Loading predictions from.*$": 2, + r"^Tabulating performance summary...": 1, + } + buf.seek(0) + logging_output = buf.read() - for k, v in keywords.items(): - assert _str_counter(k, logging_output) == v, ( - f"Count for string '{k}' appeared " - f"({_str_counter(k, logging_output)}) " - f"instead of the expected {v}:\nOutput:\n{logging_output}" - ) + for k, v in keywords.items(): + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" + ) def test_train_signstotb_montgomery_rs(temporary_basedir, datadir): @@ -638,115 +594,108 @@ def test_predict_logreg_montgomery_rs(temporary_basedir, datadir): ) -def test_aggregpred(temporary_basedir, montgomery_datadir): - # Temporarily modify Montgomery datadir if need be - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.scripts.aggregpred import aggregpred +def test_aggregpred(temporary_basedir): + from ptbench.scripts.aggregpred import aggregpred - runner = CliRunner() + runner = CliRunner() - with stdout_logging() as buf: - predictions = str( - temporary_basedir / "predictions" / "train" / "predictions.csv" - ) - output_folder = str(temporary_basedir / "aggregpred") - result = runner.invoke( - aggregpred, - [ - "-vv", - f"{predictions}", - f"{predictions}", - f"--output-folder={output_folder}", - ], - ) - _assert_exit_0(result) + with stdout_logging() as buf: + predictions = str( + temporary_basedir / "predictions" / "train" / "predictions.csv" + ) + output_folder = str(temporary_basedir / "aggregpred") + result = runner.invoke( + aggregpred, + [ + "-vv", + f"{predictions}", + f"{predictions}", + f"--output-folder={output_folder}", + ], + ) + _assert_exit_0(result) - # check csv file is there - assert os.path.exists(os.path.join(output_folder, "aggregpred.csv")) + # check csv file is there + assert os.path.exists(os.path.join(output_folder, "aggregpred.csv")) - keywords = { - f"Output folder: {output_folder}": 1, - r"Saving aggregated CSV file...": 1, - r"^Loading predictions from.*$": 2, - } - buf.seek(0) - logging_output = buf.read() + keywords = { + f"Output folder: {output_folder}": 1, + r"Saving aggregated CSV file...": 1, + r"^Loading predictions from.*$": 2, + } + buf.seek(0) + logging_output = buf.read() - for k, v in keywords.items(): - assert _str_counter(k, logging_output) == v, ( - f"Count for string '{k}' appeared " - f"({_str_counter(k, logging_output)}) " - f"instead of the expected {v}:\nOutput:\n{logging_output}" - ) + for k, v in keywords.items(): + assert _str_counter(k, logging_output) == v, ( + f"Count for string '{k}' appeared " + f"({_str_counter(k, logging_output)}) " + f"instead of the expected {v}:\nOutput:\n{logging_output}" + ) # Not enough RAM available to do this test -# def test_predict_densenetrs_montgomery(temporary_basedir, montgomery_datadir, datadir): - -# # Temporarily modify Montgomery datadir if need be -# new_value = {"datadir.montgomery": str(montgomery_datadir)} -# with rc_context(**new_value): - -# from ptbench.scripts.predict import predict - -# runner = CliRunner() - -# with stdout_logging() as buf: - -# output_folder = str(temporary_basedir / "predictions") -# result = runner.invoke( -# predict, -# [ -# "densenet_rs", -# "montgomery_f0_rgb", -# "-vv", -# "--batch-size=1", -# f"--weight={str(datadir / 'lfs' / 'models' / 'densenetrs.pth')}", -# f"--output-folder={output_folder}", -# "--grad-cams" -# ], -# ) -# _assert_exit_0(result) - -# # check predictions are there -# predictions_file1 = os.path.join(output_folder, "train/predictions.csv") -# predictions_file2 = os.path.join(output_folder, "validation/predictions.csv") -# predictions_file3 = os.path.join(output_folder, "test/predictions.csv") -# assert os.path.exists(predictions_file1) -# assert os.path.exists(predictions_file2) -# assert os.path.exists(predictions_file3) -# # check some grad cams are there -# cam1 = os.path.join(output_folder, "train/cams/MCUCXR_0002_0_cam.png") -# cam2 = os.path.join(output_folder, "train/cams/MCUCXR_0126_1_cam.png") -# cam3 = os.path.join(output_folder, "train/cams/MCUCXR_0275_1_cam.png") -# cam4 = os.path.join(output_folder, "validation/cams/MCUCXR_0399_1_cam.png") -# cam5 = os.path.join(output_folder, "validation/cams/MCUCXR_0113_1_cam.png") -# cam6 = os.path.join(output_folder, "validation/cams/MCUCXR_0013_0_cam.png") -# cam7 = os.path.join(output_folder, "test/cams/MCUCXR_0027_0_cam.png") -# cam8 = os.path.join(output_folder, "test/cams/MCUCXR_0094_0_cam.png") -# cam9 = os.path.join(output_folder, "test/cams/MCUCXR_0375_1_cam.png") -# assert os.path.exists(cam1) -# assert os.path.exists(cam2) -# assert os.path.exists(cam3) -# assert os.path.exists(cam4) -# assert os.path.exists(cam5) -# assert os.path.exists(cam6) -# assert os.path.exists(cam7) -# assert os.path.exists(cam8) -# assert os.path.exists(cam9) - -# keywords = { -# r"^Loading checkpoint from.*$": 1, -# r"^Total time:.*$": 3, -# r"^Grad cams folder:.*$": 3, -# } -# buf.seek(0) -# logging_output = buf.read() - -# for k, v in keywords.items(): -# assert _str_counter(k, logging_output) == v, ( -# f"Count for string '{k}' appeared " -# f"({_str_counter(k, logging_output)}) " -# f"instead of the expected {v}:\nOutput:\n{logging_output}" -# ) +# def test_predict_densenetrs_montgomery(temporary_basedir, datadir): + +# from ptbench.scripts.predict import predict + +# runner = CliRunner() + +# with stdout_logging() as buf: + +# output_folder = str(temporary_basedir / "predictions") +# result = runner.invoke( +# predict, +# [ +# "densenet_rs", +# "montgomery_f0_rgb", +# "-vv", +# "--batch-size=1", +# f"--weight={str(datadir / 'lfs' / 'models' / 'densenetrs.pth')}", +# f"--output-folder={output_folder}", +# "--grad-cams" +# ], +# ) +# _assert_exit_0(result) + +# # check predictions are there +# predictions_file1 = os.path.join(output_folder, "train/predictions.csv") +# predictions_file2 = os.path.join(output_folder, "validation/predictions.csv") +# predictions_file3 = os.path.join(output_folder, "test/predictions.csv") +# assert os.path.exists(predictions_file1) +# assert os.path.exists(predictions_file2) +# assert os.path.exists(predictions_file3) +# # check some grad cams are there +# cam1 = os.path.join(output_folder, "train/cams/MCUCXR_0002_0_cam.png") +# cam2 = os.path.join(output_folder, "train/cams/MCUCXR_0126_1_cam.png") +# cam3 = os.path.join(output_folder, "train/cams/MCUCXR_0275_1_cam.png") +# cam4 = os.path.join(output_folder, "validation/cams/MCUCXR_0399_1_cam.png") +# cam5 = os.path.join(output_folder, "validation/cams/MCUCXR_0113_1_cam.png") +# cam6 = os.path.join(output_folder, "validation/cams/MCUCXR_0013_0_cam.png") +# cam7 = os.path.join(output_folder, "test/cams/MCUCXR_0027_0_cam.png") +# cam8 = os.path.join(output_folder, "test/cams/MCUCXR_0094_0_cam.png") +# cam9 = os.path.join(output_folder, "test/cams/MCUCXR_0375_1_cam.png") +# assert os.path.exists(cam1) +# assert os.path.exists(cam2) +# assert os.path.exists(cam3) +# assert os.path.exists(cam4) +# assert os.path.exists(cam5) +# assert os.path.exists(cam6) +# assert os.path.exists(cam7) +# assert os.path.exists(cam8) +# assert os.path.exists(cam9) + +# keywords = { +# r"^Loading checkpoint from.*$": 1, +# r"^Total time:.*$": 3, +# r"^Grad cams folder:.*$": 3, +# } +# buf.seek(0) +# logging_output = buf.read() + +# for k, v in keywords.items(): +# assert _str_counter(k, logging_output) == v, ( +# f"Count for string '{k}' appeared " +# f"({_str_counter(k, logging_output)}) " +# f"instead of the expected {v}:\nOutput:\n{logging_output}" +# ) diff --git a/tests/test_config.py b/tests/test_config.py index 6493892b..91f52a5a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,13 +2,8 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -import contextlib -import os -import tempfile - import numpy as np import pytest -import tomli_w import torch from torch.utils.data import ConcatDataset @@ -21,23 +16,6 @@ from ptbench.configs.datasets import get_positive_weights, get_samples_weights N = 10 -@contextlib.contextmanager -def rc_context(**new_config): - with tempfile.TemporaryDirectory() as tmpdir: - config_filename = "ptbench.toml" - with open(os.path.join(tmpdir, config_filename), "wb") as f: - tomli_w.dump(new_config, f) - f.flush() - old_config_home = os.environ.get("XDG_CONFIG_HOME") - os.environ["XDG_CONFIG_HOME"] = tmpdir - yield - if old_config_home is None: - del os.environ["XDG_CONFIG_HOME"] - else: - os.environ["XDG_CONFIG_HOME"] = old_config_home - - -@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery") def test_montgomery(): def _check_subset(samples, size): assert len(samples) == size @@ -60,20 +38,15 @@ def test_montgomery(): _check_subset(dataset["test"], 28) -def test_get_samples_weights(montgomery_datadir): - # Temporarily modify Montgomery datadir - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.configs.datasets.montgomery.default import dataset +def test_get_samples_weights(): + from ptbench.configs.datasets.montgomery.default import dataset - train_samples_weights = get_samples_weights( - dataset["__train__"] - ).numpy() + train_samples_weights = get_samples_weights(dataset["__train__"]).numpy() - unique, counts = np.unique(train_samples_weights, return_counts=True) + unique, counts = np.unique(train_samples_weights, return_counts=True) - np.testing.assert_equal(counts, np.array([51, 37])) - np.testing.assert_equal(unique, np.array(1 / counts, dtype=np.float32)) + np.testing.assert_equal(counts, np.array([51, 37])) + np.testing.assert_equal(unique, np.array(1 / counts, dtype=np.float32)) @pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14_re") @@ -87,22 +60,17 @@ def test_get_samples_weights_multi(): ) -def test_get_samples_weights_concat(montgomery_datadir): - # Temporarily modify Montgomery datadir - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.configs.datasets.montgomery.default import dataset +def test_get_samples_weights_concat(): + from ptbench.configs.datasets.montgomery.default import dataset - train_dataset = ConcatDataset( - (dataset["__train__"], dataset["__train__"]) - ) + train_dataset = ConcatDataset((dataset["__train__"], dataset["__train__"])) - train_samples_weights = get_samples_weights(train_dataset).numpy() + train_samples_weights = get_samples_weights(train_dataset).numpy() - unique, counts = np.unique(train_samples_weights, return_counts=True) + unique, counts = np.unique(train_samples_weights, return_counts=True) - np.testing.assert_equal(counts, np.array([102, 74])) - np.testing.assert_equal(unique, np.array(2 / counts, dtype=np.float32)) + np.testing.assert_equal(counts, np.array([102, 74])) + np.testing.assert_equal(unique, np.array(2 / counts, dtype=np.float32)) @pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14_re") @@ -127,19 +95,14 @@ def test_get_samples_weights_multi_concat(): np.testing.assert_equal(train_samples_weights, ref_samples_weights) -def test_get_positive_weights(montgomery_datadir): - # Temporarily modify Montgomery datadir - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.configs.datasets.montgomery.default import dataset +def test_get_positive_weights(): + from ptbench.configs.datasets.montgomery.default import dataset - train_positive_weights = get_positive_weights( - dataset["__train__"] - ).numpy() + train_positive_weights = get_positive_weights(dataset["__train__"]).numpy() - np.testing.assert_equal( - train_positive_weights, np.array([51.0 / 37.0], dtype=np.float32) - ) + np.testing.assert_equal( + train_positive_weights, np.array([51.0 / 37.0], dtype=np.float32) + ) @pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14_re") @@ -204,21 +167,16 @@ def test_get_positive_weights_multi(): ) -def test_get_positive_weights_concat(montgomery_datadir): - # Temporarily modify Montgomery datadir - new_value = {"datadir.montgomery": str(montgomery_datadir)} - with rc_context(**new_value): - from ptbench.configs.datasets.montgomery.default import dataset +def test_get_positive_weights_concat(): + from ptbench.configs.datasets.montgomery.default import dataset - train_dataset = ConcatDataset( - (dataset["__train__"], dataset["__train__"]) - ) + train_dataset = ConcatDataset((dataset["__train__"], dataset["__train__"])) - train_positive_weights = get_positive_weights(train_dataset).numpy() + train_positive_weights = get_positive_weights(train_dataset).numpy() - np.testing.assert_equal( - train_positive_weights, np.array([51.0 / 37.0], dtype=np.float32) - ) + np.testing.assert_equal( + train_positive_weights, np.array([51.0 / 37.0], dtype=np.float32) + ) @pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14_re") -- GitLab