From bd6da859599007d39ed50a0d4e8710c93cfa806f Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Thu, 23 Feb 2023 09:56:59 +0100
Subject: [PATCH] [tests] Improve access to datadir.montgomery rc variable
 throughout the test units

---
 doc/catalog.json                          |   4 +-
 src/ptbench/data/hivtb/__init__.py        |   4 +-
 src/ptbench/data/indian/__init__.py       |   4 +-
 src/ptbench/data/montgomery/__init__.py   |  12 +-
 src/ptbench/data/nih_cxr14_re/__init__.py |   6 +-
 src/ptbench/data/padchest/__init__.py     |   4 +-
 src/ptbench/data/shenzhen/__init__.py     |   4 +-
 src/ptbench/data/tbpoc/__init__.py        |   4 +-
 tests/conftest.py                         |  75 ++-
 tests/test_cli.py                         | 681 ++++++++++------------
 tests/test_config.py                      |  94 +--
 11 files changed, 411 insertions(+), 481 deletions(-)

diff --git a/doc/catalog.json b/doc/catalog.json
index b3af4a0c..0a78c0e3 100644
--- a/doc/catalog.json
+++ b/doc/catalog.json
@@ -1,8 +1,8 @@
 {
   "clapp": {
     "versions": {
-      "stable": "https://www.idiap.ch/software/biosignal/docs/software/clapp/stable/sphinx/",
-      "latest": "https://www.idiap.ch/software/biosignal/docs/software/clapp/main/sphinx/"
+      "stable": "https://clapp.readthedocs.io/en/stable/",
+      "latest": "https://clapp.readthedocs.io/en/latest/"
     },
     "sources": {}
   }
diff --git a/src/ptbench/data/hivtb/__init__.py b/src/ptbench/data/hivtb/__init__.py
index d743968d..dd87f9a4 100644
--- a/src/ptbench/data/hivtb/__init__.py
+++ b/src/ptbench/data/hivtb/__init__.py
@@ -34,12 +34,12 @@ _protocols = [
     importlib.resources.files(__name__).joinpath("fold_9.json.bz2"),
 ]
 
-_root_path = load_rc().get("datadir.hivtb", os.path.realpath(os.curdir))
+_datadir = load_rc().get("datadir.hivtb", os.path.realpath(os.curdir))
 
 
 def _raw_data_loader(sample):
     return dict(
-        data=load_pil_baw(os.path.join(_root_path, sample["data"])),
+        data=load_pil_baw(os.path.join(_datadir, sample["data"])),
         label=sample["label"],
     )
 
diff --git a/src/ptbench/data/indian/__init__.py b/src/ptbench/data/indian/__init__.py
index 80e50dbc..dc1b9d99 100644
--- a/src/ptbench/data/indian/__init__.py
+++ b/src/ptbench/data/indian/__init__.py
@@ -34,12 +34,12 @@ _protocols = [
     importlib.resources.files(__name__).joinpath("fold_9.json.bz2"),
 ]
 
-_root_path = load_rc().get("datadir.indian", os.path.realpath(os.curdir))
+_datadir = load_rc().get("datadir.indian", os.path.realpath(os.curdir))
 
 
 def _raw_data_loader(sample):
     return dict(
-        data=load_pil_baw(os.path.join(_root_path, sample["data"])),
+        data=load_pil_baw(os.path.join(_datadir, sample["data"])),
         label=sample["label"],
     )
 
diff --git a/src/ptbench/data/montgomery/__init__.py b/src/ptbench/data/montgomery/__init__.py
index fd65f24d..719941f9 100644
--- a/src/ptbench/data/montgomery/__init__.py
+++ b/src/ptbench/data/montgomery/__init__.py
@@ -40,25 +40,19 @@ _protocols = [
     importlib.resources.files(__name__).joinpath("fold_9.json.bz2"),
 ]
 
-_root_path = None
+_datadir = load_rc().get("datadir.montgomery", os.path.realpath(os.curdir))
 
 
 def _raw_data_loader(sample):
-    # hack to allow tests to change "datadir.montgomery"
-    global _root_path
-    _root_path = _root_path or load_rc().get(
-        "datadir.montgomery", os.path.realpath(os.curdir)
-    )
-
     return dict(
-        data=load_pil_baw(os.path.join(_root_path, sample["data"])),  # type: ignore
+        data=load_pil_baw(os.path.join(_datadir, sample["data"])),  # type: ignore
         label=sample["label"],
     )
 
 
 def _loader(context, sample):
     # "context" is ignored in this case - database is homogeneous
-    # we returned delayed samples to avoid loading all images at once
+    # we return delayed samples to avoid loading all images at once
     return make_delayed(sample, _raw_data_loader)
 
 
diff --git a/src/ptbench/data/nih_cxr14_re/__init__.py b/src/ptbench/data/nih_cxr14_re/__init__.py
index 5cefea42..8dc3f223 100644
--- a/src/ptbench/data/nih_cxr14_re/__init__.py
+++ b/src/ptbench/data/nih_cxr14_re/__init__.py
@@ -38,7 +38,7 @@ _protocols = [
     importlib.resources.files(__name__).joinpath("cardiomegaly.json.bz2"),
 ]
 
-_root_path = load_rc().get("datadir.nih_cxr14_re", os.path.realpath(os.curdir))
+_datadir = load_rc().get("datadir.nih_cxr14_re", os.path.realpath(os.curdir))
 _idiap_folders = load_rc().get("nih_cxr14_re.idiap_folder_structure", False)
 
 
@@ -51,7 +51,7 @@ def _raw_data_loader(sample):
         return dict(
             data=load_pil_rgb(
                 os.path.join(
-                    _root_path,
+                    _datadir,
                     os.path.dirname(sample["data"]),
                     basename[:5],
                     basename,
@@ -61,7 +61,7 @@ def _raw_data_loader(sample):
         )
     else:
         return dict(
-            data=load_pil_rgb(os.path.join(_root_path, sample["data"])),
+            data=load_pil_rgb(os.path.join(_datadir, sample["data"])),
             label=sample["label"],
         )
 
diff --git a/src/ptbench/data/padchest/__init__.py b/src/ptbench/data/padchest/__init__.py
index eb4ccfe4..e52b6dd6 100644
--- a/src/ptbench/data/padchest/__init__.py
+++ b/src/ptbench/data/padchest/__init__.py
@@ -236,12 +236,12 @@ _protocols = [
     importlib.resources.files(__name__).joinpath("cardiomegaly_idiap.json.bz2"),
 ]
 
-_root_path = load_rc().get("datadir.padchest", os.path.realpath(os.curdir))
+_datadir = load_rc().get("datadir.padchest", os.path.realpath(os.curdir))
 
 
 def _raw_data_loader(sample):
     return dict(
-        data=load_pil(os.path.join(_root_path, sample["data"])),
+        data=load_pil(os.path.join(_datadir, sample["data"])),
         label=sample["label"],
     )
 
diff --git a/src/ptbench/data/shenzhen/__init__.py b/src/ptbench/data/shenzhen/__init__.py
index a8bcaa3a..c1488160 100644
--- a/src/ptbench/data/shenzhen/__init__.py
+++ b/src/ptbench/data/shenzhen/__init__.py
@@ -41,12 +41,12 @@ _protocols = [
     importlib.resources.files(__name__).joinpath("fold_9.json.bz2"),
 ]
 
-_root_path = load_rc().get("datadir.shenzhen", os.path.realpath(os.curdir))
+_datadir = load_rc().get("datadir.shenzhen", os.path.realpath(os.curdir))
 
 
 def _raw_data_loader(sample):
     return dict(
-        data=load_pil_baw(os.path.join(_root_path, sample["data"])),
+        data=load_pil_baw(os.path.join(_datadir, sample["data"])),
         label=sample["label"],
     )
 
diff --git a/src/ptbench/data/tbpoc/__init__.py b/src/ptbench/data/tbpoc/__init__.py
index 7ee817cf..0eba11a3 100644
--- a/src/ptbench/data/tbpoc/__init__.py
+++ b/src/ptbench/data/tbpoc/__init__.py
@@ -34,12 +34,12 @@ _protocols = [
     importlib.resources.files(__name__).joinpath("fold_9.json.bz2"),
 ]
 
-_root_path = load_rc().get("datadir.tbpoc", os.path.realpath(os.curdir))
+_datadir = load_rc().get("datadir.tbpoc", os.path.realpath(os.curdir))
 
 
 def _raw_data_loader(sample):
     return dict(
-        data=load_pil_baw(os.path.join(_root_path, sample["data"])),
+        data=load_pil_baw(os.path.join(_datadir, sample["data"])),
         label=sample["label"],
     )
 
diff --git a/tests/conftest.py b/tests/conftest.py
index f1aa8c92..86a852b5 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -2,9 +2,12 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
+import os
 import pathlib
+import typing
 
 import pytest
+import tomli_w
 
 
 @pytest.fixture
@@ -67,30 +70,56 @@ def temporary_basedir(tmp_path_factory):
     return tmp_path_factory.mktemp("test-cli")
 
 
-@pytest.fixture(scope="session")
-def montgomery_datadir(tmp_path_factory) -> pathlib.Path:
-    from ptbench.utils.rc import load_rc
-
-    database_dir = load_rc().get("datadir.montgomery")
-    if database_dir is not None:
-        return pathlib.Path(database_dir)
-
-    # else, we must extract the LFS component
-    archive = (
-        pathlib.Path(__file__).parents[0] / "data" / "lfs" / "test-database.zip"
-    )
-    assert archive.exists(), (
-        f"Neither datadir.montgomery is set on the global configuration, "
-        f"(typically ~/.config/ptbench.toml), or it is possible to detect "
-        f"the presence of {archive}' (did you git submodule init --update "
-        f"this submodule?)"
-    )
-
-    database_dir = tmp_path_factory.mktemp("montgomery_datadir")
+@pytest.fixture(scope="session", autouse=True)
+def ensure_montgomery(
+    tmp_path_factory,
+) -> typing.Generator[None, None, None]:
+    """A pytest fixture that ensures that datadir.montgomery is always
+    available."""
 
+    import tempfile
     import zipfile
 
-    with zipfile.ZipFile(archive) as zf:
-        zf.extractall(database_dir)
+    from ptbench.utils.rc import load_rc
+
+    rc = load_rc()
 
-    return database_dir
+    database_dir = rc.get("datadir.montgomery")
+    if database_dir is not None:
+        # if the user downloaded it, use that copy
+        yield
+
+    else:
+        # else, we must extract the LFS component (we are likely on the CI)
+        archive = (
+            pathlib.Path(__file__).parent / "data" / "lfs" / "test-database.zip"
+        )
+        assert archive.exists(), (
+            f"Neither is datadir.montgomery set on the global configuration "
+            f"(typically ~/.config/ptbench.toml), nor is it possible to detect "
+            f"the presence of '{archive}' (did you git submodule update --init "
+            f"this submodule?)"
+        )
+
+        database_dir = tmp_path_factory.mktemp("montgomery_datadir")
+        rc.setdefault("datadir.montgomery", str(database_dir))
+
+        with zipfile.ZipFile(archive) as zf:
+            zf.extractall(database_dir)
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            # write the amended configuration to a throw-away ptbench.toml
+            with open(os.path.join(tmpdir, "ptbench.toml"), "wb") as f:
+                tomli_w.dump(rc.data, f)
+
+            # point XDG_CONFIG_HOME to it for the whole test session
+            old_config_home = os.environ.get("XDG_CONFIG_HOME")
+            os.environ["XDG_CONFIG_HOME"] = tmpdir
+            try:
+                yield
+            finally:
+                # always restore the caller's environment on teardown
+                if old_config_home is None:
+                    del os.environ["XDG_CONFIG_HOME"]
+                else:
+                    os.environ["XDG_CONFIG_HOME"] = old_config_home
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 6507e3d0..3ed16d08 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -7,29 +7,10 @@
 import contextlib
 import os
 import re
-import tempfile
-
-import tomli_w
 
 from click.testing import CliRunner
 
 
-@contextlib.contextmanager
-def rc_context(**new_config):
-    with tempfile.TemporaryDirectory() as tmpdir:
-        config_filename = "ptbench.toml"
-        with open(os.path.join(tmpdir, config_filename), "wb") as f:
-            tomli_w.dump(new_config, f)
-            f.flush()
-        old_config_home = os.environ.get("XDG_CONFIG_HOME")
-        os.environ["XDG_CONFIG_HOME"] = tmpdir
-        yield
-        if old_config_home is None:
-            del os.environ["XDG_CONFIG_HOME"]
-        else:
-            os.environ["XDG_CONFIG_HOME"] = old_config_home
-
-
 @contextlib.contextmanager
 def stdout_logging():
     # copy logging messages to std out
@@ -187,261 +168,236 @@ def test_compare_help():
     _check_help(compare)
 
 
-def test_train_pasa_montgomery(temporary_basedir, montgomery_datadir):
-    # Temporarily modifies Montgomery datadir if need be
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.scripts.train import train
-
-        runner = CliRunner()
-
-        with stdout_logging() as buf:
-            output_folder = str(temporary_basedir / "results")
-            result = runner.invoke(
-                train,
-                [
-                    "pasa",
-                    "montgomery",
-                    "-vv",
-                    "--epochs=1",
-                    "--batch-size=1",
-                    "--normalization=current",
-                    f"--output-folder={output_folder}",
-                ],
-            )
-            _assert_exit_0(result)
+def test_train_pasa_montgomery(temporary_basedir):
+    from ptbench.scripts.train import train
 
-            assert os.path.exists(
-                os.path.join(output_folder, "model_final_epoch.pth")
-            )
-            assert os.path.exists(
-                os.path.join(output_folder, "model_lowest_valid_loss.pth")
-            )
-            assert os.path.exists(
-                os.path.join(output_folder, "last_checkpoint")
-            )
-            assert os.path.exists(os.path.join(output_folder, "constants.csv"))
-            assert os.path.exists(os.path.join(output_folder, "trainlog.csv"))
-            assert os.path.exists(
-                os.path.join(output_folder, "model_summary.txt")
-            )
+    runner = CliRunner()
 
-            keywords = {
-                r"^Found \(dedicated\) '__train__' set for training$": 1,
-                r"^Found \(dedicated\) '__valid__' set for validation$": 1,
-                r"^Continuing from epoch 0$": 1,
-                r"^Saving model summary at.*$": 1,
-                r"^Model has.*$": 1,
-                r"^Saving checkpoint": 2,
-                r"^Total training time:": 1,
-                r"^Z-normalization with mean": 1,
-            }
-            buf.seek(0)
-            logging_output = buf.read()
-
-            for k, v in keywords.items():
-                assert _str_counter(k, logging_output) == v, (
-                    f"Count for string '{k}' appeared "
-                    f"({_str_counter(k, logging_output)}) "
-                    f"instead of the expected {v}:\nOutput:\n{logging_output}"
-                )
-
-
-def test_predict_pasa_montgomery(
-    temporary_basedir, montgomery_datadir, datadir
-):
-    # Temporarily modifies Montgomery datadir if need be
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.scripts.predict import predict
-
-        runner = CliRunner()
-
-        with stdout_logging() as buf:
-            output_folder = str(temporary_basedir / "predictions")
-            result = runner.invoke(
-                predict,
-                [
-                    "pasa",
-                    "montgomery",
-                    "-vv",
-                    "--batch-size=1",
-                    "--relevance-analysis",
-                    f"--weight={str(datadir / 'lfs' / 'models' / 'pasa.pth')}",
-                    f"--output-folder={output_folder}",
-                ],
-            )
-            _assert_exit_0(result)
+    with stdout_logging() as buf:
+        output_folder = str(temporary_basedir / "results")
+        result = runner.invoke(
+            train,
+            [
+                "pasa",
+                "montgomery",
+                "-vv",
+                "--epochs=1",
+                "--batch-size=1",
+                "--normalization=current",
+                f"--output-folder={output_folder}",
+            ],
+        )
+        _assert_exit_0(result)
 
-            # check predictions are there
-            predictions_file1 = os.path.join(
-                output_folder, "train/predictions.csv"
-            )
-            predictions_file2 = os.path.join(
-                output_folder, "validation/predictions.csv"
-            )
-            predictions_file3 = os.path.join(
-                output_folder, "test/predictions.csv"
-            )
-            assert os.path.exists(predictions_file1)
-            assert os.path.exists(predictions_file2)
-            assert os.path.exists(predictions_file3)
-
-            keywords = {
-                r"^Loading checkpoint from.*$": 1,
-                r"^Total time:.*$": 3,
-                r"^Relevance analysis.*$": 3,
-            }
-            buf.seek(0)
-            logging_output = buf.read()
-
-            for k, v in keywords.items():
-                assert _str_counter(k, logging_output) == v, (
-                    f"Count for string '{k}' appeared "
-                    f"({_str_counter(k, logging_output)}) "
-                    f"instead of the expected {v}:\nOutput:\n{logging_output}"
-                )
-
-
-def test_predtojson(datadir, temporary_basedir, montgomery_datadir):
-    # Temporarily modify Montgomery datadir if need be
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.scripts.predtojson import predtojson
-
-        runner = CliRunner()
-
-        with stdout_logging() as buf:
-            predictions = str(datadir / "test_predictions.csv")
-            output_folder = str(temporary_basedir / "pred_to_json")
-            result = runner.invoke(
-                predtojson,
-                [
-                    "-vv",
-                    "train",
-                    f"{predictions}",
-                    "test",
-                    f"{predictions}",
-                    f"--output-folder={output_folder}",
-                ],
-            )
-            _assert_exit_0(result)
-
-            # check json file is there
-            assert os.path.exists(os.path.join(output_folder, "dataset.json"))
-
-            keywords = {
-                f"Output folder: {output_folder}": 1,
-                r"Saving JSON file...": 1,
-                r"^Loading predictions from.*$": 2,
-            }
-            buf.seek(0)
-            logging_output = buf.read()
-
-            for k, v in keywords.items():
-                assert _str_counter(k, logging_output) == v, (
-                    f"Count for string '{k}' appeared "
-                    f"({_str_counter(k, logging_output)}) "
-                    f"instead of the expected {v}:\nOutput:\n{logging_output}"
-                )
-
-
-def test_evaluate_pasa_montgomery(temporary_basedir, montgomery_datadir):
-    # Temporarily modify Montgomery datadir if need be
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.scripts.evaluate import evaluate
-
-        runner = CliRunner()
-
-        with stdout_logging() as buf:
-            prediction_folder = str(temporary_basedir / "predictions")
-            output_folder = str(temporary_basedir / "evaluations")
-            result = runner.invoke(
-                evaluate,
-                [
-                    "-vv",
-                    "montgomery",
-                    f"--predictions-folder={prediction_folder}",
-                    f"--output-folder={output_folder}",
-                    "--threshold=train",
-                    "--steps=2000",
-                ],
+        assert os.path.exists(
+            os.path.join(output_folder, "model_final_epoch.pth")
+        )
+        assert os.path.exists(
+            os.path.join(output_folder, "model_lowest_valid_loss.pth")
+        )
+        assert os.path.exists(os.path.join(output_folder, "last_checkpoint"))
+        assert os.path.exists(os.path.join(output_folder, "constants.csv"))
+        assert os.path.exists(os.path.join(output_folder, "trainlog.csv"))
+        assert os.path.exists(os.path.join(output_folder, "model_summary.txt"))
+
+        keywords = {
+            r"^Found \(dedicated\) '__train__' set for training$": 1,
+            r"^Found \(dedicated\) '__valid__' set for validation$": 1,
+            r"^Continuing from epoch 0$": 1,
+            r"^Saving model summary at.*$": 1,
+            r"^Model has.*$": 1,
+            r"^Saving checkpoint": 2,
+            r"^Total training time:": 1,
+            r"^Z-normalization with mean": 1,
+        }
+        buf.seek(0)
+        logging_output = buf.read()
+
+        for k, v in keywords.items():
+            assert _str_counter(k, logging_output) == v, (
+                f"Count for string '{k}' appeared "
+                f"({_str_counter(k, logging_output)}) "
+                f"instead of the expected {v}:\nOutput:\n{logging_output}"
             )
-            _assert_exit_0(result)
 
-            # check evaluations are there
-            assert os.path.exists(os.path.join(output_folder, "test.csv"))
-            assert os.path.exists(os.path.join(output_folder, "train.csv"))
-            assert os.path.exists(
-                os.path.join(output_folder, "test_score_table.pdf")
+
+def test_predict_pasa_montgomery(temporary_basedir, datadir):
+    from ptbench.scripts.predict import predict
+
+    runner = CliRunner()
+
+    with stdout_logging() as buf:
+        output_folder = str(temporary_basedir / "predictions")
+        result = runner.invoke(
+            predict,
+            [
+                "pasa",
+                "montgomery",
+                "-vv",
+                "--batch-size=1",
+                "--relevance-analysis",
+                f"--weight={str(datadir / 'lfs' / 'models' / 'pasa.pth')}",
+                f"--output-folder={output_folder}",
+            ],
+        )
+        _assert_exit_0(result)
+
+        # check predictions are there
+        predictions_file1 = os.path.join(output_folder, "train/predictions.csv")
+        predictions_file2 = os.path.join(
+            output_folder, "validation/predictions.csv"
+        )
+        predictions_file3 = os.path.join(output_folder, "test/predictions.csv")
+        assert os.path.exists(predictions_file1)
+        assert os.path.exists(predictions_file2)
+        assert os.path.exists(predictions_file3)
+
+        keywords = {
+            r"^Loading checkpoint from.*$": 1,
+            r"^Total time:.*$": 3,
+            r"^Relevance analysis.*$": 3,
+        }
+        buf.seek(0)
+        logging_output = buf.read()
+
+        for k, v in keywords.items():
+            assert _str_counter(k, logging_output) == v, (
+                f"Count for string '{k}' appeared "
+                f"({_str_counter(k, logging_output)}) "
+                f"instead of the expected {v}:\nOutput:\n{logging_output}"
             )
-            assert os.path.exists(
-                os.path.join(output_folder, "train_score_table.pdf")
+
+
+def test_predtojson(datadir, temporary_basedir):
+    from ptbench.scripts.predtojson import predtojson
+
+    runner = CliRunner()
+
+    with stdout_logging() as buf:
+        predictions = str(datadir / "test_predictions.csv")
+        output_folder = str(temporary_basedir / "pred_to_json")
+        result = runner.invoke(
+            predtojson,
+            [
+                "-vv",
+                "train",
+                f"{predictions}",
+                "test",
+                f"{predictions}",
+                f"--output-folder={output_folder}",
+            ],
+        )
+        _assert_exit_0(result)
+
+        # check json file is there
+        assert os.path.exists(os.path.join(output_folder, "dataset.json"))
+
+        keywords = {
+            f"Output folder: {output_folder}": 1,
+            r"Saving JSON file...": 1,
+            r"^Loading predictions from.*$": 2,
+        }
+        buf.seek(0)
+        logging_output = buf.read()
+
+        for k, v in keywords.items():
+            assert _str_counter(k, logging_output) == v, (
+                f"Count for string '{k}' appeared "
+                f"({_str_counter(k, logging_output)}) "
+                f"instead of the expected {v}:\nOutput:\n{logging_output}"
             )
 
-            keywords = {
-                r"^Skipping dataset '__train__'": 1,
-                r"^Evaluating threshold on.*$": 1,
-                r"^Maximum F1-score of.*$": 4,
-                r"^Set --f1_threshold=.*$": 1,
-                r"^Set --eer_threshold=.*$": 1,
-            }
-            buf.seek(0)
-            logging_output = buf.read()
-
-            for k, v in keywords.items():
-                assert _str_counter(k, logging_output) == v, (
-                    f"Count for string '{k}' appeared "
-                    f"({_str_counter(k, logging_output)}) "
-                    f"instead of the expected {v}:\nOutput:\n{logging_output}"
-                )
-
-
-def test_compare_pasa_montgomery(temporary_basedir, montgomery_datadir):
-    # Temporarily modify Montgomery datadir if need be
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.scripts.compare import compare
-
-        runner = CliRunner()
-
-        with stdout_logging() as buf:
-            predictions_folder = str(temporary_basedir / "predictions")
-            output_folder = str(temporary_basedir / "comparisons")
-            result = runner.invoke(
-                compare,
-                [
-                    "-vv",
-                    "train",
-                    f"{predictions_folder}/train/predictions.csv",
-                    "test",
-                    f"{predictions_folder}/test/predictions.csv",
-                    f"--output-figure={output_folder}/compare.pdf",
-                    f"--output-table={output_folder}/table.txt",
-                    "--threshold=0.5",
-                ],
+
+def test_evaluate_pasa_montgomery(temporary_basedir):
+    from ptbench.scripts.evaluate import evaluate
+
+    runner = CliRunner()
+
+    with stdout_logging() as buf:
+        prediction_folder = str(temporary_basedir / "predictions")
+        output_folder = str(temporary_basedir / "evaluations")
+        result = runner.invoke(
+            evaluate,
+            [
+                "-vv",
+                "montgomery",
+                f"--predictions-folder={prediction_folder}",
+                f"--output-folder={output_folder}",
+                "--threshold=train",
+                "--steps=2000",
+            ],
+        )
+        _assert_exit_0(result)
+
+        # check evaluations are there
+        assert os.path.exists(os.path.join(output_folder, "test.csv"))
+        assert os.path.exists(os.path.join(output_folder, "train.csv"))
+        assert os.path.exists(
+            os.path.join(output_folder, "test_score_table.pdf")
+        )
+        assert os.path.exists(
+            os.path.join(output_folder, "train_score_table.pdf")
+        )
+
+        keywords = {
+            r"^Skipping dataset '__train__'": 1,
+            r"^Evaluating threshold on.*$": 1,
+            r"^Maximum F1-score of.*$": 4,
+            r"^Set --f1_threshold=.*$": 1,
+            r"^Set --eer_threshold=.*$": 1,
+        }
+        buf.seek(0)
+        logging_output = buf.read()
+
+        for k, v in keywords.items():
+            assert _str_counter(k, logging_output) == v, (
+                f"Count for string '{k}' appeared "
+                f"({_str_counter(k, logging_output)}) "
+                f"instead of the expected {v}:\nOutput:\n{logging_output}"
             )
-            _assert_exit_0(result)
 
-            # check comparisons are there
-            assert os.path.exists(os.path.join(output_folder, "compare.pdf"))
-            assert os.path.exists(os.path.join(output_folder, "table.txt"))
 
-            keywords = {
-                r"^Dataset '\*': threshold =.*$": 1,
-                r"^Loading predictions from.*$": 2,
-                r"^Tabulating performance summary...": 1,
-            }
-            buf.seek(0)
-            logging_output = buf.read()
+def test_compare_pasa_montgomery(temporary_basedir):
+    from ptbench.scripts.compare import compare
+
+    runner = CliRunner()
+
+    with stdout_logging() as buf:
+        predictions_folder = str(temporary_basedir / "predictions")
+        output_folder = str(temporary_basedir / "comparisons")
+        result = runner.invoke(
+            compare,
+            [
+                "-vv",
+                "train",
+                f"{predictions_folder}/train/predictions.csv",
+                "test",
+                f"{predictions_folder}/test/predictions.csv",
+                f"--output-figure={output_folder}/compare.pdf",
+                f"--output-table={output_folder}/table.txt",
+                "--threshold=0.5",
+            ],
+        )
+        _assert_exit_0(result)
+
+        # check comparisons are there
+        assert os.path.exists(os.path.join(output_folder, "compare.pdf"))
+        assert os.path.exists(os.path.join(output_folder, "table.txt"))
+
+        keywords = {
+            r"^Dataset '\*': threshold =.*$": 1,
+            r"^Loading predictions from.*$": 2,
+            r"^Tabulating performance summary...": 1,
+        }
+        buf.seek(0)
+        logging_output = buf.read()
 
-            for k, v in keywords.items():
-                assert _str_counter(k, logging_output) == v, (
-                    f"Count for string '{k}' appeared "
-                    f"({_str_counter(k, logging_output)}) "
-                    f"instead of the expected {v}:\nOutput:\n{logging_output}"
-                )
+        for k, v in keywords.items():
+            assert _str_counter(k, logging_output) == v, (
+                f"Count for string '{k}' appeared "
+                f"({_str_counter(k, logging_output)}) "
+                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+            )
 
 
 def test_train_signstotb_montgomery_rs(temporary_basedir, datadir):
@@ -638,115 +594,108 @@ def test_predict_logreg_montgomery_rs(temporary_basedir, datadir):
             )
 
 
-def test_aggregpred(temporary_basedir, montgomery_datadir):
-    # Temporarily modify Montgomery datadir if need be
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.scripts.aggregpred import aggregpred
+def test_aggregpred(temporary_basedir):
+    from ptbench.scripts.aggregpred import aggregpred
 
-        runner = CliRunner()
+    runner = CliRunner()
 
-        with stdout_logging() as buf:
-            predictions = str(
-                temporary_basedir / "predictions" / "train" / "predictions.csv"
-            )
-            output_folder = str(temporary_basedir / "aggregpred")
-            result = runner.invoke(
-                aggregpred,
-                [
-                    "-vv",
-                    f"{predictions}",
-                    f"{predictions}",
-                    f"--output-folder={output_folder}",
-                ],
-            )
-            _assert_exit_0(result)
+    with stdout_logging() as buf:
+        predictions = str(
+            temporary_basedir / "predictions" / "train" / "predictions.csv"
+        )
+        output_folder = str(temporary_basedir / "aggregpred")
+        result = runner.invoke(
+            aggregpred,
+            [
+                "-vv",
+                f"{predictions}",
+                f"{predictions}",
+                f"--output-folder={output_folder}",
+            ],
+        )
+        _assert_exit_0(result)
 
-            # check csv file is there
-            assert os.path.exists(os.path.join(output_folder, "aggregpred.csv"))
+        # check csv file is there
+        assert os.path.exists(os.path.join(output_folder, "aggregpred.csv"))
 
-            keywords = {
-                f"Output folder: {output_folder}": 1,
-                r"Saving aggregated CSV file...": 1,
-                r"^Loading predictions from.*$": 2,
-            }
-            buf.seek(0)
-            logging_output = buf.read()
+        keywords = {
+            f"Output folder: {output_folder}": 1,
+            r"Saving aggregated CSV file...": 1,
+            r"^Loading predictions from.*$": 2,
+        }
+        buf.seek(0)
+        logging_output = buf.read()
 
-            for k, v in keywords.items():
-                assert _str_counter(k, logging_output) == v, (
-                    f"Count for string '{k}' appeared "
-                    f"({_str_counter(k, logging_output)}) "
-                    f"instead of the expected {v}:\nOutput:\n{logging_output}"
-                )
+        for k, v in keywords.items():
+            assert _str_counter(k, logging_output) == v, (
+                f"Count for string '{k}' appeared "
+                f"({_str_counter(k, logging_output)}) "
+                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+            )
 
 
 # Not enough RAM available to do this test
-# def test_predict_densenetrs_montgomery(temporary_basedir, montgomery_datadir, datadir):
-
-#     # Temporarily modify Montgomery datadir if need be
-#     new_value = {"datadir.montgomery": str(montgomery_datadir)}
-#     with rc_context(**new_value):
-
-#         from ptbench.scripts.predict import predict
-
-#         runner = CliRunner()
-
-#         with stdout_logging() as buf:
-
-#             output_folder = str(temporary_basedir / "predictions")
-#             result = runner.invoke(
-#                 predict,
-#                 [
-#                     "densenet_rs",
-#                     "montgomery_f0_rgb",
-#                     "-vv",
-#                     "--batch-size=1",
-#                     f"--weight={str(datadir / 'lfs' / 'models' / 'densenetrs.pth')}",
-#                     f"--output-folder={output_folder}",
-#                     "--grad-cams"
-#                 ],
-#             )
-#             _assert_exit_0(result)
-
-#             # check predictions are there
-#             predictions_file1 = os.path.join(output_folder, "train/predictions.csv")
-#             predictions_file2 = os.path.join(output_folder, "validation/predictions.csv")
-#             predictions_file3 = os.path.join(output_folder, "test/predictions.csv")
-#             assert os.path.exists(predictions_file1)
-#             assert os.path.exists(predictions_file2)
-#             assert os.path.exists(predictions_file3)
-#             # check some grad cams are there
-#             cam1 = os.path.join(output_folder, "train/cams/MCUCXR_0002_0_cam.png")
-#             cam2 = os.path.join(output_folder, "train/cams/MCUCXR_0126_1_cam.png")
-#             cam3 = os.path.join(output_folder, "train/cams/MCUCXR_0275_1_cam.png")
-#             cam4 = os.path.join(output_folder, "validation/cams/MCUCXR_0399_1_cam.png")
-#             cam5 = os.path.join(output_folder, "validation/cams/MCUCXR_0113_1_cam.png")
-#             cam6 = os.path.join(output_folder, "validation/cams/MCUCXR_0013_0_cam.png")
-#             cam7 = os.path.join(output_folder, "test/cams/MCUCXR_0027_0_cam.png")
-#             cam8 = os.path.join(output_folder, "test/cams/MCUCXR_0094_0_cam.png")
-#             cam9 = os.path.join(output_folder, "test/cams/MCUCXR_0375_1_cam.png")
-#             assert os.path.exists(cam1)
-#             assert os.path.exists(cam2)
-#             assert os.path.exists(cam3)
-#             assert os.path.exists(cam4)
-#             assert os.path.exists(cam5)
-#             assert os.path.exists(cam6)
-#             assert os.path.exists(cam7)
-#             assert os.path.exists(cam8)
-#             assert os.path.exists(cam9)
-
-#             keywords = {
-#                 r"^Loading checkpoint from.*$": 1,
-#                 r"^Total time:.*$": 3,
-#                 r"^Grad cams folder:.*$": 3,
-#             }
-#             buf.seek(0)
-#             logging_output = buf.read()
-
-#             for k, v in keywords.items():
-#                 assert _str_counter(k, logging_output) == v, (
-#                     f"Count for string '{k}' appeared "
-#                     f"({_str_counter(k, logging_output)}) "
-#                     f"instead of the expected {v}:\nOutput:\n{logging_output}"
-#                 )
+# def test_predict_densenetrs_montgomery(temporary_basedir, datadir):
+
+#    from ptbench.scripts.predict import predict
+
+#    runner = CliRunner()
+
+#    with stdout_logging() as buf:
+
+#        output_folder = str(temporary_basedir / "predictions")
+#        result = runner.invoke(
+#            predict,
+#            [
+#                "densenet_rs",
+#                "montgomery_f0_rgb",
+#                "-vv",
+#                "--batch-size=1",
+#                f"--weight={str(datadir / 'lfs' / 'models' / 'densenetrs.pth')}",
+#                f"--output-folder={output_folder}",
+#                "--grad-cams"
+#            ],
+#        )
+#        _assert_exit_0(result)
+
+#        # check predictions are there
+#        predictions_file1 = os.path.join(output_folder, "train/predictions.csv")
+#        predictions_file2 = os.path.join(output_folder, "validation/predictions.csv")
+#        predictions_file3 = os.path.join(output_folder, "test/predictions.csv")
+#        assert os.path.exists(predictions_file1)
+#        assert os.path.exists(predictions_file2)
+#        assert os.path.exists(predictions_file3)
+#        # check some grad cams are there
+#        cam1 = os.path.join(output_folder, "train/cams/MCUCXR_0002_0_cam.png")
+#        cam2 = os.path.join(output_folder, "train/cams/MCUCXR_0126_1_cam.png")
+#        cam3 = os.path.join(output_folder, "train/cams/MCUCXR_0275_1_cam.png")
+#        cam4 = os.path.join(output_folder, "validation/cams/MCUCXR_0399_1_cam.png")
+#        cam5 = os.path.join(output_folder, "validation/cams/MCUCXR_0113_1_cam.png")
+#        cam6 = os.path.join(output_folder, "validation/cams/MCUCXR_0013_0_cam.png")
+#        cam7 = os.path.join(output_folder, "test/cams/MCUCXR_0027_0_cam.png")
+#        cam8 = os.path.join(output_folder, "test/cams/MCUCXR_0094_0_cam.png")
+#        cam9 = os.path.join(output_folder, "test/cams/MCUCXR_0375_1_cam.png")
+#        assert os.path.exists(cam1)
+#        assert os.path.exists(cam2)
+#        assert os.path.exists(cam3)
+#        assert os.path.exists(cam4)
+#        assert os.path.exists(cam5)
+#        assert os.path.exists(cam6)
+#        assert os.path.exists(cam7)
+#        assert os.path.exists(cam8)
+#        assert os.path.exists(cam9)
+
+#        keywords = {
+#            r"^Loading checkpoint from.*$": 1,
+#            r"^Total time:.*$": 3,
+#            r"^Grad cams folder:.*$": 3,
+#        }
+#        buf.seek(0)
+#        logging_output = buf.read()
+
+#        for k, v in keywords.items():
+#            assert _str_counter(k, logging_output) == v, (
+#                f"Count for string '{k}' appeared "
+#                f"({_str_counter(k, logging_output)}) "
+#                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+#            )
diff --git a/tests/test_config.py b/tests/test_config.py
index 6493892b..91f52a5a 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -2,13 +2,8 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-import contextlib
-import os
-import tempfile
-
 import numpy as np
 import pytest
-import tomli_w
 import torch
 
 from torch.utils.data import ConcatDataset
@@ -21,23 +16,6 @@ from ptbench.configs.datasets import get_positive_weights, get_samples_weights
 N = 10
 
 
-@contextlib.contextmanager
-def rc_context(**new_config):
-    with tempfile.TemporaryDirectory() as tmpdir:
-        config_filename = "ptbench.toml"
-        with open(os.path.join(tmpdir, config_filename), "wb") as f:
-            tomli_w.dump(new_config, f)
-            f.flush()
-        old_config_home = os.environ.get("XDG_CONFIG_HOME")
-        os.environ["XDG_CONFIG_HOME"] = tmpdir
-        yield
-        if old_config_home is None:
-            del os.environ["XDG_CONFIG_HOME"]
-        else:
-            os.environ["XDG_CONFIG_HOME"] = old_config_home
-
-
-@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 def test_montgomery():
     def _check_subset(samples, size):
         assert len(samples) == size
@@ -60,20 +38,15 @@ def test_montgomery():
     _check_subset(dataset["test"], 28)
 
 
-def test_get_samples_weights(montgomery_datadir):
-    # Temporarily modify Montgomery datadir
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.configs.datasets.montgomery.default import dataset
+def test_get_samples_weights():
+    from ptbench.configs.datasets.montgomery.default import dataset
 
-        train_samples_weights = get_samples_weights(
-            dataset["__train__"]
-        ).numpy()
+    train_samples_weights = get_samples_weights(dataset["__train__"]).numpy()
 
-        unique, counts = np.unique(train_samples_weights, return_counts=True)
+    unique, counts = np.unique(train_samples_weights, return_counts=True)
 
-        np.testing.assert_equal(counts, np.array([51, 37]))
-        np.testing.assert_equal(unique, np.array(1 / counts, dtype=np.float32))
+    np.testing.assert_equal(counts, np.array([51, 37]))
+    np.testing.assert_equal(unique, np.array(1 / counts, dtype=np.float32))
 
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14_re")
@@ -87,22 +60,17 @@ def test_get_samples_weights_multi():
     )
 
 
-def test_get_samples_weights_concat(montgomery_datadir):
-    # Temporarily modify Montgomery datadir
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.configs.datasets.montgomery.default import dataset
+def test_get_samples_weights_concat():
+    from ptbench.configs.datasets.montgomery.default import dataset
 
-        train_dataset = ConcatDataset(
-            (dataset["__train__"], dataset["__train__"])
-        )
+    train_dataset = ConcatDataset((dataset["__train__"], dataset["__train__"]))
 
-        train_samples_weights = get_samples_weights(train_dataset).numpy()
+    train_samples_weights = get_samples_weights(train_dataset).numpy()
 
-        unique, counts = np.unique(train_samples_weights, return_counts=True)
+    unique, counts = np.unique(train_samples_weights, return_counts=True)
 
-        np.testing.assert_equal(counts, np.array([102, 74]))
-        np.testing.assert_equal(unique, np.array(2 / counts, dtype=np.float32))
+    np.testing.assert_equal(counts, np.array([102, 74]))
+    np.testing.assert_equal(unique, np.array(2 / counts, dtype=np.float32))
 
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14_re")
@@ -127,19 +95,14 @@ def test_get_samples_weights_multi_concat():
     np.testing.assert_equal(train_samples_weights, ref_samples_weights)
 
 
-def test_get_positive_weights(montgomery_datadir):
-    # Temporarily modify Montgomery datadir
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.configs.datasets.montgomery.default import dataset
+def test_get_positive_weights():
+    from ptbench.configs.datasets.montgomery.default import dataset
 
-        train_positive_weights = get_positive_weights(
-            dataset["__train__"]
-        ).numpy()
+    train_positive_weights = get_positive_weights(dataset["__train__"]).numpy()
 
-        np.testing.assert_equal(
-            train_positive_weights, np.array([51.0 / 37.0], dtype=np.float32)
-        )
+    np.testing.assert_equal(
+        train_positive_weights, np.array([51.0 / 37.0], dtype=np.float32)
+    )
 
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14_re")
@@ -204,21 +167,16 @@ def test_get_positive_weights_multi():
     )
 
 
-def test_get_positive_weights_concat(montgomery_datadir):
-    # Temporarily modify Montgomery datadir
-    new_value = {"datadir.montgomery": str(montgomery_datadir)}
-    with rc_context(**new_value):
-        from ptbench.configs.datasets.montgomery.default import dataset
+def test_get_positive_weights_concat():
+    from ptbench.configs.datasets.montgomery.default import dataset
 
-        train_dataset = ConcatDataset(
-            (dataset["__train__"], dataset["__train__"])
-        )
+    train_dataset = ConcatDataset((dataset["__train__"], dataset["__train__"]))
 
-        train_positive_weights = get_positive_weights(train_dataset).numpy()
+    train_positive_weights = get_positive_weights(train_dataset).numpy()
 
-        np.testing.assert_equal(
-            train_positive_weights, np.array([51.0 / 37.0], dtype=np.float32)
-        )
+    np.testing.assert_equal(
+        train_positive_weights, np.array([51.0 / 37.0], dtype=np.float32)
+    )
 
 
 @pytest.mark.skip_if_rc_var_not_set("datadir.nih_cxr14_re")
-- 
GitLab