diff --git a/tests/test_cli.py b/tests/test_cli.py
index 1b5ec0e45ac9be636b1e88cca08e2c01a7a54c16..d7f2c93302254167ce68da830c755c05caf54e40 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -154,7 +154,6 @@ def test_evaluate_help():
     _check_help(evaluate)
 
 
-@pytest.mark.skip(reason="Test need to be updated")
 @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 def test_train_pasa_montgomery(temporary_basedir):
     from ptbench.scripts.train import train
@@ -191,7 +190,7 @@ def test_train_pasa_montgomery(temporary_basedir):
             )
             == 1
         )
-        assert os.path.exists(os.path.join(output_folder, "model_summary.txt"))
+        assert os.path.exists(os.path.join(output_folder, "model-summary.txt"))
 
         keywords = {
             r"^Writing command-line for reproduction at .*$": 1,
@@ -216,7 +215,6 @@ def test_train_pasa_montgomery(temporary_basedir):
             )
 
 
-@pytest.mark.skip(reason="Test need to be updated")
 @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 def test_train_pasa_montgomery_from_checkpoint(temporary_basedir):
     from ptbench.scripts.train import train
@@ -251,7 +249,7 @@ def test_train_pasa_montgomery_from_checkpoint(temporary_basedir):
         == 1
     )
 
-    assert os.path.exists(os.path.join(output_folder, "model_summary.txt"))
+    assert os.path.exists(os.path.join(output_folder, "model-summary.txt"))
 
     with stdout_logging() as buf:
         result = runner.invoke(
@@ -284,7 +282,7 @@ def test_train_pasa_montgomery_from_checkpoint(temporary_basedir):
             == 2
         )
 
-        assert os.path.exists(os.path.join(output_folder, "model_summary.txt"))
+        assert os.path.exists(os.path.join(output_folder, "model-summary.txt"))
 
         keywords = {
             r"^Writing command-line for reproduction at .*$": 1,
@@ -310,7 +308,6 @@ def test_train_pasa_montgomery_from_checkpoint(temporary_basedir):
             )
 
 
-@pytest.mark.skip(reason="Test need to be updated")
 @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 def test_predict_pasa_montgomery(temporary_basedir, datadir):
     from ptbench.scripts.predict import predict
@@ -318,7 +315,7 @@ def test_predict_pasa_montgomery(temporary_basedir, datadir):
     runner = CliRunner()
 
     with stdout_logging() as buf:
-        output_folder = str(temporary_basedir / "predictions")
+        output = str(temporary_basedir / "predictions")
         result = runner.invoke(
             predict,
             [
@@ -326,29 +323,26 @@ def test_predict_pasa_montgomery(temporary_basedir, datadir):
                 "montgomery",
                 "-vv",
                 "--batch-size=1",
-                f"--weight={str(datadir / 'lfs' / 'models' / 'pasa.ckpt')}",
-                f"--output-folder={output_folder}",
+                f"--weight={str(temporary_basedir / 'results' / 'model_final_epoch.ckpt')}",
+                f"--output={output}",
             ],
         )
         _assert_exit_0(result)
 
-        # check predictions are there
-        train_predictions_file = os.path.join(output_folder, "train.csv")
-        validation_predictions_file = os.path.join(
-            output_folder, "validation.csv"
-        )
-        test_predictions_file = os.path.join(output_folder, "test.csv")
-
-        assert os.path.exists(train_predictions_file)
-        assert os.path.exists(validation_predictions_file)
-        assert os.path.exists(test_predictions_file)
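+        # predictions for all splits are now saved under a single output path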
+        assert os.path.exists(output)
 
         keywords = {
+            r"^Loading dataset: * without caching. Trade-off: CPU RAM: less | Disk: more$": 3,
+            r"^Loading checkpoint from .*$": 1,
             r"^Restoring normalizer from checkpoint.$": 1,
-            r"^Output folder: .*$": 1,
-            r"^Loading dataset: * without caching. Trade-off: CPU RAM: less | Disk: more": 3,
-            r"^Saving predictions in .*$": 3,
+            r"^Running prediction on `train` split...$": 1,
+            r"^Running prediction on `validation` split...$": 1,
+            r"^Running prediction on `test` split...$": 1,
+            r"^Predictions saved to .*$": 1
         }
+
         buf.seek(0)
         logging_output = buf.read()
 
@@ -360,7 +354,6 @@ def test_predict_pasa_montgomery(temporary_basedir, datadir):
             )
 
 
-@pytest.mark.skip(reason="Test need to be updated")
 @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 def test_evaluate_pasa_montgomery(temporary_basedir):
     from ptbench.scripts.evaluate import evaluate
@@ -375,223 +368,24 @@ def test_evaluate_pasa_montgomery(temporary_basedir):
             [
                 "-vv",
                 "montgomery",
-                f"--predictions-folder={prediction_folder}",
+                f"--predictions={prediction_folder}",
                 f"--output-folder={output_folder}",
-                "--threshold=test",
-                "--steps=2000",
+                "--threshold=test"
             ],
         )
         _assert_exit_0(result)
 
-        assert os.path.exists(os.path.join(output_folder, "scores.pdf"))
         assert os.path.exists(os.path.join(output_folder, "plots.pdf"))
-        assert os.path.exists(os.path.join(output_folder, "table.txt"))
-
-        keywords = {
-            r"^Evaluating threshold on.*$": 1,
-            r"^Maximum F1-score of.*$": 4,
-            r"^Set --f1_threshold=.*$": 1,
-            r"^Set --eer_threshold=.*$": 1,
-        }
-        buf.seek(0)
-        logging_output = buf.read()
-
-        for k, v in keywords.items():
-            assert _str_counter(k, logging_output) == v, (
-                f"Count for string '{k}' appeared "
-                f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
-            )
-
-
-@pytest.mark.skip(reason="Test need to be updated")
-@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_train_mlp_montgomery_rs(temporary_basedir, datadir):
-    from ptbench.scripts.train import train
-
-    runner = CliRunner()
-
-    with stdout_logging() as buf:
-        output_folder = str(temporary_basedir / "results/mlp")
-        result = runner.invoke(
-            train,
-            [
-                "mlp",
-                "montgomery_rs",
-                "-vv",
-                "--epochs=1",
-                "--batch-size=1",
-                f"--output-folder={output_folder}",
-            ],
-        )
-        _assert_exit_0(result)
-
-        assert os.path.exists(
-            os.path.join(output_folder, "model_final_epoch.ckpt")
-        )
-        assert os.path.exists(
-            os.path.join(output_folder, "model_lowest_valid_loss.ckpt")
-        )
-        assert os.path.exists(os.path.join(output_folder, "constants.csv"))
-        assert os.path.exists(
-            os.path.join(output_folder, "logs_csv", "version_0", "metrics.csv")
-        )
-        assert os.path.exists(
-            os.path.join(output_folder, "logs_tensorboard", "version_0")
-        )
-        assert os.path.exists(os.path.join(output_folder, "model_summary.txt"))
-
-        keywords = {
-            r"^Found \(dedicated\) '__train__' set for training$": 1,
-            r"^Found \(dedicated\) '__valid__' set for validation$": 1,
-            r"^Continuing from epoch 0$": 1,
-            r"^Saving model summary at.*$": 1,
-        }
-        buf.seek(0)
-        logging_output = buf.read()
-
-        for k, v in keywords.items():
-            assert _str_counter(k, logging_output) == v, (
-                f"Count for string '{k}' appeared "
-                f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
-            )
-
-
-@pytest.mark.skip(reason="Test need to be updated")
-@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_predict_mlp_montgomery_rs(temporary_basedir, datadir):
-    from ptbench.scripts.predict import predict
-
-    runner = CliRunner()
-
-    with stdout_logging() as buf:
-        output_folder = str(temporary_basedir / "predictions")
-        result = runner.invoke(
-            predict,
-            [
-                "mlp",
-                "montgomery_rs",
-                "-vv",
-                "--batch-size=1",
-                "--relevance-analysis",
-                f"--weight={str(datadir / 'lfs' / 'models' / 'mlp.ckpt')}",
-                f"--output-folder={output_folder}",
-            ],
-        )
-        _assert_exit_0(result)
-
-        # check predictions are there
-        predictions_file = os.path.join(output_folder, "train/predictions.csv")
-        RA1 = os.path.join(output_folder, "train_RA.pdf")
-        RA2 = os.path.join(output_folder, "validation_RA.pdf")
-        RA3 = os.path.join(output_folder, "test_RA.pdf")
-        assert os.path.exists(predictions_file)
-        assert os.path.exists(RA1)
-        assert os.path.exists(RA2)
-        assert os.path.exists(RA3)
-
-        keywords = {
-            r"^Loading checkpoint from.*$": 1,
-            r"^Starting relevance analysis for subset.*$": 3,
-            r"^Creating and saving plot at.*$": 3,
-        }
-        buf.seek(0)
-        logging_output = buf.read()
-
-        for k, v in keywords.items():
-            assert _str_counter(k, logging_output) == v, (
-                f"Count for string '{k}' appeared "
-                f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
-            )
-
-
-@pytest.mark.skip(reason="Test need to be updated")
-@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_train_logreg_montgomery_rs(temporary_basedir, datadir):
-    from ptbench.scripts.train import train
-
-    runner = CliRunner()
-
-    with stdout_logging() as buf:
-        output_folder = str(temporary_basedir / "results/logreg")
-        result = runner.invoke(
-            train,
-            [
-                "logistic_regression",
-                "montgomery_rs",
-                "-vv",
-                "--epochs=1",
-                "--batch-size=1",
-                f"--output-folder={output_folder}",
-            ],
-        )
-        _assert_exit_0(result)
-
-        assert os.path.exists(
-            os.path.join(output_folder, "model_final_epoch.ckpt")
-        )
-        assert os.path.exists(
-            os.path.join(output_folder, "model_lowest_valid_loss.ckpt")
-        )
-        assert os.path.exists(os.path.join(output_folder, "constants.csv"))
-        assert os.path.exists(
-            os.path.join(output_folder, "logs_csv", "version_0", "metrics.csv")
-        )
-        assert os.path.exists(
-            os.path.join(output_folder, "logs_tensorboard", "version_0")
-        )
-        assert os.path.exists(os.path.join(output_folder, "model_summary.txt"))
-
-        keywords = {
-            r"^Found \(dedicated\) '__train__' set for training$": 1,
-            r"^Found \(dedicated\) '__valid__' set for validation$": 1,
-            r"^Continuing from epoch 0$": 1,
-            r"^Saving model summary at.*$": 1,
-        }
-        buf.seek(0)
-        logging_output = buf.read()
-
-        for k, v in keywords.items():
-            assert _str_counter(k, logging_output) == v, (
-                f"Count for string '{k}' appeared "
-                f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
-            )
-
-
-@pytest.mark.skip(reason="Test need to be updated")
-@pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
-def test_predict_logreg_montgomery_rs(temporary_basedir, datadir):
-    from ptbench.scripts.predict import predict
-
-    runner = CliRunner()
-
-    with stdout_logging() as buf:
-        output_folder = str(temporary_basedir / "predictions")
-        result = runner.invoke(
-            predict,
-            [
-                "logistic_regression",
-                "montgomery_rs",
-                "-vv",
-                "--batch-size=1",
-                f"--weight={str(datadir / 'lfs' / 'models' / 'logreg.ckpt')}",
-                f"--output-folder={output_folder}",
-            ],
-        )
-        _assert_exit_0(result)
-
-        # check predictions are there
-        predictions_file = os.path.join(output_folder, "train/predictions.csv")
-        wfile = os.path.join(output_folder, "LogReg_Weights.pdf")
-        assert os.path.exists(predictions_file)
-        assert os.path.exists(wfile)
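+        # the tabular summary is now written as reStructuredText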
+        assert os.path.exists(os.path.join(output_folder, "summary.rst"))
 
         keywords = {
-            r"^Loading checkpoint from.*$": 1,
-            r"^Logistic regression identified: saving model weights.*$": 1,
+            r"^Setting --threshold=.*$": 1,
+            r"^Analyzing split `train`...$": 1,
+            r"^Analyzing split `validation`...$": 1,
+            r"^Analyzing split `test`...$": 1,
+            r"^Saving measures at .*$": 1,
+            r"^Saving figures at .*$": 1
         }
         buf.seek(0)
         logging_output = buf.read()