Project: medai / software / deepdraw

Commit 6a8695c4
authored 4 years ago by André Anjos
[test.test_cli] Use more features from pytest for log capturing
parent 121fcaf6
No related branches or tags found
1 merge request: !16 "Use pytest instead of nose"
Changes: 1 changed file
bob/ip/binseg/test/test_cli.py: 74 additions, 119 deletions (+74 −119)
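What the commit does, in short: the hand-rolled stdout_logging() context manager (a StringIO-backed logging.StreamHandler temporarily attached to the "bob" logger) is dropped in favour of pytest's built-in caplog fixture. Below is a minimal sketch of the before/after pattern; the plain logging calls stand in for the CLI scripts the real tests exercise, and the function names are illustrative only.

    # Before: capture log output by hand through a temporary StreamHandler.
    import io
    import logging

    def capture_by_hand():
        buf = io.StringIO()
        ch = logging.StreamHandler(buf)
        ch.setFormatter(logging.Formatter("%(message)s"))
        ch.setLevel(logging.INFO)
        logger = logging.getLogger("bob")
        logger.setLevel(logging.INFO)  # the real tests rely on the CLI's -vv flag for this
        logger.addHandler(ch)
        try:
            logger.info("hello")  # the code under test would log here
        finally:
            logger.removeHandler(ch)
        return buf.getvalue()

    # After: let pytest collect the records through its caplog fixture.
    def test_with_caplog(caplog):
        caplog.set_level(logging.ERROR)  # baseline: only ERROR and above
        with caplog.at_level(logging.INFO, logger="bob.ip.binseg"):
            logging.getLogger("bob.ip.binseg").info("hello")
        messages = "\n".join(r.getMessage() for r in caplog.records)
        assert "hello" in messages

caplog.at_level() restores the previous level when the block exits, and caplog.set_level() is undone automatically at the end of the test, so no manual handler bookkeeping is needed.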
@@ -6,8 +6,8 @@
 import os
 import re
 import fnmatch
+import logging
 import tempfile
-import contextlib
 
 from click.testing import CliRunner
@@ -16,24 +16,6 @@ from . import mock_dataset
 
 stare_datadir, stare_dataset = mock_dataset()
 
-
-@contextlib.contextmanager
-def stdout_logging():
-
-    ## copy logging messages to std out
-    import sys
-    import logging
-    import io
-
-    buf = io.StringIO()
-    ch = logging.StreamHandler(buf)
-    ch.setFormatter(logging.Formatter("%(message)s"))
-    ch.setLevel(logging.INFO)
-    logger = logging.getLogger("bob")
-    logger.addHandler(ch)
-    yield buf
-    logger.removeHandler(ch)
-
 
 def _assert_exit_0(result):
 
     assert (
@@ -65,14 +47,17 @@ def _str_counter(substr, s):
     return sum(1 for _ in re.finditer(substr, s, re.MULTILINE))
 
 
-def _check_experiment_stare(overlay):
+def _check_experiment_stare(caplog, overlay):
 
     from ..script.experiment import experiment
 
+    # ensures we capture only ERROR messages and above by default
+    caplog.set_level(logging.ERROR)
+
     runner = CliRunner()
-    with runner.isolated_filesystem(), stdout_logging() as buf, tempfile.NamedTemporaryFile(
-        mode="wt"
-    ) as config:
+    with runner.isolated_filesystem(), caplog.at_level(
+        logging.INFO, logger="bob.ip.binseg"
+    ), tempfile.NamedTemporaryFile(mode="wt") as config:
 
         # re-write STARE dataset configuration for test
         config.write("from bob.ip.binseg.data.stare import _make_dataset\n")
@@ -154,16 +139,18 @@ def _check_experiment_stare(overlay):
             os.path.join(eval_folder, "second-annotator", "train.csv")
         )
 
         # checks individual performance figures are there
-        traindir_sa = os.path.join(eval_folder, "second-annotator", "train", "stare-images")
+        traindir_sa = os.path.join(
+            eval_folder, "second-annotator", "train", "stare-images"
+        )
         assert os.path.exists(traindir_sa)
         assert len(fnmatch.filter(os.listdir(traindir_sa), "*.csv")) == 10
 
         assert os.path.exists(
             os.path.join(eval_folder, "second-annotator", "test.csv")
         )
-        testdir_sa = os.path.join(eval_folder, "second-annotator", "test", "stare-images")
+        testdir_sa = os.path.join(
+            eval_folder, "second-annotator", "test", "stare-images"
+        )
         assert os.path.exists(testdir_sa)
         assert len(fnmatch.filter(os.listdir(testdir_sa), "*.csv")) == 10
@@ -226,35 +213,30 @@ def _check_experiment_stare(overlay):
             r"^Saving table at ": 1,
             r"^Ended comparison.*$": 1,
         }
-        buf.seek(0)
-        logging_output = buf.read()
+        messages = "\n".join([k.getMessage() for k in caplog.records])
 
         for k, v in keywords.items():
-            # if _str_counter(k, logging_output) != v:
-            #    print(f"Count for string '{k}' appeared " \
-            #        f"({_str_counter(k, result.output)}) " \
-            #        f"instead of the expected {v}")
-            assert _str_counter(k, logging_output) == v, (
+            assert _str_counter(k, messages) == v, (
                 f"Count for string '{k}' appeared "
                 f"({_str_counter(k, logging_output)}) "
                 f"instead of the expected {v}"
             )
 
 
-def test_experiment_stare_with_overlay():
-    _check_experiment_stare(overlay=True)
+def test_experiment_stare_with_overlay(caplog):
+    _check_experiment_stare(caplog, overlay=True)
 
 
-def test_experiment_stare_without_overlay():
-    _check_experiment_stare(overlay=False)
+def test_experiment_stare_without_overlay(caplog):
+    _check_experiment_stare(caplog, overlay=False)
 
 
-def _check_train(runner):
+def _check_train(caplog, runner):
 
     from ..script.train import train
 
-    with tempfile.NamedTemporaryFile(mode="wt") as config, stdout_logging() as buf:
+    with tempfile.NamedTemporaryFile(mode="wt") as config, caplog.at_level(
+        logging.INFO, logger="bob.ip.binseg"
+    ):
 
         # single training set configuration
         config.write("from bob.ip.binseg.data.stare import _make_dataset\n")
@@ -298,28 +280,23 @@ def _check_train(runner):
             r"^Saving checkpoint to .*/model_final.pth$": 1,
             r"^Total training time:": 1,
         }
-        buf.seek(0)
-        logging_output = buf.read()
+        messages = "\n".join([k.getMessage() for k in caplog.records])
 
         for k, v in keywords.items():
-            # if _str_counter(k, logging_output) != v:
-            #    print(f"Count for string '{k}' appeared " \
-            #        f"({_str_counter(k, result.output)}) " \
-            #        f"instead of the expected {v}")
-            assert _str_counter(k, logging_output) == v, (
+            assert _str_counter(k, messages) == v, (
                 f"Count for string '{k}' appeared "
                 f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+                f"instead of the expected {v}"
             )
 
 
-def _check_predict(runner):
+def _check_predict(caplog, runner):
 
     from ..script.predict import predict
 
-    with tempfile.NamedTemporaryFile(mode="wt") as config, stdout_logging() as buf:
+    with tempfile.NamedTemporaryFile(mode="wt") as config, caplog.at_level(
+        logging.INFO, logger="bob.ip.binseg"
+    ):
 
         # single training set configuration
         config.write("from bob.ip.binseg.data.stare import _make_dataset\n")
@@ -360,28 +337,23 @@ def _check_predict(runner):
             r"^Loading checkpoint from.*$": 1,
             r"^Total time:.*$": 1,
         }
-        buf.seek(0)
-        logging_output = buf.read()
+        messages = "\n".join([k.getMessage() for k in caplog.records])
 
         for k, v in keywords.items():
-            # if _str_counter(k, logging_output) != v:
-            #    print(f"Count for string '{k}' appeared " \
-            #        f"({_str_counter(k, result.output)}) " \
-            #        f"instead of the expected {v}")
-            assert _str_counter(k, logging_output) == v, (
+            assert _str_counter(k, messages) == v, (
                 f"Count for string '{k}' appeared "
                 f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+                f"instead of the expected {v}"
            )
 
 
-def _check_evaluate(runner):
+def _check_evaluate(caplog, runner):
 
     from ..script.evaluate import evaluate
 
-    with tempfile.NamedTemporaryFile(mode="wt") as config, stdout_logging() as buf:
+    with tempfile.NamedTemporaryFile(mode="wt") as config, caplog.at_level(
+        logging.INFO, logger="bob.ip.binseg"
+    ):
 
         # single training set configuration
         config.write("from bob.ip.binseg.data.stare import _make_dataset\n")
@@ -418,8 +390,9 @@ def _check_evaluate(runner):
             os.path.join(output_folder, "second-annotator", "test.csv")
         )
 
         # checks individual performance figures are there
-        testdir_sa = os.path.join(output_folder, "second-annotator", "test", "stare-images")
+        testdir_sa = os.path.join(
+            output_folder, "second-annotator", "test", "stare-images"
+        )
         assert os.path.exists(testdir_sa)
         assert len(fnmatch.filter(os.listdir(testdir_sa), "*.csv")) == 10
@@ -428,33 +401,20 @@ def _check_evaluate(runner):
         assert os.path.exists(basedir)
         assert len(fnmatch.filter(os.listdir(basedir), "*.png")) == 10
 
         keywords = {
             r"^Skipping dataset '__train__'": 0,
             r"^Saving summaries over all input images.*$": 1,
             r"^Maximum F1-score of.*\(chosen \*a posteriori\*\)$": 1,
             r"^F1-score of.*\(chosen \*a priori\*\)$": 1,
             r"^F1-score of.*\(second annotator; threshold=0.5\)$": 1,
         }
-        buf.seek(0)
-        logging_output = buf.read()
+        messages = "\n".join([k.getMessage() for k in caplog.records])
 
         for k, v in keywords.items():
-            # if _str_counter(k, logging_output) != v:
-            #    print(f"Count for string '{k}' appeared " \
-            #        f"({_str_counter(k, result.output)}) " \
-            #        f"instead of the expected {v}")
-            assert _str_counter(k, logging_output) == v, (
+            assert _str_counter(k, messages) == v, (
                 f"Count for string '{k}' appeared "
                 f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+                f"instead of the expected {v}"
            )
 
 
-def _check_compare(runner):
+def _check_compare(caplog, runner):
 
     from ..script.compare import compare
 
-    with stdout_logging() as buf:
+    with caplog.at_level(logging.INFO, logger="bob.ip.binseg"):
 
         output_folder = "evaluations"
         result = runner.invoke(
@@ -481,28 +441,22 @@ def _check_compare(runner):
             r"^Tabulating performance summary...": 1,
             r"^Saving table at ": 1,
         }
-        buf.seek(0)
-        logging_output = buf.read()
+        messages = "\n".join([k.getMessage() for k in caplog.records])
 
         for k, v in keywords.items():
-            # if _str_counter(k, logging_output) != v:
-            #    print(f"Count for string '{k}' appeared " \
-            #        f"({_str_counter(k, result.output)}) " \
-            #        f"instead of the expected {v}")
-            assert _str_counter(k, logging_output) == v, (
+            assert _str_counter(k, messages) == v, (
                 f"Count for string '{k}' appeared "
                 f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+                f"instead of the expected {v}"
             )
 
 
-def _check_significance(runner):
+def _check_significance(caplog, runner):
 
     from ..script.significance import significance
 
-    with tempfile.NamedTemporaryFile(mode="wt") as config, stdout_logging() as buf:
+    with tempfile.NamedTemporaryFile(mode="wt") as config, caplog.at_level(
+        logging.INFO, logger="bob.ip.binseg"
+    ):
 
         config.write("from bob.ip.binseg.data.stare import _make_dataset\n")
         config.write(f"_raw = _make_dataset('{stare_datadir}')\n")
@@ -520,11 +474,15 @@ def _check_significance(runner):
            [
                "-vv",
                config.name,
-                "--names=v1", "v2",
-                "--predictions=predictions", "predictions",
+                "--names=v1",
+                "v2",
+                "--predictions=predictions",
+                "predictions",
                "--threshold=0.5",
-                "--size=64", "64",
-                "--stride=32", "32",
+                "--size=64",
+                "64",
+                "--stride=32",
+                "32",
                "--figure=accuracy",
                f"--output-folder={ofolder}",
                f"--checkpoint-folder={cfolder}",
@@ -540,35 +498,32 @@ def _check_significance(runner):
         keywords = {
             r"^Evaluating sliding window 'accuracy' on": 2,
             r"^Evaluating sliding window 'accuracy' differences on": 1,
-            #r"^Basic statistics from distributions:$": 1,
+            # r"^Basic statistics from distributions:$": 1,
             r"^Writing analysis figures": 1,
             r"^Writing analysis summary": 1,
             r"^Differences are exactly zero": 2,
         }
-        buf.seek(0)
-        logging_output = buf.read()
+        messages = "\n".join([k.getMessage() for k in caplog.records])
 
         for k, v in keywords.items():
-            # if _str_counter(k, logging_output) != v:
-            #    print(f"Count for string '{k}' appeared " \
-            #        f"({_str_counter(k, result.output)}) " \
-            #        f"instead of the expected {v}")
-            assert _str_counter(k, logging_output) == v, (
+            assert _str_counter(k, messages) == v, (
                 f"Count for string '{k}' appeared "
                 f"({_str_counter(k, logging_output)}) "
-                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+                f"instead of the expected {v}"
             )
 
 
-def test_discrete_experiment_stare():
+def test_discrete_experiment_stare(caplog):
+
+    # ensures we capture only ERROR messages and above by default
+    caplog.set_level(logging.ERROR)
 
     runner = CliRunner()
     with runner.isolated_filesystem():
-        _check_train(runner)
-        _check_predict(runner)
-        _check_evaluate(runner)
-        _check_compare(runner)
-        #_check_significance(runner)
+        _check_train(caplog, runner)
+        _check_predict(caplog, runner)
+        _check_evaluate(caplog, runner)
+        _check_compare(caplog, runner)
+        # _check_significance(caplog, runner)
 
 
 def test_train_help():
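The assertion pattern kept throughout the file pairs each expected log line (an anchored regular expression) with the number of times it should appear, then counts matches against the joined caplog records. A self-contained sketch of that check follows; the two log.info() calls and the path are stand-ins for what the real CLI scripts emit, and note that the diff above still interpolates the removed logging_output name inside the failure message (which would only matter if the assertion ever failed), whereas this sketch uses messages.

    import re
    import logging

    def _str_counter(substr, s):
        # count non-overlapping regex matches over a multi-line string
        return sum(1 for _ in re.finditer(substr, s, re.MULTILINE))

    def test_expected_log_lines(caplog):
        with caplog.at_level(logging.INFO, logger="bob.ip.binseg"):
            log = logging.getLogger("bob.ip.binseg")
            log.info("Saving table at /tmp/comparison.rst")
            log.info("Ended comparison successfully")

        keywords = {
            r"^Saving table at ": 1,
            r"^Ended comparison.*$": 1,
        }
        messages = "\n".join(r.getMessage() for r in caplog.records)
        for k, v in keywords.items():
            assert _str_counter(k, messages) == v, (
                f"Count for string '{k}' appeared "
                f"({_str_counter(k, messages)}) "
                f"instead of the expected {v}"
            )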