Skip to content
Snippets Groups Projects
Commit 8c6ed30d authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[script,engine,test] Allows the user to specify the number of steps for...

[script,engine,test] Allows the user to specify the number of steps for threshold/performance analysis
parent 14e39fb4
No related branches found
No related tags found
No related merge requests found
Pipeline #39702 canceled
......@@ -49,7 +49,7 @@ def _posneg(pred, gt, threshold):
return tp_tensor, fp_tensor, tn_tensor, fn_tensor
def _sample_measures(pred, gt, bins):
def _sample_measures(pred, gt, steps):
"""
Calculates measures on one single sample and saves it to disk
......@@ -63,9 +63,9 @@ def _sample_measures(pred, gt, bins):
gt : torch.Tensor
ground-truth (annotations)
bins : int
number of bins to use for threshold analysis. The step size is
calculated from this by dividing ``1.0/bins``.
steps : int
number of steps to use for threshold analysis. The step size is
calculated from this by dividing ``1.0/steps``.
Returns
......@@ -85,7 +85,7 @@ def _sample_measures(pred, gt, bins):
"""
step_size = 1.0 / bins
step_size = 1.0 / steps
data = []
for index, threshold in enumerate(numpy.arange(0.0, 1.0, step_size)):
......@@ -219,6 +219,7 @@ def run(
output_folder=None,
overlayed_folder=None,
threshold=None,
steps=1000,
):
"""
Runs inference and calculates measures
......@@ -254,6 +255,9 @@ def run(
may bias your analysis. This number is also used to print the a priori
F1-score on the evaluated set.
steps : :py:class:`int`, Optional
number of threshold steps to consider when evaluating thresholds.
Returns
-------
......@@ -264,7 +268,6 @@ def run(
"""
# Collect overall measures
bins = 1000 # number of thresholds to analyse for
data = {}
for sample in tqdm(dataset):
......@@ -279,7 +282,7 @@ def run(
raise RuntimeError(
f"{stem} entry already exists in data. Cannot overwrite."
)
data[stem] = _sample_measures(pred, gt, bins)
data[stem] = _sample_measures(pred, gt, steps)
if overlayed_folder is not None:
overlay_image = _sample_analysis(
......@@ -325,7 +328,7 @@ def run(
if threshold is not None:
# get the closest possible threshold we have
index = int(round(bins * threshold))
index = int(round(steps * threshold))
f1_a_priori = avg_measures["f1_score"][index]
actual_threshold = avg_measures["threshold"][index]
......
......@@ -117,6 +117,16 @@ logger = logging.getLogger(__name__)
required=True,
cls=ResourceOption,
)
@click.option(
"--steps",
"-S",
help="This number is used to define the number of threshold steps to "
"consider when evaluating the highest possible F1-score on test data.",
default=1000,
show_default=True,
required=True,
cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
@click.pass_context
def analyze(
......@@ -129,6 +139,7 @@ def analyze(
device,
overlayed,
weight,
steps,
verbose,
**kwargs,
):
......@@ -230,6 +241,7 @@ def analyze(
second_annotator=second_annotator,
overlayed=overlayed_folder,
threshold=threshold,
steps=steps,
verbose=verbose,
)
......
......@@ -137,6 +137,16 @@ def _validate_threshold(t, dataset):
required=False,
cls=ResourceOption,
)
@click.option(
"--steps",
"-S",
help="This number is used to define the number of threshold steps to "
"consider when evaluating the highest possible F1-score on test data.",
default=1000,
show_default=True,
required=True,
cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
def evaluate(
output_folder,
......@@ -145,6 +155,7 @@ def evaluate(
second_annotator,
overlayed,
threshold,
steps,
**kwargs,
):
"""Evaluates an FCN on a binary segmentation task.
......@@ -164,7 +175,8 @@ def evaluate(
if isinstance(threshold, str):
# first run evaluation for reference dataset, do not save overlays
logger.info(f"Evaluating threshold on '{threshold}' set")
threshold = run(dataset[threshold], threshold, predictions_folder)
threshold = run(dataset[threshold], threshold, predictions_folder,
steps=steps)
logger.info(f"Set --threshold={threshold:.5f}")
# now run with the
......@@ -173,7 +185,8 @@ def evaluate(
logger.info(f"Skipping dataset '{k}' (not to be evaluated)")
continue
logger.info(f"Analyzing '{k}' set...")
run(v, k, predictions_folder, output_folder, overlayed, threshold)
run(v, k, predictions_folder, output_folder, overlayed, threshold,
steps=steps)
second = second_annotator.get(k)
if second is not None:
compare_annotators(v, second, k, output_folder, overlayed)
......@@ -205,6 +205,16 @@ logger = logging.getLogger(__name__)
required=False,
cls=ResourceOption,
)
@click.option(
"--steps",
"-S",
help="This number is used to define the number of threshold steps to "
"consider when evaluating the highest possible F1-score on test data.",
default=1000,
show_default=True,
required=True,
cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
@click.pass_context
def experiment(
......@@ -226,6 +236,7 @@ def experiment(
ssl,
rampup,
overlayed,
steps,
verbose,
**kwargs,
):
......@@ -323,5 +334,6 @@ def experiment(
device=device,
overlayed=overlayed,
weight=model_file,
steps=steps,
verbose=verbose,
)
......@@ -93,6 +93,7 @@ def _check_experiment_stare(overlay):
"-vv",
"--epochs=1",
"--batch-size=1",
"--steps=10",
f"--output-folder={output_folder}",
]
if overlay:
......@@ -354,6 +355,7 @@ def _check_evaluate(runner):
[
config.name,
"-vv",
"--steps=10",
f"--output-folder={output_folder}",
"--predictions-folder=predictions",
f"--overlayed={overlay_folder}",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment