diff --git a/bob/ip/binseg/engine/evaluator.py b/bob/ip/binseg/engine/evaluator.py index 1163584894f1b95c64363ff5d4675a5ea39fce85..eab56b66290bf8c5a92ce01cdb10595de9d0e47e 100644 --- a/bob/ip/binseg/engine/evaluator.py +++ b/bob/ip/binseg/engine/evaluator.py @@ -49,7 +49,7 @@ def _posneg(pred, gt, threshold): return tp_tensor, fp_tensor, tn_tensor, fn_tensor -def _sample_measures(pred, gt, bins): +def _sample_measures(pred, gt, steps): """ Calculates measures on one single sample and saves it to disk @@ -63,9 +63,9 @@ def _sample_measures(pred, gt, bins): gt : torch.Tensor ground-truth (annotations) - bins : int - number of bins to use for threshold analysis. The step size is - calculated from this by dividing ``1.0/bins``. + steps : int + number of steps to use for threshold analysis. The step size is + calculated from this by dividing ``1.0/steps``. Returns @@ -85,7 +85,7 @@ def _sample_measures(pred, gt, bins): """ - step_size = 1.0 / bins + step_size = 1.0 / steps data = [] for index, threshold in enumerate(numpy.arange(0.0, 1.0, step_size)): @@ -219,6 +219,7 @@ def run( output_folder=None, overlayed_folder=None, threshold=None, + steps=1000, ): """ Runs inference and calculates measures @@ -254,6 +255,9 @@ def run( may bias your analysis. This number is also used to print the a priori F1-score on the evaluated set. + steps : :py:class:`int`, Optional + number of threshold steps to consider when evaluating thresholds. + Returns ------- @@ -264,7 +268,6 @@ def run( """ # Collect overall measures - bins = 1000 # number of thresholds to analyse for data = {} for sample in tqdm(dataset): @@ -279,7 +282,7 @@ def run( raise RuntimeError( f"{stem} entry already exists in data. Cannot overwrite." 
) - data[stem] = _sample_measures(pred, gt, bins) + data[stem] = _sample_measures(pred, gt, steps) if overlayed_folder is not None: overlay_image = _sample_analysis( @@ -325,7 +328,7 @@ def run( if threshold is not None: # get the closest possible threshold we have - index = int(round(bins * threshold)) + index = int(round(steps * threshold)) f1_a_priori = avg_measures["f1_score"][index] actual_threshold = avg_measures["threshold"][index] diff --git a/bob/ip/binseg/script/analyze.py b/bob/ip/binseg/script/analyze.py index 8a7e502139a47ae71292c173f211c3a92ac973f9..8ce4fde614efcf0e93679e3b409794ef5a07c24c 100644 --- a/bob/ip/binseg/script/analyze.py +++ b/bob/ip/binseg/script/analyze.py @@ -117,6 +117,16 @@ logger = logging.getLogger(__name__) required=True, cls=ResourceOption, ) +@click.option( + "--steps", + "-S", + help="This number is used to define the number of threshold steps to " + "consider when evaluating the highest possible F1-score on test data.", + default=1000, + show_default=True, + required=True, + cls=ResourceOption, +) @verbosity_option(cls=ResourceOption) @click.pass_context def analyze( @@ -129,6 +139,7 @@ def analyze( device, overlayed, weight, + steps, verbose, **kwargs, ): @@ -230,6 +241,7 @@ def analyze( second_annotator=second_annotator, overlayed=overlayed_folder, threshold=threshold, + steps=steps, verbose=verbose, ) diff --git a/bob/ip/binseg/script/evaluate.py b/bob/ip/binseg/script/evaluate.py index 8a4b33d1a7a44991bbc827dc25f83f127fd47875..2e558671d1d7bd4919f37f3a63d160387c1988f2 100644 --- a/bob/ip/binseg/script/evaluate.py +++ b/bob/ip/binseg/script/evaluate.py @@ -137,6 +137,16 @@ def _validate_threshold(t, dataset): required=False, cls=ResourceOption, ) +@click.option( + "--steps", + "-S", + help="This number is used to define the number of threshold steps to " + "consider when evaluating the highest possible F1-score on test data.", + default=1000, + show_default=True, + required=True, + cls=ResourceOption, +) 
@verbosity_option(cls=ResourceOption) def evaluate( output_folder, @@ -145,6 +155,7 @@ def evaluate( second_annotator, overlayed, threshold, + steps, **kwargs, ): """Evaluates an FCN on a binary segmentation task. @@ -164,7 +175,8 @@ def evaluate( if isinstance(threshold, str): # first run evaluation for reference dataset, do not save overlays logger.info(f"Evaluating threshold on '{threshold}' set") - threshold = run(dataset[threshold], threshold, predictions_folder) + threshold = run(dataset[threshold], threshold, predictions_folder, + steps=steps) logger.info(f"Set --threshold={threshold:.5f}") # now run with the @@ -173,7 +185,8 @@ def evaluate( logger.info(f"Skipping dataset '{k}' (not to be evaluated)") continue logger.info(f"Analyzing '{k}' set...") - run(v, k, predictions_folder, output_folder, overlayed, threshold) + run(v, k, predictions_folder, output_folder, overlayed, threshold, + steps=steps) second = second_annotator.get(k) if second is not None: compare_annotators(v, second, k, output_folder, overlayed) diff --git a/bob/ip/binseg/script/experiment.py b/bob/ip/binseg/script/experiment.py index 050910c38c29745382de7c7d9e310db427f7f9ea..9afa6f5bf0b9096e726042f9778cb35e04c417a6 100644 --- a/bob/ip/binseg/script/experiment.py +++ b/bob/ip/binseg/script/experiment.py @@ -205,6 +205,16 @@ logger = logging.getLogger(__name__) required=False, cls=ResourceOption, ) +@click.option( + "--steps", + "-S", + help="This number is used to define the number of threshold steps to " + "consider when evaluating the highest possible F1-score on test data.", + default=1000, + show_default=True, + required=True, + cls=ResourceOption, +) @verbosity_option(cls=ResourceOption) @click.pass_context def experiment( @@ -226,6 +236,7 @@ def experiment( ssl, rampup, overlayed, + steps, verbose, **kwargs, ): @@ -323,5 +334,6 @@ def experiment( device=device, overlayed=overlayed, weight=model_file, + steps=steps, verbose=verbose, ) diff --git a/bob/ip/binseg/test/test_cli.py 
b/bob/ip/binseg/test/test_cli.py index 74187b1ca1842dca8d0043a85a14a25cfd5d0ed1..3d3f728f1f9d07c694f2c4cc5149593fbbed95a5 100644 --- a/bob/ip/binseg/test/test_cli.py +++ b/bob/ip/binseg/test/test_cli.py @@ -93,6 +93,7 @@ def _check_experiment_stare(overlay): "-vv", "--epochs=1", "--batch-size=1", + "--steps=10", f"--output-folder={output_folder}", ] if overlay: @@ -354,6 +355,7 @@ def _check_evaluate(runner): [ config.name, "-vv", + "--steps=10", f"--output-folder={output_folder}", "--predictions-folder=predictions", f"--overlayed={overlay_folder}",