diff --git a/bob/ip/binseg/engine/significance.py b/bob/ip/binseg/engine/significance.py index 484236b230f044117e3ee10b69ad0f93f998037b..1bc7b9601629a7efc2c853e961f220d4e032ae8a 100644 --- a/bob/ip/binseg/engine/significance.py +++ b/bob/ip/binseg/engine/significance.py @@ -219,7 +219,39 @@ def _patch_measures(pred, gt, threshold, size, stride): def _visual_dataset_performance(stem, img, n, avg, std, outdir): - """Runs a visual performance assessment for each entry in a given dataset""" + """Runs a visual performance assessment for each entry in a given dataset + + + Parameters + ---------- + + stem : str + The input file stem, for which a figure will be saved in ``outdir``, + in PDF format + + img : torch.Tensor + A 3D tensor containing the original image that was analyzed + + n : numpy.ndarray + A 2D integer array with the same size as `img` that indicates how many + overlapping windows are available for each pixel in the image + + avg : numpy.ndarray + A 2D floating-point array with the average performance per pixel + calculated over all overlapping windows for that particular pixel + + std : numpy.ndarray + A 2D floating-point array with the unbiased standard-deviation + (``ddof=1``) performance per pixel calculated over all overlapping + windows for that particular pixel + + outdir : str + The base directory where to save output PDF images generated by this + procedure. The value of ``stem`` will be suffixed to this output + directory using a standard path join. The output filename will have a + ``.pdf`` extension. 
+ + """ import matplotlib.pyplot as plt @@ -255,10 +287,70 @@ def _visual_dataset_performance(stem, img, n, avg, std, outdir): def _patch_performances_for_sample( - basedir, threshold, size, stride, dataset, k, figure, outdir=None, + basedir, threshold, size, stride, dataset, k, figure, outdir, ): """ Evaluates patch performances per sample + + + Parameters + ---------- + + basedir : str + folder where predictions for the dataset images have been previously + stored + + threshold : :py:class:`float` + this should be a threshold (floating point) to apply to prediction maps + to decide on positives and negatives. + + size : tuple + size (vertical, horizontal) for windows for which we will calculate + partial performances based on the threshold and existing ground-truth + + stride : tuple + strides (vertical, horizontal) for windows for which we will calculate + partial performances based on the threshold and existing ground-truth + + dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset` + datasets to iterate on + + k : int + the sample number (order inside the dataset, starting from zero), to + calculate patch performances for + + figure : str + the performance figure to use for calculating patch micro performances + (e.g. `f1_score` or `jaccard`). Must be available on the produced + performance dataframe. + + outdir : :py:class:`str` + path where to save a visual representation of patch performances. If + set to ``None``, then do not save those to disk. + + + Returns + ------- + + stem : str + The input file stem, that was just analyzed + + data : dict + A dictionary containing the following fields: + + * ``df``: a :py:class:`pandas.DataFrame` with the patch performance + figures in raster scan order. 
+ * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as + the original image pertaining to the analyzed sample, that indicates + how many overlapping windows are available for each pixel in the + image + * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the + average performance per pixel calculated over all overlapping windows + for that particular pixel + * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the + unbiased standard-deviation (``ddof=1``) performance per pixel + calculated over all overlapping + windows for that particular pixel + """ sample = dataset[k] @@ -357,13 +449,13 @@ def patch_performances( """ - # Collect overall measures - use_predictions_folder = os.path.join(predictions_folder, name) if not os.path.exists(use_predictions_folder): use_predictions_folder = predictions_folder with tqdm(range(len(dataset[name])), desc="patch-perf") as pbar: + # we avoid the multiprocessing module if nproc==1 + # so it is easier to run ipdb if nproc != 1: if nproc <= 0: nproc = multiprocessing.cpu_count() @@ -406,11 +498,71 @@ def patch_performances( return dict(data) -def _visual_performances_for_sample( - size, stride, dataset, k, df, figure, outdir=None -): +def _visual_performances_for_sample(size, stride, dataset, k, df, figure, outdir): """ Displays patch performances per sample + + This is a simplified version of :py:func:`_patch_performances_for_sample` + in which the patch performances are not recalculated but used as input. It + can be used in case you have the patch performances stored on disk or if + you're evaluating differences between patches of 2 different systems. 
+ + + Parameters + ---------- + + size : tuple + size (vertical, horizontal) for windows for which we will calculate + partial performances based on the threshold and existing ground-truth + + stride : tuple + strides (vertical, horizontal) for windows for which we will calculate + partial performances based on the threshold and existing ground-truth + + dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset` + datasets to iterate on + + k : int + the sample number (order inside the dataset, starting from zero), to + calculate patch performances for + + df : pandas.DataFrame + the previously calculated dataframe to use for this patch performance + assessment. + + figure : str + the performance figure to use for calculating patch micro performances + (e.g. `f1_score` or `jaccard`). Must be available on the produced + performance dataframe. + + outdir : :py:class:`str` + path where to save a visual representation of patch performances. If + set to ``None``, then do not save those to disk. + + + Returns + ------- + + stem : str + The input file stem, that was just analyzed + + data : dict + A dictionary containing the following fields: + + * ``df``: a :py:class:`pandas.DataFrame` with the patch performance + figures in raster scan order. Notice this is just a copy of the + input data frame with the same name. 
+ * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as + the original image pertaining to the analyzed sample, that indicates + how many overlapping windows are available for each pixel in the + image + * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the + average performance per pixel calculated over all overlapping windows + for that particular pixel + * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the + unbiased standard-deviation (``ddof=1``) performance per pixel + calculated over all overlapping + windows for that particular pixel + """ sample = dataset[k] @@ -428,6 +580,11 @@ def visual_performances( """ Displays the performances for multiple image patches, for a whole dataset + This is a simplified version of :py:func:`patch_performances` in which the + patch performances are not recalculated but used as input. It can be used + in case you have the patch performances stored on disk or if you're + evaluating differences between patches of 2 different systems. + Parameters --------- @@ -497,6 +654,8 @@ def visual_performances( stems = list(dataset[name].keys()) with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar: + # we avoid the multiprocessing module if nproc==1 + # so it is easier to run ipdb if nproc != 1: if nproc <= 0: nproc = multiprocessing.cpu_count()