Skip to content
Snippets Groups Projects
Commit e48694a4 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[engine.significance] Document auxiliary functions

parent f3527300
No related branches found
No related tags found
No related merge requests found
...@@ -219,7 +219,39 @@ def _patch_measures(pred, gt, threshold, size, stride): ...@@ -219,7 +219,39 @@ def _patch_measures(pred, gt, threshold, size, stride):
def _visual_dataset_performance(stem, img, n, avg, std, outdir): def _visual_dataset_performance(stem, img, n, avg, std, outdir):
"""Runs a visual performance assessment for each entry in a given dataset""" """Runs a visual performance assessment for each entry in a given dataset
Parameters
----------
stem : str
The input file stem, for which a figure will be saved in ``outdir``,
in PDF format
img : pytorch.Tensor
A 3D tensor containing the original image that was analyzed
n : numpy.ndarray
A 2D integer array with the same size as `img` that indicates how many
overlapping windows are available for each pixel in the image
avg : numpy.ndarray
A 2D floating-point array with the average performance per pixel
calculated over all overlapping windows for that particular pixel
std : numpy.ndarray
A 2D floating-point array with the unbiased standard-deviation
(``ddof=1``) performance per pixel calculated over all overlapping
windows for that particular pixel
outdir : str
The base directory where to save output PDF images generated by this
procedure. The value of ``stem`` will be suffixed to this output
directory using a standard path join. The output filename will have a
``.pdf`` extension.
"""
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
...@@ -255,10 +287,70 @@ def _visual_dataset_performance(stem, img, n, avg, std, outdir): ...@@ -255,10 +287,70 @@ def _visual_dataset_performance(stem, img, n, avg, std, outdir):
def _patch_performances_for_sample( def _patch_performances_for_sample(
basedir, threshold, size, stride, dataset, k, figure, outdir=None, basedir, threshold, size, stride, dataset, k, figure, outdir,
): ):
""" """
Evaluates patch performances per sample Evaluates patch performances per sample
Parameters
----------
basedir : str
folder where predictions for the dataset images have been previously
stored
threshold : :py:class:`float`
this should be a threshold (floating point) to apply to prediction maps
to decide on positives and negatives.
size : tuple
size (vertical, horizontal) for windows for which we will calculate
partial performances based on the threshold and existing ground-truth
stride : tuple
strides (vertical, horizontal) for windows for which we will calculate
partial performances based on the threshold and existing ground-truth
dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
datasets to iterate on
k : int
the sample number (order inside the dataset, starting from zero), to
calculate patch performances for
figure : str
the performance figure to use for calculating patch micro performances
(e.g. `f1_score` or `jaccard`). Must be available on the produced
performance dataframe.
outdir : :py:class:`str`
path where to save a visual representation of patch performances. If
set to ``None``, then do not save those to disk.
Returns
-------
stem : str
The input file stem, that was just analyzed
data : dict
A dictionary containing the following fields:
* ``df``: a :py:class:`pandas.DataFrame` with the patch performance
figures in raster scan order.
* ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
the original image pertaining to the analyzed sample, that indicates
how many overlapping windows are available for each pixel in the
image
* ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
average performance per pixel calculated over all overlapping windows
for that particular pixel
* ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
unbiased standard-deviation (``ddof=1``) performance per pixel
calculated over all overlapping windows for that particular pixel
""" """
sample = dataset[k] sample = dataset[k]
...@@ -357,13 +449,13 @@ def patch_performances( ...@@ -357,13 +449,13 @@ def patch_performances(
""" """
# Collect overall measures
use_predictions_folder = os.path.join(predictions_folder, name) use_predictions_folder = os.path.join(predictions_folder, name)
if not os.path.exists(use_predictions_folder): if not os.path.exists(use_predictions_folder):
use_predictions_folder = predictions_folder use_predictions_folder = predictions_folder
with tqdm(range(len(dataset[name])), desc="patch-perf") as pbar: with tqdm(range(len(dataset[name])), desc="patch-perf") as pbar:
# we avoid the multiprocessing module if nproc==1
# so it is easier to run ipdb
if nproc != 1: if nproc != 1:
if nproc <= 0: if nproc <= 0:
nproc = multiprocessing.cpu_count() nproc = multiprocessing.cpu_count()
...@@ -406,11 +498,71 @@ def patch_performances( ...@@ -406,11 +498,71 @@ def patch_performances(
return dict(data) return dict(data)
def _visual_performances_for_sample( def _visual_performances_for_sample(size, stride, dataset, k, df, figure, outdir):
size, stride, dataset, k, df, figure, outdir=None
):
""" """
Displays patch performances per sample Displays patch performances per sample
This is a simplified version of :py:func:`_patch_performances_for_sample`
in which the patch performances are not recalculated, but used as input. It
can be used in case you have the patch performances stored on disk or if
you're evaluating differences between patches of 2 different systems.
Parameters
----------
size : tuple
size (vertical, horizontal) for windows for which we will calculate
partial performances based on the threshold and existing ground-truth
stride : tuple
strides (vertical, horizontal) for windows for which we will calculate
partial performances based on the threshold and existing ground-truth
dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
datasets to iterate on
k : int
the sample number (order inside the dataset, starting from zero), to
calculate patch performances for
df : pandas.DataFrame
the previously calculated dataframe to use for this patch performance
assessment.
figure : str
the performance figure to use for calculating patch micro performances
(e.g. `f1_score` or `jaccard`). Must be available on the produced
performance dataframe.
outdir : :py:class:`str`
path where to save a visual representation of patch performances. If
set to ``None``, then do not save those to disk.
Returns
-------
stem : str
The input file stem, that was just analyzed
data : dict
A dictionary containing the following fields:
* ``df``: a :py:class:`pandas.DataFrame` with the patch performance
figures in raster scan order. Notice this is just a copy of the
input data frame with the same name.
* ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
the original image pertaining to the analyzed sample, that indicates
how many overlapping windows are available for each pixel in the
image
* ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
average performance per pixel calculated over all overlapping windows
for that particular pixel
* ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
unbiased standard-deviation (``ddof=1``) performance per pixel
calculated over all overlapping windows for that particular pixel
""" """
sample = dataset[k] sample = dataset[k]
...@@ -428,6 +580,11 @@ def visual_performances( ...@@ -428,6 +580,11 @@ def visual_performances(
""" """
Displays the performances for multiple image patches, for a whole dataset Displays the performances for multiple image patches, for a whole dataset
This is a simplified version of :py:func:`patch_performances` in which the
patch performances are not recalculated, but used as input. It can be used
in case you have the patch performances stored on disk or if you're
evaluating differences between patches of 2 different systems.
Parameters Parameters
--------- ---------
...@@ -497,6 +654,8 @@ def visual_performances( ...@@ -497,6 +654,8 @@ def visual_performances(
stems = list(dataset[name].keys()) stems = list(dataset[name].keys())
with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar: with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar:
# we avoid the multiprocessing module if nproc==1
# so it is easier to run ipdb
if nproc != 1: if nproc != 1:
if nproc <= 0: if nproc <= 0:
nproc = multiprocessing.cpu_count() nproc = multiprocessing.cpu_count()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment