diff --git a/bob/ip/binseg/engine/significance.py b/bob/ip/binseg/engine/significance.py
index 484236b230f044117e3ee10b69ad0f93f998037b..1bc7b9601629a7efc2c853e961f220d4e032ae8a 100644
--- a/bob/ip/binseg/engine/significance.py
+++ b/bob/ip/binseg/engine/significance.py
@@ -219,7 +219,39 @@ def _patch_measures(pred, gt, threshold, size, stride):
 
 
 def _visual_dataset_performance(stem, img, n, avg, std, outdir):
-    """Runs a visual performance assessment for each entry in a given dataset"""
+    """Runs a visual performance assessment for each entry in a given dataset
+
+
+    Parameters
+    ----------
+
+    stem : str
+        The input file stem, for which a figure will be saved in ``outdir``,
+        in PDF format
+
+    img : torch.Tensor
+        A 3D tensor containing the original image that was analyzed
+
+    n : numpy.ndarray
+        A 2D integer array with the same size as ``img`` that indicates how
+        many overlapping windows are available for each pixel in the image
+
+    avg : numpy.ndarray
+        A 2D floating-point array with the average performance per pixel
+        calculated over all overlapping windows for that particular pixel
+
+    std : numpy.ndarray
+        A 2D floating-point array with the unbiased standard-deviation
+        (``ddof=1``) performance per pixel calculated over all overlapping
+        windows for that particular pixel
+
+    outdir : str
+        The base directory under which output PDF images generated by this
+        procedure are saved.  The value of ``stem`` will be appended to this
+        output directory using a standard path join.  The output filename
+        will have a ``.pdf`` extension.
+
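+    Examples
+    --------
+
+    A minimal sketch of how this helper may be called (the tensor shapes,
+    stem and output path below are hypothetical)::
+
+        import numpy
+        import torch
+
+        img = torch.rand(3, 544, 544)          # hypothetical analyzed image
+        n = numpy.ones((544, 544), dtype=int)  # windows overlapping each pixel
+        avg = numpy.random.rand(544, 544)      # per-pixel average performance
+        std = numpy.random.rand(544, 544)      # per-pixel standard deviation
+        _visual_dataset_performance(
+            "subset/sample-01", img, n, avg, std, "/tmp/out"
+        )
+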
+    """
 
     import matplotlib.pyplot as plt
 
@@ -255,10 +287,70 @@ def _visual_dataset_performance(stem, img, n, avg, std, outdir):
 
 
 def _patch_performances_for_sample(
-    basedir, threshold, size, stride, dataset, k, figure, outdir=None,
+    basedir, threshold, size, stride, dataset, k, figure, outdir,
 ):
     """
     Evaluates patch performances per sample
+
+
+    Parameters
+    ----------
+
+    basedir : str
+        folder where predictions for the dataset images have been previously
+        stored
+
+    threshold : :py:class:`float`
+        the threshold (floating point) to apply to prediction maps, to decide
+        on positives and negatives.
+
+    size : tuple
+        size (vertical, horizontal) for windows for which we will calculate
+        partial performances based on the threshold and existing ground-truth
+
+    stride : tuple
+        strides (vertical, horizontal) for windows for which we will calculate
+        partial performances based on the threshold and existing ground-truth
+
+    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
+        datasets to iterate on
+
+    k : int
+        the sample number (order inside the dataset, starting from zero), to
+        calculate patch performances for
+
+    figure : str
+        the performance figure to use for calculating patch micro performances
+        (e.g. ``f1_score`` or ``jaccard``).  Must be available in the
+        produced performance dataframe.
+
+    outdir : :py:class:`str` or :py:class:`NoneType`
+        path where to save a visual representation of patch performances.  If
+        set to ``None``, then do not save those to disk.
+
+
+    Returns
+    -------
+
+    stem : str
+        The input file stem that was just analyzed
+
+    data : dict
+        A dictionary containing the following fields:
+
+        * ``df``: a :py:class:`pandas.DataFrame` with the patch performance
+          figures in raster scan order.
+        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
+          the original image pertaining to the analyzed sample, that indicates
+          how many overlapping windows are available for each pixel in the
+          image
+        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
+          average performance per pixel calculated over all overlapping windows
+          for that particular pixel
+        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
+          unbiased standard-deviation (``ddof=1``) performance per pixel
+          calculated over all overlapping windows for that particular pixel
+
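+    Examples
+    --------
+
+    A minimal sketch of how this helper may be called (the paths, dataset and
+    parameter values below are hypothetical)::
+
+        stem, data = _patch_performances_for_sample(
+            "/path/to/predictions",  # hypothetical prediction folder
+            0.5,                     # binarization threshold
+            (128, 128),              # window size (vertical, horizontal)
+            (32, 32),                # window stride (vertical, horizontal)
+            dataset,                 # hypothetical dataset to draw samples from
+            0,                       # first sample in the dataset
+            "f1_score",              # performance figure to evaluate
+            None,                    # do not save visual representations
+        )
+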
     """
 
     sample = dataset[k]
@@ -357,13 +449,13 @@ def patch_performances(
 
     """
 
-    # Collect overall measures
-
     use_predictions_folder = os.path.join(predictions_folder, name)
     if not os.path.exists(use_predictions_folder):
         use_predictions_folder = predictions_folder
 
     with tqdm(range(len(dataset[name])), desc="patch-perf") as pbar:
+        # avoid the multiprocessing module when nproc == 1, so it is easier
+        # to run ipdb
         if nproc != 1:
             if nproc <= 0:
                 nproc = multiprocessing.cpu_count()
@@ -406,11 +498,71 @@ def patch_performances(
     return dict(data)
 
 
-def _visual_performances_for_sample(
-    size, stride, dataset, k, df, figure, outdir=None
-):
+def _visual_performances_for_sample(size, stride, dataset, k, df, figure, outdir):
     """
     Displays patch performances per sample
+
+    This is a simplified version of :py:func:`_patch_performances_for_sample`
+    in which patch performances are not recalculated, but taken as input.  It
+    can be used when patch performances are already stored on disk, or when
+    evaluating differences between patches of two different systems.
+
+
+    Parameters
+    ----------
+
+    size : tuple
+        size (vertical, horizontal) of the windows for which the partial
+        performances in ``df`` were calculated
+
+    stride : tuple
+        strides (vertical, horizontal) of the windows for which the partial
+        performances in ``df`` were calculated
+
+    dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
+        datasets to iterate on
+
+    k : int
+        the sample number (order inside the dataset, starting from zero), to
+        calculate patch performances for
+
+    df : pandas.DataFrame
+        the previously calculated dataframe to use for this patch performance
+        assessment.
+
+    figure : str
+        the performance figure to use for calculating patch micro performances
+        (e.g. ``f1_score`` or ``jaccard``).  Must be available in the
+        provided performance dataframe.
+
+    outdir : :py:class:`str` or :py:class:`NoneType`
+        path where to save a visual representation of patch performances.  If
+        set to ``None``, then do not save those to disk.
+
+
+    Returns
+    -------
+
+    stem : str
+        The input file stem that was just analyzed
+
+    data : dict
+        A dictionary containing the following fields:
+
+        * ``df``: a :py:class:`pandas.DataFrame` with the patch performance
+          figures in raster scan order.  Note this is just a copy of the
+          input dataframe ``df``.
+        * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
+          the original image pertaining to the analyzed sample, that indicates
+          how many overlapping windows are available for each pixel in the
+          image
+        * ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
+          average performance per pixel calculated over all overlapping windows
+          for that particular pixel
+        * ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
+          unbiased standard-deviation (``ddof=1``) performance per pixel
+          calculated over all overlapping windows for that particular pixel
+
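+    Examples
+    --------
+
+    A minimal sketch of how this helper may be called (the dataset, dataframe
+    and parameter values below are hypothetical)::
+
+        stem, data = _visual_performances_for_sample(
+            (128, 128),   # window size (vertical, horizontal)
+            (32, 32),     # window stride (vertical, horizontal)
+            dataset,      # hypothetical dataset to draw samples from
+            0,            # first sample in the dataset
+            df,           # previously calculated performance dataframe
+            "f1_score",   # performance figure to evaluate
+            "/tmp/out",   # hypothetical directory for output PDF files
+        )
+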
     """
 
     sample = dataset[k]
@@ -428,6 +580,11 @@ def visual_performances(
     """
     Displays the performances for multiple image patches, for a whole dataset
 
+    This is a simplified version of :py:func:`patch_performances` in which
+    patch performances are not recalculated, but taken as input.  It can be
+    used when patch performances are already stored on disk, or when
+    evaluating differences between patches of two different systems.
+
 
     Parameters
     ---------
@@ -497,6 +654,8 @@ def visual_performances(
     stems = list(dataset[name].keys())
 
     with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar:
+        # avoid the multiprocessing module when nproc == 1, so it is easier
+        # to run ipdb
         if nproc != 1:
             if nproc <= 0:
                 nproc = multiprocessing.cpu_count()