From 1f9e0c39265ce042037b320c5b87fe6786cccdd6 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Thu, 23 Jul 2020 16:39:34 +0200 Subject: [PATCH] [engine.significance] patch -> sliding window nomenclature; Remove pandas requirement --- bob/ip/binseg/engine/significance.py | 336 ++++++++++++------------ bob/ip/binseg/script/significance.py | 230 ++++++++-------- bob/ip/binseg/test/test_significance.py | 315 +++++++++++++--------- 3 files changed, 472 insertions(+), 409 deletions(-) diff --git a/bob/ip/binseg/engine/significance.py b/bob/ip/binseg/engine/significance.py index 6bbed5c5..a699cb5b 100644 --- a/bob/ip/binseg/engine/significance.py +++ b/bob/ip/binseg/engine/significance.py @@ -2,7 +2,6 @@ # coding=utf-8 import os -import itertools import textwrap import multiprocessing @@ -12,7 +11,6 @@ logger = logging.getLogger(__name__) import h5py from tqdm import tqdm import numpy -import pandas import torch.nn import scipy.stats import tabulate @@ -20,13 +18,24 @@ import tabulate from .evaluator import _sample_measures_for_threshold -def _performance_summary(size, patch_perf, patch_size, patch_stride, figure): +PERFORMANCE_FIGURES = [ + "precision", + "recall", + "specificity", + "accuracy", + "jaccard", + "f1_score", + ] +"""List of performance figures supported by this module, in order""" + + +def _performance_summary(size, winperf, winsize, winstride, figure): """Generates an array that represents the performance per pixel of the original image The returned array corresponds to a stacked version of performances for - each pixel in the original image taking into consideration the patch - performances, their size and stride. + each pixel in the original image taking into consideration the sliding + window performances, their size and stride. Parameters @@ -36,17 +45,20 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure): A two tuple with the original height and width of the image being analyzed - patch_perf : typing.Sequence - An ordered sequence of patch performances (in raster direction - every - row, from left to right and then rows from top to bottom). + winperf : numpy.ndarray + A 3D array with shape ``(N, H, W)``, where ``N`` represents the number + of performance measures supported by this module, ``(H,W)`` is the + total number of vertical and horizontal sliding windows. 
- patch_size : tuple - A two tuple that indicates the size of each patch (height, width) + winsize : tuple + A two tuple that indicates the size of the sliding window (height, + width) - patch_stride: tuple - A two tuple that indicates the stride of each patch (height, width) + winstride : tuple + A two tuple that indicates the stride of the sliding window (height, + width) - figure: str + figure : str Name of the performance figure to use for the summary @@ -59,14 +71,13 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure): avg : numpy.ndarray A 2D numpy array containing the average performances for every pixel on - the input image considering the patch sizes and strides applied when - windowing the image + the input image considering the sliding window sizes and strides + applied to the image std : numpy.ndarray A 2D numpy array containing the (unbiased) standard deviations for the provided performance figure, for every pixel on the input image - considering the patch sizes and strides applied when windowing the - image + considering the sliding window sizes and strides applied to the image """ @@ -77,33 +88,30 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure): # torch unfolding works exactly. The last windows on the left and bottom # parts of the image may be extended with zeros. final_size = list(size) - rem = (size[0] - patch_size[0]) % patch_stride[0] + rem = (size[0] - winsize[0]) % winstride[0] if rem != 0: - final_size[0] += patch_stride[0] - rem - rem = (size[1] - patch_size[1]) % patch_stride[1] + final_size[0] += winstride[0] - rem + rem = (size[1] - winsize[1]) % winstride[1] if rem != 0: - final_size[1] += patch_stride[1] - rem + final_size[1] += winstride[1] - rem n = numpy.zeros(final_size, dtype=int) - ylen = ((final_size[0] - patch_size[0]) // patch_stride[0]) + 1 - xlen = ((final_size[1] - patch_size[1]) // patch_stride[1]) + 1 # calculates the stacked performance layers = int( - numpy.ceil(patch_size[0] / patch_stride[0]) - * numpy.ceil(patch_size[1] / patch_stride[1]) - ) - perf = numpy.zeros( - [layers] + final_size, dtype=patch_perf[figure].iloc[0].dtype + numpy.ceil(winsize[0] / winstride[0]) + * numpy.ceil(winsize[1] / winstride[1]) ) + figindex = PERFORMANCE_FIGURES.index(figure) + perf = numpy.zeros([layers] + final_size, dtype=winperf.dtype) n = -1 * numpy.ones(final_size, dtype=int) - col = numpy.array(patch_perf[figure]) - for j in range(ylen): + data = winperf[PERFORMANCE_FIGURES.index(figure)] + for j in range(data.shape[0]): yup = slice( - patch_stride[0] * j, (patch_stride[0] * j) + patch_size[0], 1 + winstride[0] * j, (winstride[0] * j) + winsize[0], 1 ) - for i in range(xlen): + for i in range(data.shape[1]): xup = slice( - patch_stride[1] * i, (patch_stride[1] * i) + patch_size[1], 1 + winstride[1] * i, (winstride[1] * i) + winsize[1], 1 ) nup = n[yup, xup] nup += 1 @@ -112,7 +120,7 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure): range(xup.start, xup.stop, xup.step), indexing="ij", ) - perf[nup.flat, yr.flat, xr.flat] = col[(j * xlen) + i] + perf[nup.flat, yr.flat, xr.flat] = data[j, i] # for each element in the ``perf``matrix, calculates avg and std. 
n += 1 # adjust for starting at -1 before @@ -125,9 +133,9 @@ def _performance_summary(size, patch_perf, patch_size, patch_stride, figure): return n, avg, std -def _patch_measures(pred, gt, threshold, size, stride): +def _winperf_measures(pred, gt, threshold, size, stride): """ - Calculates measures on patches of a single sample + Calculates measures on sliding windows of a single sample Parameters @@ -140,7 +148,7 @@ def _patch_measures(pred, gt, threshold, size, stride): ground-truth (annotations) threshold : float - threshold to use for evaluating individual patch performances + threshold to use for evaluating individual sliding window performances size : tuple size (vertical, horizontal) for windows for which we will calculate @@ -154,18 +162,15 @@ def _patch_measures(pred, gt, threshold, size, stride): Returns ------- - measures : pandas.DataFrame + measures : numpy.ndarray - A pandas dataframe with the following columns: + A 3D float array with all supported performance entries for each + sliding window. - * patch: int - * threshold: float - * precision: float - * recall: float - * specificity: float - * accuracy: float - * jaccard: float - * f1_score: float + The first dimension of the array is therefore 6. The other two + dimensions correspond to resulting size of the sliding window operation + applied to the input data and taking into consideration the sliding + window size and the stride. """ @@ -185,36 +190,20 @@ def _patch_measures(pred, gt, threshold, size, stride): gt_padded = torch.nn.functional.pad(gt.squeeze(0), padding) # this will create as many views as required - pred_patches = pred_padded.unfold(0, size[0], stride[0]).unfold( + pred_windows = pred_padded.unfold(0, size[0], stride[0]).unfold( 1, size[1], stride[1] ) - gt_patches = gt_padded.unfold(0, size[0], stride[0]).unfold( + gt_windows = gt_padded.unfold(0, size[0], stride[0]).unfold( 1, size[1], stride[1] ) - assert pred_patches.shape == gt_patches.shape - ylen, xlen, _, _ = pred_patches.shape + assert pred_windows.shape == gt_windows.shape + ylen, xlen, _, _ = pred_windows.shape - data = [ - (j, i) - + _sample_measures_for_threshold( - pred_patches[j, i, :, :], gt_patches[j, i, :, :], threshold + retval = numpy.array([_sample_measures_for_threshold( + pred_windows[j, i, :, :], gt_windows[j, i, :, :], threshold ) - for j, i in itertools.product(range(ylen), range(xlen)) - ] - - return pandas.DataFrame( - data, - columns=( - "y", - "x", - "precision", - "recall", - "specificity", - "accuracy", - "jaccard", - "f1_score", - ), - ) + for j in range(ylen) for i in range(xlen)]) + return retval.transpose(1,0).reshape(6, ylen, xlen) def _visual_dataset_performance(stem, img, n, avg, std, outdir): @@ -286,11 +275,11 @@ def _visual_dataset_performance(stem, img, n, avg, std, outdir): plt.close(fig) -def _patch_performances_for_sample( +def _winperf_for_sample( basedir, threshold, size, stride, dataset, k, figure, outdir, ): """ - Evaluates patch performances per sample + Evaluates sliding window performances per sample Parameters @@ -317,16 +306,17 @@ def _patch_performances_for_sample( k : int the sample number (order inside the dataset, starting from zero), to - calculate patch performances for + calculate sliding window performances for figure : str - the performance figure to use for calculating patch micro performances - (e.g. `accuracy`, `f1_score` or `jaccard`). Must be available on the - produced performance dataframe. + the performance figure to use for calculating sliding window micro + performances (e.g. 
`accuracy`, `f1_score` or `jaccard`). Must be + a supported performance figure as defined in + :py:attr:`PERFORMANCE_FIGURES` outdir : str - path were to save a visual representation of patch performances. If - set to ``None``, then do not save those to disk. + path were to save a visual representation of sliding window + performances. If set to ``None``, then do not save those to disk. Returns @@ -338,8 +328,8 @@ def _patch_performances_for_sample( data : dict A dictionary containing the following fields: - * ``df``: a :py:class:`pandas.DataFrame` with the patch performance - figures in raster scan order. + * ``winperf``: a 3D :py:class:`numpy.ndarray` with the sliding window + performance figures * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as the original image pertaining to the analyzed sample, that indicates how many overlapping windows are available for each pixel in the @@ -356,16 +346,16 @@ def _patch_performances_for_sample( sample = dataset[k] with h5py.File(os.path.join(basedir, sample[0] + ".hdf5"), "r") as f: pred = torch.from_numpy(f["array"][:]) - df = _patch_measures(pred, sample[2], threshold, size, stride) + winperf = _winperf_measures(pred, sample[2], threshold, size, stride) n, avg, std = _performance_summary( - sample[1].shape[1:], df, size, stride, figure + sample[1].shape[1:], winperf, size, stride, figure ) if outdir is not None: _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir) - return sample[0], dict(df=df, n=n, avg=avg, std=std) + return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std) -def patch_performances( +def sliding_window_performances( dataset, name, predictions_folder, @@ -377,7 +367,7 @@ def patch_performances( outdir=None, ): """ - Evaluates the performances for multiple image patches, for a whole dataset + Evaluates sliding window performances for a whole dataset Parameters @@ -407,7 +397,8 @@ def patch_performances( partial performances based on the threshold and existing ground-truth figure : str - the performance figure to use for calculating patch micro performances + the performance figure to use for calculating sliding window micro + performances nproc : :py:class:`int`, Optional the number of processing cores to use for performance evaluation. @@ -417,8 +408,8 @@ def patch_performances( multiprocessing. outdir : :py:class:`str`, Optional - path were to save a visual representation of patch performances. If - set to ``None``, then do not save those to disk. + path were to save a visual representation of sliding window + performances. If set to ``None``, then do not save those to disk. Returns @@ -428,23 +419,21 @@ def patch_performances( A dictionary in which keys are filename stems and values are dictionaries with the following contents: - ``df``: :py:class:`pandas.DataFrame` - A dataframe with all the patch performances aggregated, for all - input images. 
+ ``winperf``: numpy.ndarray - ``n`` : :py:class:`numpy.ndarray` + ``n`` : numpy.ndarray A 2D numpy array containing the number of performance scores for every pixel in the original image ``avg`` : :py:class:`numpy.ndarray` A 2D numpy array containing the average performances for every - pixel on the input image considering the patch sizes and strides - applied when windowing the image + pixel on the input image considering the sliding window sizes and + strides applied to the image ``std`` : :py:class:`numpy.ndarray` A 2D numpy array containing the (unbiased) standard deviations for the provided performance figure, for every pixel on the input image - considering the patch sizes and strides applied when windowing the + considering the sliding window sizes and strides applied to the image """ @@ -453,7 +442,7 @@ def patch_performances( if not os.path.exists(use_predictions_folder): use_predictions_folder = predictions_folder - with tqdm(range(len(dataset[name])), desc="patch-perf") as pbar: + with tqdm(range(len(dataset[name])), desc="sld-win-perf") as pbar: # we avoid the multiprocessing module if nproc==1 # so it is easier to run ipdb if nproc != 1: @@ -462,7 +451,7 @@ def patch_performances( pool = multiprocessing.Pool(nproc) results = [ pool.apply_async( - _patch_performances_for_sample, + _winperf_for_sample, args=( use_predictions_folder, threshold, @@ -483,7 +472,7 @@ def patch_performances( else: data = [] for k in pbar: - df = _patch_performances_for_sample( + winperf = _winperf_for_sample( use_predictions_folder, threshold, size, @@ -493,21 +482,22 @@ def patch_performances( figure, outdir, ) - data.append(df) + data.append(winperf) return dict(data) def _visual_performances_for_sample( - size, stride, dataset, k, df, figure, outdir + size, stride, dataset, k, winperf, figure, outdir ): """ - Displays patch performances per sample + Displays sliding windows performances per sample - This is a simplified version of :py:func:`_patch_performances_for_sample` - in which the patch performances are not recalculated and used as input. It - can be used in case you have the patch performances stored in disk or if - you're evaluating differences between patches of 2 different systems. + This is a simplified version of :py:func:`_winper_for_sample` + in which the sliding window performances are not recalculated and used as + input. It can be used in case you have the sliding window performances + stored in disk or if you're evaluating differences between sliding windows + of 2 different systems. Parameters @@ -526,20 +516,21 @@ def _visual_performances_for_sample( k : int the sample number (order inside the dataset, starting from zero), to - calculate patch performances for + calculate sliding window performances for - df : pandas.DataFrame - the previously calculated dataframe to use for this patch performance + winperf : numpy.ndarray + the previously calculated sliding window performances to use for this assessment. figure : str - the performance figure to use for calculating patch micro performances - (e.g. `f1_score` or `jaccard`). Must be available on the produced - performance dataframe. + the performance figure to use for calculating sliding window micro + performances (e.g. `accuracy`, `f1_score` or `jaccard`). Must be + a supported performance figure as defined in + :py:attr:`PERFORMANCE_FIGURES` outdir : :py:class:`str` - path were to save a visual representation of patch performances. If - set to ``None``, then do not save those to disk. 
+ path were to save a visual representation of sliding window + performances. If set to ``None``, then do not save those to disk. Returns @@ -551,9 +542,9 @@ def _visual_performances_for_sample( data : dict A dictionary containing the following fields: - * ``df``: a :py:class:`pandas.DataFrame` with the patch performance - figures in raster scan order. Notice this is just a copy of the - input data frame with the same name. + * ``winperf``: a 3D float :py:class:`numpy.ndarray` with the sliding + window performance figures. Notice this is just a copy of the input + sliding window performance figures with the same name. * ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as the original image pertaining to the analyzed sample, that indicates how many overlapping windows are available for each pixel in the @@ -569,23 +560,24 @@ def _visual_performances_for_sample( sample = dataset[k] n, avg, std = _performance_summary( - sample[1].shape[1:], df, size, stride, figure + sample[1].shape[1:], winperf, size, stride, figure ) if outdir is not None: _visual_dataset_performance(sample[0], sample[1], n, avg, std, outdir) - return sample[0], dict(df=df, n=n, avg=avg, std=std) + return sample[0], dict(winperf=winperf, n=n, avg=avg, std=std) def visual_performances( - dataset, name, dfs, size, stride, figure, nproc=1, outdir=None, + dataset, name, winperfs, size, stride, figure, nproc=1, outdir=None, ): """ - Displays the performances for multiple image patches, for a whole dataset + Displays the performances for for a whole dataset - This is a simplified version of :py:func:`patch_performances` in which the - patch performances are not recalculated and used as input. It can be used - in case you have the patch performances stored in disk or if you're - evaluating differences between patches of 2 different systems. + This is a simplified version of :py:func:`sliding_window_performances` in + which the sliding window performances are not recalculated and used as + input. It can be used in case you have the sliding window performances + stored in disk or if you're evaluating differences between sliding windows + of 2 different systems. Parameters @@ -598,9 +590,9 @@ def visual_performances( the local name of this dataset (e.g. ``train``, or ``test``), to be used when saving measures files. - dfs : dict - a dictionary mapping dataset stems to dataframes containing the patch - performances to be evaluated + winperfs : dict + a dictionary mapping dataset stems to arrays containing the sliding + window performances to be evaluated size : tuple size (vertical, horizontal) for windows for which we will calculate @@ -611,7 +603,8 @@ def visual_performances( partial performances based on the threshold and existing ground-truth figure : str - the performance figure to use for calculating patch micro performances + the performance figure to use for calculating sliding window micro + performances nproc : :py:class:`int`, Optional the number of processing cores to use for performance evaluation. @@ -621,8 +614,8 @@ def visual_performances( multiprocessing. outdir : :py:class:`str`, Optional - path were to save a visual representation of patch performances. If - set to ``None``, then do not save those to disk. + path were to save a visual representation of sliding window + performances. If set to ``None``, then do not save those to disk. 
Returns @@ -632,23 +625,23 @@ def visual_performances( A dictionary in which keys are filename stems and values are dictionaries with the following contents: - ``df``: :py:class:`pandas.DataFrame` - A dataframe with all the patch performances aggregated, for all - input images. + ``winperf``: numpy.ndarray + A 3D float array with all the sliding window performances for the + input image - ``n`` : :py:class:`numpy.ndarray` + ``n`` : numpy.ndarray A 2D numpy array containing the number of performance scores for every pixel in the original image - ``avg`` : :py:class:`numpy.ndarray` + ``avg`` : numpy.ndarray A 2D numpy array containing the average performances for every - pixel on the input image considering the patch sizes and strides - applied when windowing the image + pixel on the input image considering the sliding window sizes and + strides applied to the image ``std`` : :py:class:`numpy.ndarray` A 2D numpy array containing the (unbiased) standard deviations for the provided performance figure, for every pixel on the input image - considering the patch sizes and strides applied when windowing the + considering the sliding window sizes and strides applied to the image """ @@ -670,7 +663,7 @@ def visual_performances( stride, dataset[name], k, - dfs[stems[k]], + winperfs[stems[k]], figure, outdir, ), @@ -684,22 +677,22 @@ def visual_performances( else: data = [] for k in pbar: - df = _visual_performances_for_sample( + winperf = _visual_performances_for_sample( size, stride, dataset[name], k, - dfs[stems[k]], + winperfs[stems[k]], figure, outdir, ) - data.append(df) + data.append(winperf) return dict(data) def index_of_outliers(c): - """Finds indexes of outliers (+/- 1.5*IQR) on a pandas dataframe column + """Finds indexes of outliers (+/- 1.5*IQR) on an array of random values The IQR measures the midspread or where 50% of a normal distribution would sit, if the input data is, indeed, normal. 1.5 IQR corresponds to a @@ -711,32 +704,32 @@ def index_of_outliers(c): Parameters ---------- - c : pandas.DataFrame - This should be a **single** column of a pandas dataframe with the - ``quantile`` method + c : numpy.ndarray + A 1D float array Returns ------- - indexes : typing.Sequence + indexes : numpy.ndarray Indexes of the input column that are considered outliers in the distribution (outside the 1.5 Interquartile Range). """ - iqr = c.quantile(0.75) - c.quantile(0.25) - limits = (c.quantile(0.25) - 1.5 * iqr, c.quantile(0.75) + 1.5 * iqr) + l, h = numpy.quantile(c, (0.25, 0.75)) + iqr = h - l + limits = (l - 1.5 * iqr, h + 1.5 * iqr) return (c < limits[0]) | (c > limits[1]) def write_analysis_text(names, da, db, f): """Writes a text file containing the most important statistics - Compares patch performances in ``da`` and ``db`` taking into consideration - their statistical properties. A significance test is applied to check - whether observed differences in the statistics of both distributions is - significant. + Compares sliding window performances in ``da`` and ``db`` taking into + consideration their statistical properties. A significance test is applied + to check whether observed differences in the statistics of both + distributions is significant. 
Parameters @@ -747,14 +740,14 @@ def write_analysis_text(names, da, db, f): analyzed da : numpy.ndarray - A 1D numpy array containing all the performance figures per patch - analyzed and organized in a particular order (raster), for the first - system (first entry of ``names``) + A 1D numpy array containing all the performance figures per sliding + window analyzed and organized in a particular order (raster), for the + first system (first entry of ``names``) db : numpy.ndarray - A 1D numpy array containing all the performance figures per patch - analyzed and organized in a particular order (raster), for the second - system (second entry of ``names``) + A 1D numpy array containing all the performance figures per sliding + window analyzed and organized in a particular order (raster), for the + second system (second entry of ``names``) f : file An open file that will be used dump the analysis to @@ -802,7 +795,7 @@ def write_analysis_text(names, da, db, f): f.write(textwrap.indent(tdata, " ")) f.write("\n") - # Note: dependent variable = patch performance figure in our case + # Note: dependent variable = sliding window performance figure in our case # Assumptions of a Paired T-test: # * The dependent variable must be continuous (interval/ratio). [OK] # * The observations are independent of one another. [OK] @@ -811,10 +804,11 @@ def write_analysis_text(names, da, db, f): if (diff == 0.0).all(): logger.error("Differences are exactly zero between both " - "patch distributions, for **all** samples. Statistical " - "significance tests are not meaningful in this context and " - "will be skipped. This typically indicates an issue with " - "the setup of prediction folders (duplicated?)") + "sliding window distributions, for **all** samples. " + "Statistical significance tests are not meaningful in " + "this context and will be skipped. 
This typically " + "indicates an issue with the setup of prediction folders " + "(duplicated?)") return f.write("\nPaired significance tests:\n") @@ -851,14 +845,14 @@ def write_analysis_figures(names, da, db, fname): analyzed da : numpy.ndarray - A 1D numpy array containing all the performance figures per patch - analyzed and organized in a particular order (raster), for the first - system (first entry of ``names``) + A 1D numpy array containing all the performance figures per sliding + window analyzed and organized in a particular order (raster), for the + first system (first entry of ``names``) db : numpy.ndarray - A 1D numpy array containing all the performance figures per patch - analyzed and organized in a particular order (raster), for the second - system (second entry of ``names``) + A 1D numpy array containing all the performance figures per sliding + window analyzed and organized in a particular order (raster), for the + second system (second entry of ``names``) fname : str The filename to use for storing the summarized performance figures diff --git a/bob/ip/binseg/script/significance.py b/bob/ip/binseg/script/significance.py index 9e409b41..cf387c9b 100755 --- a/bob/ip/binseg/script/significance.py +++ b/bob/ip/binseg/script/significance.py @@ -19,15 +19,16 @@ logger = logging.getLogger(__name__) from .evaluate import _validate_threshold, run as run_evaluation from ..engine.significance import ( - patch_performances, + sliding_window_performances, visual_performances, write_analysis_text, write_analysis_figures, index_of_outliers, + PERFORMANCE_FIGURES, ) -def _eval_patches( +def _eval_sliding_windows( system_name, threshold, evaluate, @@ -41,7 +42,7 @@ def _eval_patches( nproc, checkpointdir, ): - """Calculates the patch performances on a dataset + """Calculates the sliding window performances on a dataset Parameters @@ -79,12 +80,12 @@ def _eval_patches( possible F1-score on train/test data. size : tuple - Two values indicating the size of windows to be used for patch - analysis. The values represent height and width respectively + Two values indicating the size of windows to be used for the sliding + window analysis. The values represent height and width respectively stride : tuple - Two values indicating the stride of windows to be used for patch - analysis. The values represent height and width respectively + Two values indicating the stride of windows to be used for the sliding + window analysis. The values represent height and width respectively outdir : str Path where to store visualizations. If set to ``None``, then do not @@ -102,7 +103,7 @@ def _eval_patches( checkpointdir : str If set to a string (instead of ``None``), then stores a cached version - of the patch performances on disk, for a particular system. + of the sliding window performances on disk, for a particular system. Returns @@ -112,45 +113,49 @@ def _eval_patches( A dictionary in which keys are filename stems and values are dictionaries with the following contents: - ``df``: :py:class:`pandas.DataFrame` - A dataframe with all the patch performances aggregated, for all - input images. + ``winperf``: numpy.ndarray + A dataframe with all the sliding window performances aggregated, + for all input images. 
- ``n`` : :py:class:`numpy.ndarray` + ``n`` : numpy.ndarray A 2D numpy array containing the number of performance scores for every pixel in the original image - ``avg`` : :py:class:`numpy.ndarray` + ``avg`` : numpy.ndarray A 2D numpy array containing the average performances for every - pixel on the input image considering the patch sizes and strides - applied when windowing the image + pixel on the input image considering the sliding window sizes and + strides applied to the image - ``std`` : :py:class:`numpy.ndarray` + ``std`` : numpy.ndarray A 2D numpy array containing the (unbiased) standard deviations for the provided performance figure, for every pixel on the input image - considering the patch sizes and strides applied when windowing the + considering the sliding window sizes and strides applied to the image """ if checkpointdir is not None: - chkpt_fname = os.path.join(checkpointdir, - f"{system_name}-{evaluate}-{threshold}-" \ - f"{size[0]}x{size[1]}+{stride[0]}x{stride[1]}-{figure}.pkl.gz" - ) + chkpt_fname = os.path.join( + checkpointdir, + f"{system_name}-{evaluate}-{threshold}-" + f"{size[0]}x{size[1]}+{stride[0]}x{stride[1]}-{figure}.pkl.gz", + ) os.makedirs(os.path.dirname(chkpt_fname), exist_ok=True) if os.path.exists(chkpt_fname): logger.info(f"Loading checkpoint from {chkpt_fname}...") # loads and returns checkpoint from file try: - with __import__('gzip').GzipFile(chkpt_fname, "r") as f: - return __import__('pickle').load(f) + with __import__("gzip").GzipFile(chkpt_fname, "r") as f: + return __import__("pickle").load(f) except EOFError as e: - logger.warning(f"Could not load patch performance from " \ - f"{chkpt_fname}: {e}. Calculating...") + logger.warning( + f"Could not load sliding window performance " + f"from {chkpt_fname}: {e}. Calculating..." + ) else: - logger.debug(f"Checkpoint not available at {chkpt_fname}. " \ - f"Calculating...") + logger.debug( + f"Checkpoint not available at {chkpt_fname}. " f"Calculating..." 
+ ) else: chkpt_fname = None @@ -167,13 +172,13 @@ def _eval_patches( ) logger.info(f"Set --threshold={threshold:.5f} for '{system_name}'") - # for a given threshold on each system, calculate patch performances + # for a given threshold on each system, calculate sliding window performances logger.info( - f"Evaluating patch '{figure}' on '{evaluate}' set for " + f"Evaluating sliding window '{figure}' on '{evaluate}' set for " f"'{system_name}' using windows of size {size} and stride {stride}" ) - retval = patch_performances( + retval = sliding_window_performances( dataset, evaluate, preddir, @@ -185,18 +190,28 @@ def _eval_patches( outdir, ) - # cache patch performance for later use, if necessary + # cache sliding window performance for later use, if necessary if chkpt_fname is not None: logger.debug(f"Storing checkpoint at {chkpt_fname}...") - with __import__('gzip').GzipFile(chkpt_fname, "w") as f: - __import__('pickle').dump(retval, f) + with __import__("gzip").GzipFile(chkpt_fname, "w") as f: + __import__("pickle").dump(retval, f) return retval -def _eval_differences(names, perfs, evaluate, dataset, size, stride, outdir, - figure, nproc, checkpointdir): - """Evaluate differences in the performance patches between two systems +def _eval_differences( + names, + perfs, + evaluate, + dataset, + size, + stride, + outdir, + figure, + nproc, + checkpointdir, +): + """Evaluate differences in the performance sliding windows between two systems Parameters ---------- @@ -205,8 +220,8 @@ def _eval_differences(names, perfs, evaluate, dataset, size, stride, outdir, Names of the first and second systems perfs : :py:class:`tuple` of :py:class:`dict` - Dictionaries for the patch performances of each system, as returned by - :py:func:`_eval_patches` + Dictionaries for the sliding window performances of each system, as + returned by :py:func:`_eval_sliding_windows` evaluate : str Name of the dataset key to use from ``dataset`` to evaluate (typically, @@ -217,12 +232,12 @@ def _eval_differences(names, perfs, evaluate, dataset, size, stride, outdir, :py:class:`torch.utils.data.dataset.Dataset` instances size : tuple - Two values indicating the size of windows to be used for patch + Two values indicating the size of windows to be used for sliding window analysis. The values represent height and width respectively stride : tuple - Two values indicating the stride of windows to be used for patch - analysis. The values represent height and width respectively + Two values indicating the stride of windows to be used for sliding + window analysis. The values represent height and width respectively outdir : str If set to ``None``, then do not output performance visualizations. @@ -241,80 +256,65 @@ def _eval_differences(names, perfs, evaluate, dataset, size, stride, outdir, checkpointdir : str If set to a string (instead of ``None``), then stores a cached version - of the patch performances on disk, for a particular difference between - systems. + of the sliding window performances on disk, for a particular difference + between systems. Returns ------- d : dict - A dictionary representing patch performance differences across all - files and patches. The format of this is similar to the individual - inputs ``perf1`` and ``perf2``. + A dictionary representing sliding window performance differences across + all files and sliding windows. The format of this is similar to the + individual inputs ``perf1`` and ``perf2``. 
""" if checkpointdir is not None: - chkpt_fname = os.path.join(checkpointdir, - f"{names[0]}-{names[1]}-{evaluate}-" \ - f"{size[0]}x{size[1]}+{stride[0]}x{stride[1]}-{figure}.pkl.gz" - ) + chkpt_fname = os.path.join( + checkpointdir, + f"{names[0]}-{names[1]}-{evaluate}-" + f"{size[0]}x{size[1]}+{stride[0]}x{stride[1]}-{figure}.pkl.gz", + ) os.makedirs(os.path.dirname(chkpt_fname), exist_ok=True) if os.path.exists(chkpt_fname): logger.info(f"Loading checkpoint from {chkpt_fname}...") # loads and returns checkpoint from file try: - with __import__('gzip').GzipFile(chkpt_fname, "r") as f: - return __import__('pickle').load(f) + with __import__("gzip").GzipFile(chkpt_fname, "r") as f: + return __import__("pickle").load(f) except EOFError as e: - logger.warning(f"Could not load patch performance from " \ - f"{chkpt_fname}: {e}. Calculating...") + logger.warning( + f"Could not load sliding window performance " + f"from {chkpt_fname}: {e}. Calculating..." + ) else: - logger.debug(f"Checkpoint not available at {chkpt_fname}. " \ - f"Calculating...") + logger.debug( + f"Checkpoint not available at {chkpt_fname}. " f"Calculating..." + ) else: chkpt_fname = None - perf_diff = dict([(k, perfs[0][k]["df"].copy()) for k in perfs[0]]) - - # we can subtract these - to_subtract = ( - "precision", - "recall", - "specificity", - "accuracy", - "jaccard", - "f1_score", + perf_diff = dict( + [(k, perfs[0][k]["winperf"] - perfs[1][k]["winperf"]) for k in perfs[0]] ) - for k in perf_diff: - for col in to_subtract: - perf_diff[k][col] -= perfs[1][k]["df"][col] - - # for a given threshold on each system, calculate patch performances + # for a given threshold on each system, calculate sliding window performances logger.info( - f"Evaluating patch '{figure}' differences on '{evaluate}' set on " - f"'{names[0]}-{names[1]}' using windows of size {size} and " + f"Evaluating sliding window '{figure}' differences on '{evaluate}' " + f"set on '{names[0]}-{names[1]}' using windows of size {size} and " f"stride {stride}" ) retval = visual_performances( - dataset, - evaluate, - perf_diff, - size, - stride, - figure, - nproc, - outdir, + dataset, evaluate, perf_diff, size, stride, figure, nproc, outdir, ) - # cache patch performance for later use, if necessary + # cache sliding window performance for later use, if necessary if chkpt_fname is not None: logger.debug(f"Storing checkpoint at {chkpt_fname}...") - with __import__('gzip').GzipFile(chkpt_fname, "w") as f: - __import__('pickle').dump(retval, f) + with __import__("gzip").GzipFile(chkpt_fname, "w") as f: + __import__("pickle").dump(retval, f) return retval @@ -408,8 +408,8 @@ def _eval_differences(names, perfs, evaluate, dataset, size, stride, outdir, "--size", "-s", help="This is a tuple with two values indicating the size of windows to " - "be used for patch analysis. The values represent height and width " - "respectively.", + "be used for sliding window analysis. The values represent height and " + "width respectively.", default=(128, 128), nargs=2, type=int, @@ -421,8 +421,8 @@ def _eval_differences(names, perfs, evaluate, dataset, size, stride, outdir, "--stride", "-t", help="This is a tuple with two values indicating the stride of windows to " - "be used for patch analysis. The values represent height and width " - "respectively.", + "be used for sliding window analysis. 
The values represent height and " + "width respectively.", default=(32, 32), nargs=2, type=int, @@ -487,7 +487,8 @@ def _eval_differences(names, perfs, evaluate, dataset, size, stride, outdir, @click.option( "--checkpoint-folder", "-k", - help="Path where to store checkpointed versions of patch performances", + help="Path where to store checkpointed versions of sliding window " + "performances", required=False, type=click.Path(), show_default=True, @@ -521,7 +522,7 @@ def significance( threshold = _validate_threshold(threshold, dataset) assert evaluate in dataset, f"No dataset named '{evaluate}'" - perf1 = _eval_patches( + perf1 = _eval_sliding_windows( names[0], threshold, evaluate, @@ -530,15 +531,17 @@ def significance( steps, size, stride, - (output_folder - if output_folder is None - else os.path.join(output_folder, names[0])), + ( + output_folder + if output_folder is None + else os.path.join(output_folder, names[0]) + ), figure, parallel, checkpoint_folder, ) - perf2 = _eval_patches( + perf2 = _eval_sliding_windows( names[1], threshold, evaluate, @@ -547,33 +550,38 @@ def significance( steps, size, stride, - (output_folder + ( + output_folder if output_folder is None - else os.path.join(output_folder, names[1])), + else os.path.join(output_folder, names[1]) + ), figure, parallel, checkpoint_folder, ) perf_diff = _eval_differences( - names, - (perf1, perf2), - evaluate, - dataset, - size, - stride, - (output_folder - if output_folder is None - else os.path.join(output_folder, "diff")), - figure, - parallel, - checkpoint_folder, - ) + names, + (perf1, perf2), + evaluate, + dataset, + size, + stride, + ( + output_folder + if output_folder is None + else os.path.join(output_folder, "diff") + ), + figure, + parallel, + checkpoint_folder, + ) # loads all figures for the given threshold stems = list(perf1.keys()) - da = numpy.array([perf1[k]["df"][figure] for k in stems]).flatten() - db = numpy.array([perf2[k]["df"][figure] for k in stems]).flatten() + figindex = PERFORMANCE_FIGURES.index(figure) + da = numpy.array([perf1[k]["winperf"][figindex] for k in stems]).flatten() + db = numpy.array([perf2[k]["winperf"][figindex] for k in stems]).flatten() diff = da - db while remove_outliers: diff --git a/bob/ip/binseg/test/test_significance.py b/bob/ip/binseg/test/test_significance.py index 0e5c5dd5..7d00ff8b 100755 --- a/bob/ip/binseg/test/test_significance.py +++ b/bob/ip/binseg/test/test_significance.py @@ -9,159 +9,220 @@ import pandas import nose.tools import torch -from ..engine.significance import _patch_measures +from ..engine.significance import ( + _winperf_measures, + _performance_summary, + PERFORMANCE_FIGURES, +) from ..utils.measure import base_measures -def _check_patch_measures(pred, gt, threshold, size, stride, expected): +def _check_window_measures(pred, gt, threshold, size, stride, expected): pred = torch.tensor(pred) gt = torch.tensor(gt) - actual = _patch_measures(pred, gt, threshold, size, stride) + actual = _winperf_measures(pred, gt, threshold, size, stride) # transforms tp,tn,fp,fn through base_measures() - expected = pandas.DataFrame([k[:2] + base_measures(*k[2:]) for k in expected], - columns=[ - "y", - "x", - "precision", # tp/(tp+fp) - "recall", # tpr = tp/p = tp/(tp+fn) - "specificity", # tnr = tn/n = tn/(tn+fp) - "accuracy", # (tp+tn)/(p+n) = (tp+tn)/(tp+fn+tn+fp) - "jaccard", # f1/(2-f1) = tp/(tp+fp+fn) - "f1_score", # 2*rp/(r+p) = 2*tp/(2*tp+fp+fn) - ]) - - assert (actual == expected).all().all(), f"Actual output:\n{actual}\n " \ - f"**!=** Expected 
output:\n{expected}" - - -def test_patch_measures_alltrue(): - - pred = numpy.ones((4,4), dtype=float) - gt = numpy.ones((4,4), dtype=bool) + expected_shape = numpy.array(expected).shape[:2] + expected = numpy.array([base_measures(*c) for r in expected for c in r]).T + expected = expected.reshape((len(PERFORMANCE_FIGURES),) + expected_shape) + + assert numpy.allclose( + actual, expected + ), f"Actual output:\n{actual}\n **!=** Expected output:\n{expected}" + + +def test_winperf_measures_alltrue(): + + pred = numpy.ones((4, 4), dtype=float) + gt = numpy.ones((4, 4), dtype=bool) threshold = 0.5 - size = (2,2) - stride = (1,1) + size = (2, 2) + stride = (1, 1) expected = [ - #y, x, tp, fp, tn, fn - (0, 0, 4, 0, 0, 0), - (0, 1, 4, 0, 0, 0), - (0, 2, 4, 0, 0, 0), - (1, 0, 4, 0, 0, 0), - (1, 1, 4, 0, 0, 0), - (1, 2, 4, 0, 0, 0), - (2, 0, 4, 0, 0, 0), - (2, 1, 4, 0, 0, 0), - (2, 2, 4, 0, 0, 0), - ] - _check_patch_measures(pred, gt, threshold, size, stride, expected) - - -def test_patch_measures_alltrue_with_padding(): - - pred = numpy.ones((3,3), dtype=float) - gt = numpy.ones((3,3), dtype=bool) + # tp, fp, tn, fn + [(4, 0, 0, 0), (4, 0, 0, 0), (4, 0, 0, 0)], + [(4, 0, 0, 0), (4, 0, 0, 0), (4, 0, 0, 0)], + [(4, 0, 0, 0), (4, 0, 0, 0), (4, 0, 0, 0)], + ] + _check_window_measures(pred, gt, threshold, size, stride, expected) + + +def test_winperf_measures_alltrue_with_padding(): + + pred = numpy.ones((3, 3), dtype=float) + gt = numpy.ones((3, 3), dtype=bool) threshold = 0.5 - size = (2,2) - stride = (2,2) + size = (2, 2) + stride = (2, 2) expected = [ - #y, x, tp, fp, tn, fn - (0, 0, 4, 0, 0, 0), - (0, 1, 2, 0, 2, 0), - (1, 0, 2, 0, 2, 0), - (1, 1, 1, 0, 3, 0), - ] - _check_patch_measures(pred, gt, threshold, size, stride, expected) + # tp, fp, tn, fn + [(4, 0, 0, 0), (2, 0, 2, 0)], + [(2, 0, 2, 0), (1, 0, 3, 0)], + ] + _check_window_measures(pred, gt, threshold, size, stride, expected) -def test_patch_measures_dot_with_padding(): +def test_winperf_measures_dot_with_padding(): - pred = numpy.ones((3,3), dtype=float) - gt = numpy.zeros((3,3), dtype=bool) - gt[1,1] = 1.0 #white dot pattern + pred = numpy.ones((3, 3), dtype=float) + gt = numpy.zeros((3, 3), dtype=bool) + gt[1, 1] = 1.0 # white dot pattern threshold = 0.5 - size = (2,2) - stride = (2,2) + size = (2, 2) + stride = (2, 2) expected = [ - #y, x, tp, fp, tn, fn - (0, 0, 1, 3, 0, 0), - (0, 1, 0, 2, 2, 0), - (1, 0, 0, 2, 2, 0), - (1, 1, 0, 1, 3, 0), - ] - _check_patch_measures(pred, gt, threshold, size, stride, expected) - - -def test_patch_measures_cross(): - - pred = numpy.zeros((5,5), dtype=float) - pred[2,:] = 1.0 - pred[:,2] = 1.0 - pred[2,2] = 0.0 #make one mistake at the center of the cross - gt = numpy.zeros((5,5), dtype=bool) - gt[2,:] = 1.0 - gt[:,2] = 1.0 #white cross pattern + # tp, fp, tn, fn + [(1, 3, 0, 0), (0, 2, 2, 0)], + [(0, 2, 2, 0), (0, 1, 3, 0)], + ] + _check_window_measures(pred, gt, threshold, size, stride, expected) + + +def test_winperf_measures_cross(): + + pred = numpy.zeros((5, 5), dtype=float) + pred[2, :] = 1.0 + pred[:, 2] = 1.0 + pred[2, 2] = 0.0 # make one mistake at the center of the cross + gt = numpy.zeros((5, 5), dtype=bool) + gt[2, :] = 1.0 + gt[:, 2] = 1.0 # white cross pattern threshold = 0.5 - size = (3,3) - stride = (1,1) + size = (3, 3) + stride = (1, 1) expected = [ - #y, x, tp, fp, tn, fn - (0, 0, 4, 0, 4, 1), - (0, 1, 4, 0, 4, 1), - (0, 2, 4, 0, 4, 1), - (1, 0, 4, 0, 4, 1), - (1, 1, 4, 0, 4, 1), - (1, 2, 4, 0, 4, 1), - (2, 0, 4, 0, 4, 1), - (2, 1, 4, 0, 4, 1), - (2, 2, 4, 0, 4, 1), - ] - 
_check_patch_measures(pred, gt, threshold, size, stride, expected) - - -def test_patch_measures_cross_with_padding(): - - pred = numpy.zeros((5,5), dtype=float) - gt = numpy.zeros((5,5), dtype=bool) - gt[2,:] = 1.0 - gt[:,2] = 1.0 #white cross pattern + # tp, fp, tn, fn + [(4, 0, 4, 1), (4, 0, 4, 1), (4, 0, 4, 1)], + [(4, 0, 4, 1), (4, 0, 4, 1), (4, 0, 4, 1)], + [(4, 0, 4, 1), (4, 0, 4, 1), (4, 0, 4, 1)], + ] + _check_window_measures(pred, gt, threshold, size, stride, expected) + + +def test_winperf_measures_cross_with_padding(): + + pred = numpy.zeros((5, 5), dtype=float) + gt = numpy.zeros((5, 5), dtype=bool) + gt[2, :] = 1.0 + gt[:, 2] = 1.0 # white cross pattern threshold = 0.5 - size = (4,4) - stride = (2,2) + size = (4, 4) + stride = (2, 2) expected = [ - #y, x, tp, fp, tn, fn - (0, 0, 0, 0, 9, 7), - (0, 1, 0, 0, 10, 6), - (1, 0, 0, 0, 10, 6), - (1, 1, 0, 0, 11, 5), - ] - _check_patch_measures(pred, gt, threshold, size, stride, expected) - - -def test_patch_measures_cross_with_padding_2(): - - pred = numpy.zeros((5,5), dtype=float) - pred[2,:] = 1.0 - pred[:,2] = 1.0 - pred[2,2] = 0.0 #make one mistake at the center of the cross - gt = numpy.zeros((5,5), dtype=bool) - gt[2,:] = 1.0 - gt[:,2] = 1.0 #white cross pattern + # tp, fp, tn, fn + [(0, 0, 9, 7), (0, 0, 10, 6)], + [(0, 0, 10, 6), (0, 0, 11, 5)], + ] + _check_window_measures(pred, gt, threshold, size, stride, expected) + + +def test_winperf_measures_cross_with_padding_2(): + + pred = numpy.zeros((5, 5), dtype=float) + pred[2, :] = 1.0 + pred[:, 2] = 1.0 + pred[2, 2] = 0.0 # make one mistake at the center of the cross + gt = numpy.zeros((5, 5), dtype=bool) + gt[2, :] = 1.0 + gt[:, 2] = 1.0 # white cross pattern threshold = 0.5 - size = (4,4) - stride = (2,2) + size = (4, 4) + stride = (2, 2) expected = [ - #y, x, tp, fp, tn, fn - (0, 0, 6, 0, 9, 1), - (0, 1, 5, 0, 10, 1), - (1, 0, 5, 0, 10, 1), - (1, 1, 4, 0, 11, 1), - ] - _check_patch_measures(pred, gt, threshold, size, stride, expected) + # tp, fp, tn, fn + [(6, 0, 9, 1), (5, 0, 10, 1)], + [(5, 0, 10, 1), (4, 0, 11, 1)], + ] + _check_window_measures(pred, gt, threshold, size, stride, expected) + + +def _check_performance_summary(pred, gt, threshold, size, stride, s, figure): + + figsize = pred.shape + pred = torch.tensor(pred) + gt = torch.tensor(gt) + + # notice _winperf_measures() was previously tested (above) + measures = _winperf_measures(pred, gt, threshold, size, stride) + + n_actual, avg_actual, std_actual = _performance_summary( + figsize, measures, size, stride, figure + ) + + # the following code is not optimal, but easier to debug than the + # list comprehension versions: + # n_expected = numpy.array([len(k) for j in s for k in j]).reshape(figsize) + # avg_expected = numpy.array([measures.iloc[k][figure].mean() for j in s for k in j]).reshape(figsize) + # std_expected = numpy.array([measures.iloc[k][figure].std(ddof=1) for j in s for k in j]).reshape(figsize) + n_expected = numpy.zeros_like(n_actual) + avg_expected = numpy.zeros_like(avg_actual) + std_expected = numpy.zeros_like(std_actual) + figindex = PERFORMANCE_FIGURES.index(figure) + for y, row in enumerate(s): + for x, cell in enumerate(row): + n_expected[y, x] = len(cell) + entries = tuple(numpy.array(cell).T) # convert indexing to numpy + avg_expected[y, x] = measures[figindex][entries].mean() + std_expected[y, x] = measures[figindex][entries].std(ddof=1) + std_expected = numpy.nan_to_num(std_expected) + + assert (n_actual == n_expected).all(), ( + f"Actual N output:\n{n_actual}\n " + f"**!=** Expected N 
output:\n{n_expected}" + ) + + assert (avg_actual == avg_expected).all(), ( + f"Actual average output:\n{avg_actual}\n " + f"**!=** Expected average output:\n{avg_expected}" + ) + + assert (std_actual == std_expected).all(), ( + f"Actual std.deviation output:\n{std_actual}\n " + f"**!=** Expected std.deviation output:\n{std_expected}" + ) + + +def test_performance_summary_alltrue_accuracy(): + + pred = numpy.ones((4, 4), dtype=float) + gt = numpy.ones((4, 4), dtype=bool) + threshold = 0.5 + size = (2, 2) + stride = (1, 1) + + # what we expect will happen for the accumulation of statistics + # each number represents the pandas dataframe index in ``measures`` + # that needs to be accumulated for that particular pixel in the + # original image + stats = [ + # first row of image + [[(0, 0)], [(0, 0), (0, 1)], [(0, 1), (0, 2)], [(0, 2)]], + # second row of image + [ + [(0, 0), (1, 0)], + [(0, 0), (0, 1), (1, 0), (1, 1)], + [(0, 1), (0, 2), (1, 1), (1, 2)], + [(0, 2), (1, 2)], + ], + # third row of image + [ + [(1, 0), (2, 0)], + [(1, 0), (1, 1), (2, 0), (2, 1)], + [(1, 1), (1, 2), (2, 1), (2, 2)], + [(1, 2), (2, 2)], + ], + # fourth row of image + [[(2, 0)], [(2, 0), (2, 1)], [(2, 1), (2, 2)], [(2, 2)]], + ] + + _check_performance_summary( + pred, gt, threshold, size, stride, stats, "accuracy" + ) -- GitLab
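The windowing logic this patch migrates from pandas to plain numpy can be illustrated independently of the package. Below is a minimal, self-contained sketch (not the package's actual code) of how ``torch.Tensor.unfold`` yields one figure per sliding window; a simple per-window accuracy stands in for ``_sample_measures_for_threshold``, and the right/bottom zero-extension follows the behaviour described in the docstrings above::

    import numpy
    import torch


    def window_accuracy(pred, gt, threshold, size, stride):
        """One scalar (accuracy) per sliding window of ``pred`` vs. ``gt``."""

        gt = gt.float()

        # zero-extend right/bottom so the last windows are complete, mirroring
        # how torch unfolding is described in the docstrings above
        pad_h = (-(pred.shape[0] - size[0])) % stride[0]
        pad_w = (-(pred.shape[1] - size[1])) % stride[1]
        # torch.nn.functional.pad takes (left, right, top, bottom) for 2D input
        pred = torch.nn.functional.pad(pred, (0, pad_w, 0, pad_h))
        gt = torch.nn.functional.pad(gt, (0, pad_w, 0, pad_h))

        # two chained unfolds -> views of shape (ny, nx, size[0], size[1])
        pred_w = pred.unfold(0, size[0], stride[0]).unfold(1, size[1], stride[1])
        gt_w = gt.unfold(0, size[0], stride[0]).unfold(1, size[1], stride[1])

        ny, nx = pred_w.shape[:2]
        out = numpy.zeros((ny, nx))
        for j in range(ny):
            for i in range(nx):
                binary = (pred_w[j, i] >= threshold).float()
                out[j, i] = (binary == gt_w[j, i]).float().mean().item()
        return out


    pred = torch.rand(5, 5)
    gt = torch.zeros(5, 5, dtype=torch.bool)
    gt[2, :] = True  # horizontal white line
    print(window_accuracy(pred, gt, 0.5, size=(4, 4), stride=(2, 2)))  # 2x2 grid

In the patched module the same unfolding produces the full set of six measures per window, stacked into the ``(6, ny, nx)`` array documented for ``_winperf_measures``.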
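The ``index_of_outliers`` helper, rewritten in this patch to use ``numpy.quantile`` instead of ``pandas.DataFrame.quantile``, applies the usual 1.5x interquartile-range rule. A short usage sketch with synthetic window scores (the data and seed are illustrative only)::

    import numpy


    def index_of_outliers(c):
        # same rule as in the patch: flag values beyond 1.5x the IQR
        low, high = numpy.quantile(c, (0.25, 0.75))
        iqr = high - low
        return (c < low - 1.5 * iqr) | (c > high + 1.5 * iqr)


    rng = numpy.random.default_rng(0)
    scores = numpy.concatenate([rng.normal(0.8, 0.05, 1000), [0.05, 0.10]])
    mask = index_of_outliers(scores)
    print(f"removing {int(mask.sum())} of {len(scores)} window scores")
    scores = scores[~mask]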
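``write_analysis_text`` applies paired significance tests to the per-window differences between the two systems. The exact calls are not shown in this hunk, so the use of ``scipy.stats.ttest_rel`` and ``scipy.stats.wilcoxon`` below is an assumption, and the arrays are synthetic; the sketch only shows what such paired tests look like on flattened per-window figures::

    import numpy
    import scipy.stats

    # da/db: flattened per-window figures for systems A and B (synthetic here)
    rng = numpy.random.default_rng(0)
    da = rng.normal(0.80, 0.05, 500)
    db = da + rng.normal(0.01, 0.02, 500)  # system B marginally different

    # paired t-test on the same windows (assumes roughly normal differences)
    t, p_t = scipy.stats.ttest_rel(da, db)
    # non-parametric alternative on the paired differences
    w, p_w = scipy.stats.wilcoxon(da - db)
    print(f"paired t-test p={p_t:.3g}, wilcoxon p={p_w:.3g}")

As the patched code notes, these tests are skipped when the differences are exactly zero for all windows, since that situation usually indicates duplicated prediction folders rather than a meaningful comparison.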