Skip to content
Snippets Groups Projects
Commit e48694a4 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[engine.significance] Document auxiliary functions

parent f3527300
No related branches found
No related tags found
No related merge requests found
...@@ -219,7 +219,39 @@ def _patch_measures(pred, gt, threshold, size, stride): ...@@ -219,7 +219,39 @@ def _patch_measures(pred, gt, threshold, size, stride):
def _visual_dataset_performance(stem, img, n, avg, std, outdir): def _visual_dataset_performance(stem, img, n, avg, std, outdir):
"""Runs a visual performance assessment for each entry in a given dataset""" """Runs a visual performance assessment for each entry in a given dataset
Parameters
----------
stem : str
The input file stem, for which a figure will be saved in ``outdir``,
in PDF format
img : pytorch.Tensor
A 3D tensor containing the original image that was analyzed
n : numpy.ndarray
A 2D integer array with the same size as `img` that indicates how many
overlapping windows are available for each pixel in the image
avg : numpy.ndarray
A 2D floating-point array with the average performance per pixel
calculated over all overlapping windows for that particular pixel
std : numpy.ndarray
A 2D floating-point array with the unbiased standard-deviation
(``ddof=1``) performance per pixel calculated over all overlapping
windows for that particular pixel
outdir : str
The base directory where to save output PDF images generated by this
procedure. The value of ``stem`` will be suffixed to this output
directory using a standard path join. The output filename will have a
``.pdf`` extension.
"""
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
...@@ -255,10 +287,70 @@ def _visual_dataset_performance(stem, img, n, avg, std, outdir): ...@@ -255,10 +287,70 @@ def _visual_dataset_performance(stem, img, n, avg, std, outdir):
def _patch_performances_for_sample( def _patch_performances_for_sample(
basedir, threshold, size, stride, dataset, k, figure, outdir=None, basedir, threshold, size, stride, dataset, k, figure, outdir,
): ):
""" """
Evaluates patch performances per sample Evaluates patch performances per sample
Parameters
----------
basedir : str
folder where predictions for the dataset images have been previously
stored
threshold : :py:class:`float`
this should be a threshold (floating point) to apply to prediction maps
to decide on positives and negatives.
size : tuple
size (vertical, horizontal) for windows for which we will calculate
partial performances based on the threshold and existing ground-truth
stride : tuple
strides (vertical, horizontal) for windows for which we will calculate
partial performances based on the threshold and existing ground-truth
dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
datasets to iterate on
k : int
the sample number (order inside the dataset, starting from zero), to
calculate patch performances for
figure : str
the performance figure to use for calculating patch micro performances
(e.g. `f1_score` or `jaccard`). Must be available on the produced
performance dataframe.
outdir : :py:class:`str`
path where to save a visual representation of patch performances. If
set to ``None``, then do not save those to disk.
Returns
-------
stem : str
The input file stem, that was just analyzed
data : dict
A dictionary containing the following fields:
* ``df``: a :py:class:`pandas.DataFrame` with the patch performance
figures in raster scan order.
* ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
the original image pertaining to the analyzed sample, that indicates
how many overlapping windows are available for each pixel in the
image
* ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
average performance per pixel calculated over all overlapping windows
for that particular pixel
* ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
unbiased standard-deviation (``ddof=1``) performance per pixel
calculated over all overlapping windows for that particular pixel
""" """
sample = dataset[k] sample = dataset[k]
...@@ -357,13 +449,13 @@ def patch_performances( ...@@ -357,13 +449,13 @@ def patch_performances(
""" """
# Collect overall measures
use_predictions_folder = os.path.join(predictions_folder, name) use_predictions_folder = os.path.join(predictions_folder, name)
if not os.path.exists(use_predictions_folder): if not os.path.exists(use_predictions_folder):
use_predictions_folder = predictions_folder use_predictions_folder = predictions_folder
with tqdm(range(len(dataset[name])), desc="patch-perf") as pbar: with tqdm(range(len(dataset[name])), desc="patch-perf") as pbar:
# we avoid the multiprocessing module if nproc==1
# so it is easier to run ipdb
if nproc != 1: if nproc != 1:
if nproc <= 0: if nproc <= 0:
nproc = multiprocessing.cpu_count() nproc = multiprocessing.cpu_count()
...@@ -406,11 +498,71 @@ def patch_performances( ...@@ -406,11 +498,71 @@ def patch_performances(
return dict(data) return dict(data)
def _visual_performances_for_sample( def _visual_performances_for_sample(size, stride, dataset, k, df, figure, outdir):
size, stride, dataset, k, df, figure, outdir=None
):
""" """
Displays patch performances per sample Displays patch performances per sample
This is a simplified version of :py:func:`_patch_performances_for_sample`
in which the patch performances are not recalculated, but used as input. It
can be used in case you have the patch performances stored on disk or if
you're evaluating differences between patches of 2 different systems.
Parameters
----------
size : tuple
size (vertical, horizontal) for windows for which we will calculate
partial performances based on the threshold and existing ground-truth
stride : tuple
strides (vertical, horizontal) for windows for which we will calculate
partial performances based on the threshold and existing ground-truth
dataset : :py:class:`dict` of :py:class:`torch.utils.data.Dataset`
datasets to iterate on
k : int
the sample number (order inside the dataset, starting from zero), to
calculate patch performances for
df : pandas.DataFrame
the previously calculated dataframe to use for this patch performance
assessment.
figure : str
the performance figure to use for calculating patch micro performances
(e.g. `f1_score` or `jaccard`). Must be available on the produced
performance dataframe.
outdir : :py:class:`str`
path where to save a visual representation of patch performances. If
set to ``None``, then do not save those to disk.
Returns
-------
stem : str
The input file stem, that was just analyzed
data : dict
A dictionary containing the following fields:
* ``df``: a :py:class:`pandas.DataFrame` with the patch performance
figures in raster scan order. Notice this is just a copy of the
input data frame with the same name.
* ``n``: a 2D integer :py:class:`numpy.ndarray` with the same size as
the original image pertaining to the analyzed sample, that indicates
how many overlapping windows are available for each pixel in the
image
* ``avg``: a 2D floating-point :py:class:`numpy.ndarray` with the
average performance per pixel calculated over all overlapping windows
for that particular pixel
* ``std``: a 2D floating-point :py:class:`numpy.ndarray` with the
unbiased standard-deviation (``ddof=1``) performance per pixel
calculated over all overlapping windows for that particular pixel
""" """
sample = dataset[k] sample = dataset[k]
...@@ -428,6 +580,11 @@ def visual_performances( ...@@ -428,6 +580,11 @@ def visual_performances(
""" """
Displays the performances for multiple image patches, for a whole dataset Displays the performances for multiple image patches, for a whole dataset
This is a simplified version of :py:func:`patch_performances` in which the
patch performances are not recalculated, but used as input. It can be used
in case you have the patch performances stored on disk or if you're
evaluating differences between patches of 2 different systems.
Parameters Parameters
--------- ---------
...@@ -497,6 +654,8 @@ def visual_performances( ...@@ -497,6 +654,8 @@ def visual_performances(
stems = list(dataset[name].keys()) stems = list(dataset[name].keys())
with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar: with tqdm(range(len(dataset[name])), desc="visual-perf") as pbar:
# we avoid the multiprocessing module if nproc==1
# so it is easier to run ipdb
if nproc != 1: if nproc != 1:
if nproc <= 0: if nproc <= 0:
nproc = multiprocessing.cpu_count() nproc = multiprocessing.cpu_count()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment