diff --git a/bob/ip/binseg/engine/evaluator.py b/bob/ip/binseg/engine/evaluator.py index af2c0a2d1e2cabe5b1eb6a226d87aebd89dca7cb..d6aff378e64cb5e2edb69bc0cb5150acf0f29564 100644 --- a/bob/ip/binseg/engine/evaluator.py +++ b/bob/ip/binseg/engine/evaluator.py @@ -5,6 +5,7 @@ import os +import PIL import numpy import pandas from tqdm import tqdm @@ -22,7 +23,34 @@ import logging logger = logging.getLogger(__name__) -def _sample_metrics(stem, pred, gt): +def _posneg(pred, gt, threshold): + """Calculates true and false positives and negatives""" + + gt = gt.byte() # byte tensor + + # threshold + binary_pred = torch.gt(pred, threshold).byte() + + # equals and not-equals + equals = torch.eq(binary_pred, gt).type(torch.uint8) # tensor + notequals = torch.ne(binary_pred, gt).type(torch.uint8) # tensor + + # true positives + tp_tensor = gt * binary_pred + + # false positives + fp_tensor = torch.eq((binary_pred + tp_tensor), 1) + + # true negatives + tn_tensor = equals - tp_tensor + + # false negatives + fn_tensor = notequals - fp_tensor.type(torch.uint8) + + return tp_tensor, fp_tensor, tn_tensor, fn_tensor + + +def _sample_metrics(pred, gt): """ Calculates metrics on one single sample and saves it to disk @@ -30,9 +58,6 @@ def _sample_metrics(stem, pred, gt): Parameters ---------- - stem : str - original filename without extension and relative to its root-path - pred : torch.Tensor pixel-wise predictions @@ -58,36 +83,17 @@ def _sample_metrics(stem, pred, gt): """ step_size = 0.01 - gts = gt.byte() - data = [] for threshold in numpy.arange(0.0, 1.0, step_size): - # threshold - binary_pred = torch.gt(pred, threshold).byte() - - # equals and not-equals - equals = torch.eq(binary_pred, gts).type(torch.uint8) # tensor - notequals = torch.ne(binary_pred, gts).type(torch.uint8) # tensor + tp_tensor, fp_tensor, tn_tensor, fn_tensor = _posneg(pred, gt, threshold) - # true positives - tp_tensor = gt * binary_pred # tensor - tp_count = torch.sum(tp_tensor).item() # scalar - - # false positives - fp_tensor = torch.eq((binary_pred + tp_tensor), 1) + # calc metrics from scalars + tp_count = torch.sum(tp_tensor).item() fp_count = torch.sum(fp_tensor).item() - - # true negatives - tn_tensor = equals - tp_tensor tn_count = torch.sum(tn_tensor).item() - - # false negatives - fn_tensor = notequals - fp_tensor.type(torch.uint8) fn_count = torch.sum(fn_tensor).item() - - # calc metrics precision, recall, specificity, accuracy, jaccard, f1_score = \ base_metrics(tp_count, fp_count, tn_count, fn_count) @@ -105,7 +111,82 @@ def _sample_metrics(stem, pred, gt): )) -def run(data_loader, predictions_folder, output_folder): +def _sample_analysis( + img, + pred, + gt, + threshold, + tp_color=(0, 255, 0), # (128,128,128) Gray + fp_color=(0, 0, 255), # (70, 240, 240) Cyan + fn_color=(255, 0, 0), # (245, 130, 48) Orange + overlay=True, + ): + """Visualizes true positives, false positives and false negatives + + + Parameters + ---------- + + img : torch.Tensor + original image + + pred : torch.Tensor + pixel-wise predictions + + gt : torch.Tensor + ground-truth (annotations) + + threshold : float + The threshold to be used while analyzing this image's probability map + + tp_color : tuple + RGB value for true positives + + fp_color : tuple + RGB value for false positives + + fn_color : tuple + RGB value for false negatives + + overlay : :py:class:`bool`, Optional + If set to ``True`` (which is the default), then overlay annotations on + top of the image. Otherwise, represent data on a black canvas. 
+ + + Returns + ------- + + figure : PIL.Image.Image + + A PIL image that contains the overlayed analysis of true-positives + (TP), false-positives (FP) and false negatives (FN). + + """ + + tp_tensor, fp_tensor, tn_tensor, fn_tensor = _posneg(pred, gt, threshold) + + # change to PIL representation + tp_pil = VF.to_pil_image(tp_tensor.float()) + tp_pil_colored = PIL.ImageOps.colorize(tp_pil, (0, 0, 0), tp_color) + + fp_pil = VF.to_pil_image(fp_tensor.float()) + fp_pil_colored = PIL.ImageOps.colorize(fp_pil, (0, 0, 0), fp_color) + + fn_pil = VF.to_pil_image(fn_tensor.float()) + fn_pil_colored = PIL.ImageOps.colorize(fn_pil, (0, 0, 0), fn_color) + + tp_pil_colored.paste(fp_pil_colored, mask=fp_pil) + tp_pil_colored.paste(fn_pil_colored, mask=fn_pil) + + if overlay: + img = VF.to_pil_image(img) # PIL Image + tp_pil_colored = PIL.Image.blend(img, tp_pil_colored, 0.4) + + return tp_pil_colored + + +def run(data_loader, predictions_folder, output_folder, overlayed_folder=None, + overlay_threshold=None): """ Runs inference and calculates metrics @@ -123,6 +204,17 @@ def run(data_loader, predictions_folder, output_folder): output_folder : str folder where to store results + overlayed_folder : :py:class:`str`, Optional + if not ``None``, then it should be the name of a folder where to store + overlayed versions of the images and ground-truths + + overlay_threshold : :py:class:`float`, Optional + if ``overlayed_folder``, then this should be threshold (floating point) + to apply to prediction maps to decide on positives and negatives for + overlaying analysis (graphical output). This number should come from + the training set or a separate validation set. Using a test set value + may bias your analysis. + """ logger.info("Start evaluation") @@ -146,7 +238,18 @@ def run(data_loader, predictions_folder, output_folder): if stem in data: raise RuntimeError(f"{stem} entry already exists in data. 
" f"Cannot overwrite.") - data[stem] = _sample_metrics(stem, pred, gt) + data[stem] = _sample_metrics(pred, gt) + + if overlayed_folder is not None: + overlay_image = _sample_analysis(image, pred, gt, + threshold=overlay_threshold, overlay=True) + fullpath = os.path.join(overlayed_folder, f"{stem}.png") + tqdm.write(f"Saving {fullpath}...") + fulldir = os.path.dirname(fullpath) + if not os.path.exists(fulldir): + tqdm.write(f"Creating directory {fulldir}...") + os.makedirs(fulldir, exist_ok=True) + overlay_image.save(fullpath) # Merges all dataframes together df_metrics = pandas.concat(data.values()) diff --git a/bob/ip/binseg/engine/ssltrainer.py b/bob/ip/binseg/engine/ssltrainer.py index 4d1d1c2c2d778b01b7ffc7c31c8b2b7b1cbeb7cc..7bd4aba027cd70fd62286abfa154445516a3a48b 100644 --- a/bob/ip/binseg/engine/ssltrainer.py +++ b/bob/ip/binseg/engine/ssltrainer.py @@ -338,7 +338,7 @@ def run( # plots a version of the CSV trainlog into a PDF logdf = pd.read_csv(logfile_name, header=0, names=logfile_fields) - fig = loss_curve(logdf, title="Loss Evolution") + fig = loss_curve(logdf) figurefile_name = os.path.join(output_folder, "trainlog.pdf") logger.info(f"Saving {figurefile_name}") fig.savefig(figurefile_name) diff --git a/bob/ip/binseg/engine/trainer.py b/bob/ip/binseg/engine/trainer.py index da49327b605f9f3362ccc98724dc81dfc129a935..040e5651514a1b12d89f3a3c00b2f8b9b5eb90d8 100644 --- a/bob/ip/binseg/engine/trainer.py +++ b/bob/ip/binseg/engine/trainer.py @@ -180,7 +180,7 @@ def run( # plots a version of the CSV trainlog into a PDF logdf = pandas.read_csv(logfile_name, header=0, names=logfile_fields) - fig = loss_curve(logdf, title="Loss Evolution") + fig = loss_curve(logdf) figurefile_name = os.path.join(output_folder, "trainlog.pdf") logger.info(f"Saving {figurefile_name}") fig.savefig(figurefile_name) diff --git a/bob/ip/binseg/script/binseg.py b/bob/ip/binseg/script/binseg.py index 65a9b1664dd9dce2a70b215e4414c44cb1040844..0a301ddf33cd3f950d41d3dd705a805c7d573880 100644 --- a/bob/ip/binseg/script/binseg.py +++ b/bob/ip/binseg/script/binseg.py @@ -3,171 +3,12 @@ """The main entry for bob ip binseg (click-based) scripts.""" - -import os import pkg_resources - import click from click_plugins import with_plugins - -import logging -import torch - -from bob.extension.scripts.click_helper import ( - verbosity_option, - ConfigCommand, - ResourceOption, - AliasedGroup, -) - -from bob.ip.binseg.utils.checkpointer import DetectronCheckpointer -from torch.utils.data import DataLoader -from bob.ip.binseg.utils.plot import plot_overview -from bob.ip.binseg.utils.click import OptionEatAll -from bob.ip.binseg.utils.rsttable import create_overview_grid -from bob.ip.binseg.utils.plot import metricsviz -from bob.ip.binseg.utils.transformfolder import transformfolder as transfld - -logger = logging.getLogger(__name__) - +from bob.extension.scripts.click_helper import AliasedGroup @with_plugins(pkg_resources.iter_entry_points("bob.ip.binseg.cli")) @click.group(cls=AliasedGroup) def binseg(): """Binary 2D Image Segmentation Benchmark commands.""" - - -# Plot comparison -@binseg.command(entry_point_group="bob.ip.binseg.config", cls=ConfigCommand) -@click.option( - "--output-path-list", - "-l", - required=True, - help="Pass all output paths as arguments", - cls=OptionEatAll, -) -@click.option( - "--output-path", "-o", required=True, -) -@click.option( - "--title", "-t", required=False, -) -@verbosity_option(cls=ResourceOption) -def compare(output_path_list, output_path, title, **kwargs): - """ Compares multiple 
metrics files that are stored in the format mymodel/results/Metrics.csv """ - logger.debug("Output paths: {}".format(output_path_list)) - logger.info("Plotting precision vs recall curves for {}".format(output_path_list)) - fig = plot_overview(output_path_list, title) - if not os.path.exists(output_path): - os.makedirs(output_path) - fig_filename = os.path.join(output_path, "precision_recall_comparison.pdf") - logger.info("saving {}".format(fig_filename)) - fig.savefig(fig_filename) - - -# Create grid table with results -@binseg.command(entry_point_group="bob.ip.binseg.config", cls=ConfigCommand) -@click.option( - "--output-path", "-o", required=True, -) -@verbosity_option(cls=ResourceOption) -def gridtable(output_path, **kwargs): - """ Creates an overview table in grid rst format for all Metrics.csv in the output_path - tree structure: - ├── DATABASE - ├── MODEL - ├── images - └── results - """ - logger.info("Creating grid for all results in {}".format(output_path)) - create_overview_grid(output_path) - - -# Create metrics viz -@binseg.command(entry_point_group="bob.ip.binseg.config", cls=ConfigCommand) -@click.option("--dataset", "-d", required=True, cls=ResourceOption) -@click.option( - "--output-path", "-o", required=True, -) -@verbosity_option(cls=ResourceOption) -def visualize(dataset, output_path, **kwargs): - """ Creates the following visualizations of the probabilties output maps: - overlayed: test images overlayed with prediction probabilities vessel tree - tpfnfpviz: highlights true positives, false negatives and false positives - - Required tree structure: - ├── DATABASE - ├── MODEL - ├── images - └── results - """ - logger.info("Creating TP, FP, FN visualizations for {}".format(output_path)) - metricsviz(dataset=dataset, output_path=output_path) - -# Apply image transforms to a folder containing images -@binseg.command(entry_point_group="bob.ip.binseg.config", cls=ConfigCommand) -@click.option("--source-path", "-s", required=True, cls=ResourceOption) -@click.option("--target-path", "-t", required=True, cls=ResourceOption) -@click.option("--transforms", "-a", required=True, cls=ResourceOption) -@verbosity_option(cls=ResourceOption) -def transformfolder(source_path, target_path, transforms, **kwargs): - logger.info( - "Applying transforms to images in {} and saving them to {}".format( - source_path, target_path - ) - ) - transfld(source_path, target_path, transforms) - - -# Evaluate only. Runs evaluation on predicted probability maps (--prediction-folder) -@binseg.command(entry_point_group="bob.ip.binseg.config", cls=ConfigCommand) -@click.option( - "--output-path", "-o", required=True, default="output", cls=ResourceOption -) -@click.option( - "--prediction-folder", - "-p", - help="Path containing output probability maps", - required=True, - cls=ResourceOption, -) -@click.option( - "--prediction-extension", - "-x", - help='Extension (e.g. 
".png") for the prediction files', - default=".png", - required=False, - cls=ResourceOption, -) -@click.option("--dataset", "-d", required=True, cls=ResourceOption) -@click.option("--title", required=False, cls=ResourceOption) -@click.option("--legend", cls=ResourceOption) -@verbosity_option(cls=ResourceOption) -def evalpred( - output_path, - prediction_folder, - prediction_extension, - dataset, - title, - legend, - **kwargs -): - """ Run inference and evalaute the model performance """ - - # PyTorch dataloader - data_loader = DataLoader( - dataset=dataset, - batch_size=1, - shuffle=False, - pin_memory=torch.cuda.is_available(), - ) - - # Run eval - do_eval( - prediction_folder, - data_loader, - output_folder=output_path, - title=title, - legend=legend, - prediction_extension=prediction_extension, - ) diff --git a/bob/ip/binseg/script/compare.py b/bob/ip/binseg/script/compare.py new file mode 100644 index 0000000000000000000000000000000000000000..3b2b0ae998e8dbf05015dda950975e78a042b176 --- /dev/null +++ b/bob/ip/binseg/script/compare.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python +# coding=utf-8 + +import click + +from bob.extension.scripts.click_helper import ( + verbosity_option, + AliasedGroup, +) + +from ..utils.plot import combined_precision_recall_f1iso_confintval + +import logging +logger = logging.getLogger(__name__) + + +@click.command( + epilog="""Examples: + +\b + 1. Compares system A and B, with their own pre-computed metric files: +\b + $ bob binseg compare -vv A path/to/A/metrics.csv B path/to/B/metrics.csv +""", +) +@click.argument( + 'label_path', + nargs=-1, + ) +@click.option( + "--output", + "-o", + help="Path where write the output figure (PDF format)", + show_default=True, + required=True, + default="comparison.pdf", + type=click.Path(), +) +@verbosity_option() +def compare(label_path, output, **kwargs): + """Compares multiple systems together""" + + # hack to get a dictionary from arguments passed to input + if len(label_path) % 2 != 0: + raise click.ClickException("Input label-paths should be doubles" + " composed of name-path entries") + data = dict(zip(label_path[::2], label_path[1::2])) + + fig = combined_precision_recall_f1iso_confintval(data) + logger.info(f"Saving plot at {output}") + fig.savefig(output) diff --git a/bob/ip/binseg/script/evaluate.py b/bob/ip/binseg/script/evaluate.py index 11ca9719aa98cb122f142e7ff73ca0d317b84abe..1576f3875ebe5a1ad328293d234ebf0bfdec714d 100644 --- a/bob/ip/binseg/script/evaluate.py +++ b/bob/ip/binseg/script/evaluate.py @@ -2,15 +2,12 @@ # coding=utf-8 import click -from click_plugins import with_plugins - from torch.utils.data import DataLoader from bob.extension.scripts.click_helper import ( verbosity_option, ConfigCommand, ResourceOption, - AliasedGroup, ) from ..engine.evaluator import run @@ -47,6 +44,7 @@ logger = logging.getLogger(__name__) help="Path where to store the analysis result (created if does not exist)", required=True, default="results", + type=click.Path(), cls=ResourceOption, ) @click.option( @@ -54,6 +52,7 @@ logger = logging.getLogger(__name__) "-p", help="Path where predictions are currently stored", required=True, + type=click.Path(exists=True, file_okay=False, dir_okay=True), cls=ResourceOption, ) @click.option( @@ -65,7 +64,7 @@ logger = logging.getLogger(__name__) ) @click.option( "--overlayed", - "-A", + "-O", help="Creates overlayed representations of the output probability maps, " "similar to --overlayed in prediction-mode, except it includes " "distinctive colours for true and false positives and 
false negatives. " @@ -77,10 +76,27 @@ logger = logging.getLogger(__name__) required=False, cls=ResourceOption, ) +@click.option( + "--overlay-threshold", + "-T", + help="If you set --overlayed, then you can provide a value to be used as " + "threshold to be applied on probability maps and decide for positives and " + "negatives. This binary output will be used to define true and false " + "positives, and false negatives for the overlay analysis. This number " + "should either come from the training set or a separate validation set " + "to avoid biasing the analysis", + default=0.5, + type=click.FloatRange(min=0.0, max=1.0), + show_default=True, + required=False, + cls=ResourceOption, +) @verbosity_option(cls=ResourceOption) -def evaluate(output_folder, predictions_folder, dataset, overlayed, **kwargs): +def evaluate(output_folder, predictions_folder, dataset, overlayed, + overlay_threshold, **kwargs): """Evaluates an FCN on a binary segmentation task. """ data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, pin_memory=False) - run(dataset, predictions_folder, output_folder) + run(dataset, predictions_folder, output_folder, overlayed, + overlay_threshold) diff --git a/bob/ip/binseg/script/predict.py b/bob/ip/binseg/script/predict.py index 2c7929ec2127f76f3afaeaae34287fcfe8b6e862..d4d8bd75677acd94517f52f6b63a0b375e49e809 100644 --- a/bob/ip/binseg/script/predict.py +++ b/bob/ip/binseg/script/predict.py @@ -4,8 +4,6 @@ import os import click -from click_plugins import with_plugins - import torch from torch.utils.data import DataLoader @@ -13,7 +11,6 @@ from bob.extension.scripts.click_helper import ( verbosity_option, ConfigCommand, ResourceOption, - AliasedGroup, ) from ..engine.predictor import run @@ -52,6 +49,7 @@ logger = logging.getLogger(__name__) required=True, default="results", cls=ResourceOption, + type=click.Path(), ) @click.option( "--model", @@ -74,6 +72,7 @@ logger = logging.getLogger(__name__) required=True, show_default=True, default=1, + type=click.IntRange(min=1), cls=ResourceOption, ) @click.option( diff --git a/bob/ip/binseg/script/train.py b/bob/ip/binseg/script/train.py index 3302e9ea59539ef6cf33fbbc6d34ee3f0e46cab2..7eb4bb992074e9005c32519b6661daa0f0b837b5 100644 --- a/bob/ip/binseg/script/train.py +++ b/bob/ip/binseg/script/train.py @@ -4,8 +4,6 @@ import os import click -from click_plugins import with_plugins - import torch from torch.utils.data import DataLoader @@ -13,7 +11,6 @@ from bob.extension.scripts.click_helper import ( verbosity_option, ConfigCommand, ResourceOption, - AliasedGroup, ) from ..utils.checkpointer import DetectronCheckpointer @@ -52,6 +49,7 @@ logger = logging.getLogger(__name__) "-o", help="Path where to store the generated model (created if does not exist)", required=True, + type=click.Path(), default="results", cls=ResourceOption, ) @@ -115,6 +113,7 @@ logger = logging.getLogger(__name__) required=True, show_default=True, default=2, + type=click.IntRange(min=1), cls=ResourceOption, ) @click.option( @@ -136,6 +135,7 @@ logger = logging.getLogger(__name__) show_default=True, required=True, default=1000, + type=click.IntRange(min=1), cls=ResourceOption, ) @click.option( @@ -149,6 +149,7 @@ logger = logging.getLogger(__name__) show_default=True, required=True, default=0, + type=click.IntRange(min=0), cls=ResourceOption, ) @click.option( @@ -167,6 +168,7 @@ logger = logging.getLogger(__name__) show_default=True, required=False, default=42, + type=click.IntRange(min=0), cls=ResourceOption, ) @click.option( @@ -184,6 +186,7 @@ 
logger = logging.getLogger(__name__) show_default=True, required=True, default=900, + type=click.IntRange(min=0), cls=ResourceOption, ) @verbosity_option(cls=ResourceOption) diff --git a/bob/ip/binseg/test/test_batchmetrics.py b/bob/ip/binseg/test/test_batchmetrics.py index 76b1313d4ed819cecfc8b2dd7e0f5125fd9723d9..172e66f2e92c9c130d0f5205d7cacaf53fefe7e3 100644 --- a/bob/ip/binseg/test/test_batchmetrics.py +++ b/bob/ip/binseg/test/test_batchmetrics.py @@ -31,9 +31,8 @@ class Tester(unittest.TestCase): def test_batch_metrics(self): dfs = [] - for stem, pred, gt in zip(self.names, self.predictions, - self.ground_truths): - dfs.append(_sample_metrics(stem, pred, gt)) + for pred, gt in zip(self.predictions, self.ground_truths): + dfs.append(_sample_metrics(pred, gt)) bm = pandas.concat(dfs) self.assertEqual(len(bm), 2 * 100) diff --git a/bob/ip/binseg/utils/FreeMono.ttf b/bob/ip/binseg/utils/FreeMono.ttf deleted file mode 100644 index 7485f9e4c84d5a372c81e11df2cd9f5e2eb2064a..0000000000000000000000000000000000000000 Binary files a/bob/ip/binseg/utils/FreeMono.ttf and /dev/null differ diff --git a/bob/ip/binseg/utils/click.py b/bob/ip/binseg/utils/click.py deleted file mode 100644 index 792cebfd34f50d3fbe2a08e6d829fb17aaba91e3..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/utils/click.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import click - - -class OptionEatAll(click.Option): - """ - Allows for ``*args`` and ``**kwargs`` to be passed to click - - https://stackoverflow.com/questions/48391777/nargs-equivalent-for-options-in-click - """ - - def __init__(self, *args, **kwargs): - self.save_other_options = kwargs.pop("save_other_options", True) - nargs = kwargs.pop("nargs", -1) - assert nargs == -1, "nargs, if set, must be -1 not {}".format(nargs) - super(OptionEatAll, self).__init__(*args, **kwargs) - self._previous_parser_process = None - self._eat_all_parser = None - - def add_to_parser(self, parser, ctx): - def parser_process(value, state): - # method to hook to the parser.process - done = False - value = [value] - if self.save_other_options: - # grab everything up to the next option - while state.rargs and not done: - for prefix in self._eat_all_parser.prefixes: - if state.rargs[0].startswith(prefix): - done = True - if not done: - value.append(state.rargs.pop(0)) - else: - # grab everything remaining - value += state.rargs - state.rargs[:] = [] - value = tuple(value) - - # call the actual process - self._previous_parser_process(value, state) - - retval = super(OptionEatAll, self).add_to_parser(parser, ctx) - for name in self.opts: - our_parser = parser._long_opt.get(name) or parser._short_opt.get(name) - if our_parser: - self._eat_all_parser = our_parser - self._previous_parser_process = our_parser.process - our_parser.process = parser_process - break - return retval diff --git a/bob/ip/binseg/utils/plot.py b/bob/ip/binseg/utils/plot.py index ecfbe92bbcb9b438f2d9a7cb8d06fa777f7a6584..29a0d28f1eef949a103c757530a4605078546d74 100644 --- a/bob/ip/binseg/utils/plot.py +++ b/bob/ip/binseg/utils/plot.py @@ -1,54 +1,51 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import os -import csv +from itertools import cycle -import numpy as np -import pandas as pd -import PIL - -import torchvision.transforms.functional as VF -import torch +import numpy +import pandas import matplotlib matplotlib.use("agg") +import matplotlib.pyplot as plt -def precision_recall_f1iso(precision, recall, names, title=None): - """ - Author: Andre Anjos 
(andre.anjos@idiap.ch). +import logging +logger = logging.getLogger(__name__) + + +def precision_recall_f1iso(precision, recall, names): + """Creates a precision-recall plot of the given data. - Creates a precision-recall plot of the given data. The plot will be annotated with F1-score iso-lines (in which the F1-score maintains the same value) Parameters ---------- + precision : :py:class:`numpy.ndarray` or :py:class:`list` - A list of 1D np arrays containing the Y coordinates of the plot, or - the precision, or a 2D np array in which the rows correspond to each - of the system's precision coordinates. + A list of 1D arrays containing the Y coordinates of the plot, or the + precision, or a 2D np array in which the rows correspond to each of the + system's precision coordinates. + recall : :py:class:`numpy.ndarray` or :py:class:`list` - A list of 1D np arrays containing the X coordinates of the plot, or - the recall, or a 2D np array in which the rows correspond to each - of the system's recall coordinates. + A list of 1D arrays containing the X coordinates of the plot, or the + recall, or a 2D np array in which the rows correspond to each of the + system's recall coordinates. + names : :py:class:`list` An iterable over the names of each of the systems along the rows of ``precision`` and ``recall`` - title : :py:class:`str`, optional - A title for the plot. If not set, omits the title + Returns ------- - matplotlib.figure.Figure + + figure : matplotlib.figure.Figure A matplotlib figure you can save or display - """ - import matplotlib - matplotlib.use("agg") - import matplotlib.pyplot as plt - from itertools import cycle + """ fig, ax1 = plt.subplots(1) lines = ["-", "--", "-.", ":"] @@ -72,7 +69,9 @@ def precision_recall_f1iso(precision, recall, names, title=None): next(linecycler), label="[F={:.4f}] {}".format(f1.max(), n), ) - ax1.plot(ori, opi, marker="o", linestyle=None, markersize=3, color="black") + ax1.plot( + ori, opi, marker="o", linestyle=None, markersize=3, color="black" + ) ax1.grid(linestyle="--", linewidth=1, color="gray", alpha=0.2) if len(names) > 1: plt.legend(loc="lower left", framealpha=0.5) @@ -80,15 +79,13 @@ def precision_recall_f1iso(precision, recall, names, title=None): ax1.set_ylabel("Precision") ax1.set_xlim([0.0, 1.0]) ax1.set_ylim([0.0, 1.0]) - if title is not None: - ax1.set_title(title) # Annotates plot with F1-score iso-lines ax2 = ax1.twinx() - f_scores = np.linspace(0.1, 0.9, num=9) + f_scores = numpy.linspace(0.1, 0.9, num=9) tick_locs = [] tick_labels = [] for f_score in f_scores: - x = np.linspace(0.01, 1) + x = numpy.linspace(0.01, 1) y = f_score * x / (2 * x - f_score) (l,) = plt.plot(x[y >= 0], y[y >= 0], color="green", alpha=0.1) tick_locs.append(y[-1]) @@ -117,42 +114,62 @@ def precision_recall_f1iso(precision, recall, names, title=None): def precision_recall_f1iso_confintval( - precision, recall, pr_upper, pr_lower, re_upper, re_lower, names, title=None + precision, recall, pr_upper, pr_lower, re_upper, re_lower, names ): - """ - Creates a precision-recall plot of the given data. 
+ """Creates a precision-recall plot of the given data, with confidence + intervals + The plot will be annotated with F1-score iso-lines (in which the F1-score maintains the same value) Parameters ---------- + precision : :py:class:`numpy.ndarray` or :py:class:`list` - A list of 1D np arrays containing the Y coordinates of the plot, or - the precision, or a 2D np array in which the rows correspond to each + A list of 1D arrays containing the Y coordinates of the plot, or the + precision, or a 2D array in which the rows correspond to each of the system's precision coordinates. recall : :py:class:`numpy.ndarray` or :py:class:`list` - A list of 1D np arrays containing the X coordinates of the plot, or - the recall, or a 2D np array in which the rows correspond to each + A list of 1D arrays containing the X coordinates of the plot, or + the recall, or a 2D array in which the rows correspond to each of the system's recall coordinates. + pr_upper : :py:class:`numpy.ndarray` or :py:class:`list` + A list of 1D arrays containing the upper bound of the confidence + interval for the Y coordinates of the plot, or the precision upper + bound, or a 2D array in which the rows correspond to each of the + system's precision upper-bound coordinates. + + pr_lower : :py:class:`numpy.ndarray` or :py:class:`list` + A list of 1D arrays containing the lower bound of the confidence + interval for the Y coordinates of the plot, or the precision lower + bound, or a 2D array in which the rows correspond to each of the + system's precision lower-bound coordinates. + + re_upper : :py:class:`numpy.ndarray` or :py:class:`list` + A list of 1D arrays containing the upper bound of the confidence + interval for the Y coordinates of the plot, or the recall upper bound, + or a 2D array in which the rows correspond to each of the system's + recall upper-bound coordinates. + + re_lower : :py:class:`numpy.ndarray` or :py:class:`list` + A list of 1D arrays containing the lower bound of the confidence + interval for the Y coordinates of the plot, or the recall lower bound, + or a 2D array in which the rows correspond to each of the system's + recall lower-bound coordinates. + names : :py:class:`list` An iterable over the names of each of the systems along the rows of ``precision`` and ``recall`` - title : :py:class:`str`, optional - A title for the plot. 
If not set, omits the title Returns ------- - matplotlib.figure.Figure + figure : matplotlib.figure.Figure A matplotlib figure you can save or display - """ - import matplotlib - matplotlib.use("agg") - import matplotlib.pyplot as plt - from itertools import cycle + """ fig, ax1 = plt.subplots(1) lines = ["-", "--", "-.", ":"] @@ -195,20 +212,22 @@ def precision_recall_f1iso_confintval( next(linecycler), label="[F={:.4f}] {}".format(f1.max(), n), ) - ax1.plot(ori, opi, marker="o", linestyle=None, markersize=3, color="black") + ax1.plot( + ori, opi, marker="o", linestyle=None, markersize=3, color="black" + ) # Plot confidence # Upper bound # ax1.plot(r95ui[p95ui>0], p95ui[p95ui>0]) # Lower bound # ax1.plot(r95li[p95li>0], p95li[p95li>0]) # create the limiting polygon - vert_x = np.concatenate((rui[pui > 0], rli[pli > 0][::-1])) - vert_y = np.concatenate((pui[pui > 0], pli[pli > 0][::-1])) + vert_x = numpy.concatenate((rui[pui > 0], rli[pli > 0][::-1])) + vert_y = numpy.concatenate((pui[pui > 0], pli[pli > 0][::-1])) # hacky workaround to plot 2nd human - if np.isclose(np.mean(rui), rui[1], rtol=1e-05): + if numpy.isclose(numpy.mean(rui), rui[1], rtol=1e-05): print("found human") p = plt.Polygon( - np.column_stack((vert_x, vert_y)), + numpy.column_stack((vert_x, vert_y)), facecolor="none", alpha=0.2, edgecolor=next(colorcycler), @@ -216,7 +235,7 @@ def precision_recall_f1iso_confintval( ) else: p = plt.Polygon( - np.column_stack((vert_x, vert_y)), + numpy.column_stack((vert_x, vert_y)), facecolor=next(colorcycler), alpha=0.2, edgecolor="none", @@ -231,15 +250,13 @@ def precision_recall_f1iso_confintval( ax1.set_ylabel("Precision") ax1.set_xlim([0.0, 1.0]) ax1.set_ylim([0.0, 1.0]) - if title is not None: - ax1.set_title(title) # Annotates plot with F1-score iso-lines ax2 = ax1.twinx() - f_scores = np.linspace(0.1, 0.9, num=9) + f_scores = numpy.linspace(0.1, 0.9, num=9) tick_locs = [] tick_labels = [] for f_score in f_scores: - x = np.linspace(0.01, 1) + x = numpy.linspace(0.01, 1) y = f_score * x / (2 * x - f_score) (l,) = plt.plot(x[y >= 0], y[y >= 0], color="green", alpha=0.1) tick_locs.append(y[-1]) @@ -267,7 +284,7 @@ def precision_recall_f1iso_confintval( return fig -def loss_curve(df, title=None): +def loss_curve(df): """Creates a loss curve in a Matplotlib figure. Parameters @@ -277,9 +294,6 @@ def loss_curve(df, title=None): A dataframe containing, at least, "epoch", "median-loss" and "learning-rate" columns, that will be plotted. - title : :py:class:`str`, Optional - Optional title, that will be set on the figure if passed - Returns ------- @@ -287,10 +301,8 @@ def loss_curve(df, title=None): A figure, that may be saved or displayed """ - import matplotlib.pyplot as plt ax1 = df.plot(x="epoch", y="median-loss", grid=True) - if title is not None: ax1.set_title(title) ax1.set_ylabel("Median Loss") ax1.grid(linestyle="--", linewidth=1, color="gray", alpha=0.2) ax2 = df["learning-rate"].plot(secondary_y=True, legend=True, grid=True,) @@ -301,61 +313,25 @@ def loss_curve(df, title=None): return fig -def read_metricscsv(file): - """ - Read precision and recall from csv file +def combined_precision_recall_f1iso_confintval(data): + """Plots comparison chart of all evaluated models Parameters ---------- - file : str - path to file + + data : dict + A dict in which keys are the names of the systems and the values are + paths to ``metrics.csv`` style files. 
+ Returns ------- - :py:class:`numpy.ndarray` - :py:class:`numpy.ndarray` - """ - with open(file, "r") as infile: - metricsreader = csv.reader(infile) - # skip header row - next(metricsreader) - precision = [] - recall = [] - pr_upper = [] - pr_lower = [] - re_upper = [] - re_lower = [] - for row in metricsreader: - precision.append(float(row[1])) - recall.append(float(row[2])) - pr_upper.append(float(row[8])) - pr_lower.append(float(row[9])) - re_upper.append(float(row[11])) - re_lower.append(float(row[12])) - return ( - np.array(precision), - np.array(recall), - np.array(pr_upper), - np.array(pr_lower), - np.array(re_upper), - np.array(re_lower), - ) + figure : matplotlib.figure.Figure + A figure, with all systems combined into a single plot. -def plot_overview(outputfolders, title): """ - Plots comparison chart of all trained models - Parameters - ---------- - outputfolder : list - list containing output paths of all evaluated models (e.g. ``['DRIVE/model1', 'DRIVE/model2']``) - title : str - title of plot - Returns - ------- - matplotlib.figure.Figure - """ precisions = [] recalls = [] pr_ups = [] @@ -363,103 +339,20 @@ def plot_overview(outputfolders, title): re_ups = [] re_lows = [] names = [] - for folder in outputfolders: - # metrics - metrics_path = os.path.join(folder, "results/Metrics.csv") - pr, re, pr_upper, pr_lower, re_upper, re_lower = read_metricscsv(metrics_path) - precisions.append(pr) - recalls.append(re) - pr_ups.append(pr_upper) - pr_lows.append(pr_lower) - re_ups.append(re_upper) - re_lows.append(re_lower) - modelname = folder.split("/")[-1] - name = "{} ".format(modelname) + + for name, metrics_path in data.items(): + logger.info(f"Loading metrics from {metrics_path}...") + df = pandas.read_csv(metrics_path) + precisions.append(df.precision.to_numpy()) + recalls.append(df.recall.to_numpy()) + pr_ups.append(df.pr_upper.to_numpy()) + pr_lows.append(df.pr_lower.to_numpy()) + re_ups.append(df.re_upper.to_numpy()) + re_lows.append(df.re_lower.to_numpy()) names.append(name) - # title = folder.split('/')[-4] + fig = precision_recall_f1iso_confintval( - precisions, recalls, pr_ups, pr_lows, re_ups, re_lows, names, title + precisions, recalls, pr_ups, pr_lows, re_ups, re_lows, names ) - return fig - - -def metricsviz( - dataset, - output_path, - tp_color=(0, 255, 0), # (128,128,128) Gray - fp_color=(0, 0, 255), # (70, 240, 240) Cyan - fn_color=(255, 0, 0), # (245, 130, 48) Orange - overlayed=True, -): - """ Visualizes true positives, false positives and false negatives - Default colors TP: Gray, FP: Cyan, FN: Orange - Parameters - ---------- - dataset : :py:class:`torch.utils.data.Dataset` - output_path : str - path where results and probability output images are stored. E.g. 
``'DRIVE/MODEL'`` - tp_color : tuple - RGB values, by default (128,128,128) - fp_color : tuple - RGB values, by default (70, 240, 240) - fn_color : tuple - RGB values, by default (245, 130, 48) - """ - - for sample in dataset: - # get sample - name = sample[0] - img = VF.to_pil_image(sample[1]) # PIL Image - gt = sample[2].byte() # byte tensor - - # read metrics - metrics = pd.read_csv(os.path.join(output_path, "results", "Metrics.csv")) - optimal_threshold = metrics["threshold"][metrics["f1_score"].idxmax()] - - # read probability output - pred = Image.open(os.path.join(output_path, "images", name)) - pred = pred.convert(mode="L") - pred = VF.to_tensor(pred) - binary_pred = torch.gt(pred, optimal_threshold).byte() - - # calc metrics - # equals and not-equals - equals = torch.eq(binary_pred, gt) # tensor - notequals = torch.ne(binary_pred, gt) # tensor - # true positives - tp_tensor = gt * binary_pred # tensor - tp_pil = VF.to_pil_image(tp_tensor.float()) - tp_pil_colored = PIL.ImageOps.colorize(tp_pil, (0, 0, 0), tp_color) - # false positives - fp_tensor = torch.eq((binary_pred + tp_tensor), 1) - fp_pil = VF.to_pil_image(fp_tensor.float()) - fp_pil_colored = PIL.ImageOps.colorize(fp_pil, (0, 0, 0), fp_color) - # false negatives - fn_tensor = notequals - fp_tensor - fn_pil = VF.to_pil_image(fn_tensor.float()) - fn_pil_colored = PIL.ImageOps.colorize(fn_pil, (0, 0, 0), fn_color) - - # paste together - tp_pil_colored.paste(fp_pil_colored, mask=fp_pil) - tp_pil_colored.paste(fn_pil_colored, mask=fn_pil) - - if overlayed: - tp_pil_colored = PIL.Image.blend(img, tp_pil_colored, 0.4) - img_metrics = pd.read_csv( - os.path.join(output_path, "results", name + ".csv") - ) - f1 = img_metrics[" f1_score"].max() - # add f1-score - fnt_size = tp_pil_colored.size[1] // 25 - draw = PIL.ImageDraw.Draw(tp_pil_colored) - fnt = PIL.ImageFont.truetype("FreeMono.ttf", fnt_size) - draw.text((0, 0), "F1: {:.4f}".format(f1), (255, 255, 255), font=fnt) - - # save to disk - overlayed_path = os.path.join(output_path, "tpfnfpviz") - fullpath = os.path.join(overlayed_path, name) - fulldir = os.path.dirname(fullpath) - if not os.path.exists(fulldir): - os.makedirs(fulldir) - tp_pil_colored.save(fullpath) + return fig diff --git a/bob/ip/binseg/utils/rsttable.py b/bob/ip/binseg/utils/rsttable.py deleted file mode 100644 index c5329d8aee9ea28fd202fb057076d0f6a11eca24..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/utils/rsttable.py +++ /dev/null @@ -1,56 +0,0 @@ -import pandas as pd -from tabulate import tabulate -import os -from pathlib import Path - - -def get_paths(output_path, filename): - """ - Parameters - ---------- - output_path : str - path in which to look for files - filename : str - - Returns - ------- - list - list of file paths - """ - datadir = Path(output_path) - file_paths = sorted(list(datadir.glob("**/{}".format(filename)))) - file_paths = [f.as_posix() for f in file_paths] - return file_paths - - -def create_overview_grid(output_path): - """ Reads all Metrics.csv in a certain output path and pivots them to a rst grid table""" - filename = "Metrics.csv" - metrics = get_paths(output_path, filename) - f1s = [] - stds = [] - models = [] - databases = [] - for m in metrics: - metrics = pd.read_csv(m) - maxf1 = metrics["f1_score"].max() - idmaxf1 = metrics["f1_score"].idxmax() - std = metrics["std_f1"][idmaxf1] - stds.append(std) - f1s.append(maxf1) - model = m.split("/")[-3] - models.append(model) - database = m.split("/")[-4] - databases.append(database) - df = pd.DataFrame() - 
df["database"] = databases - df["model"] = models - df["f1"] = f1s - df["std"] = stds - pivot = df.pivot(index="database", columns="model", values="f1") - pivot2 = df.pivot(index="database", columns="model", values="std") - - with open(os.path.join(output_path, "Metrics_overview.rst"), "w+") as outfile: - outfile.write(tabulate(pivot, headers=pivot.columns, tablefmt="grid")) - with open(os.path.join(output_path, "Metrics_overview_std.rst"), "w+") as outfile: - outfile.write(tabulate(pivot2, headers=pivot2.columns, tablefmt="grid")) diff --git a/bob/ip/binseg/utils/summary.py b/bob/ip/binseg/utils/summary.py index 97fc09da9b89404d96e2230ca49c8a0b81db9c8f..493d9d163833c64f26599a69e087381d54a2a62e 100644 --- a/bob/ip/binseg/utils/summary.py +++ b/bob/ip/binseg/utils/summary.py @@ -10,11 +10,11 @@ from torch.nn.modules.module import _addindent def summary(model, file=sys.stderr): """Counts the number of paramters in each layers - + Parameters ---------- model : :py:class:`torch.nn.Module` - + Returns ------- int diff --git a/bob/ip/binseg/utils/transformfolder.py b/bob/ip/binseg/utils/transformfolder.py deleted file mode 100644 index 95c3353947530e6ff33423db6adf3fc222a481b8..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/utils/transformfolder.py +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from pathlib import Path, PurePosixPath -from PIL import Image -from torchvision.transforms.functional import to_pil_image - - -def transformfolder(source_path, target_path, transforms): - """Applies a set of transfroms on an image folder - - Parameters - ---------- - source_path : str - [description] - target_path : str - [description] - transforms : [type] - transform function - """ - source_path = Path(source_path) - target_path = Path(target_path) - file_paths = sorted(list(source_path.glob("*?.*"))) - for f in file_paths: - timg_path = PurePosixPath(target_path).joinpath(f.name) - img = Image.open(f).convert(mode="1", dither=None) - img, _ = transforms(img, img) - img = to_pil_image(img) - img.save(str(timg_path)) diff --git a/doc/cli.rst b/doc/cli.rst index fc3778926a56ee2db2845aa80d9028273c6c2d08..03f7c2dc7c692c1aba580d3f05393396f585f4e1 100644 --- a/doc/cli.rst +++ b/doc/cli.rst @@ -95,4 +95,15 @@ a series of analysis figures which are useful to understand model performance. .. command-output:: bob binseg evaluate --help +.. _bob.ip.binseg.cli.compare: + +Performance Comparison +---------------------- + +Performance comparison takes the performance evaluation results and generate +combined figures and tables that compare results of multiple systems. + +.. command-output:: bob binseg compare --help + + .. include:: links.rst diff --git a/doc/evaluation.rst b/doc/evaluation.rst index 95ab253844b6f21c13449eb0b1fb98054947ac25..48f7a91efbbec8da0bec6723ddbadf47a4d4d358 100644 --- a/doc/evaluation.rst +++ b/doc/evaluation.rst @@ -84,4 +84,15 @@ E.g. run inference on predictions from the DRIVE test set, do the following: bob binseg evaluate -vv drive-test -p /predictions/folder -o /eval/results/folder +Comparing Systems +================= + +To compare multiple systems together and generate combined plots and tables, +use ``bob binseg compare``. Use ``--help`` for a quick guide. + +.. code-block:: bash + + $ bob binseg compare -vv A A/metrics.csv B B/metrics.csv + + .. 
include:: links.rst diff --git a/doc/usage.rst b/doc/usage.rst index 24c3fa1ada6058ffbb5668de475d5b3f939c55fc..d9c1ef87c00264666f78e71c3294ecd6e486345d 100644 --- a/doc/usage.rst +++ b/doc/usage.rst @@ -49,8 +49,6 @@ modifying one of our configuration resources. training models evaluation - plotting - visualization .. include:: links.rst diff --git a/doc/visualization.rst b/doc/visualization.rst deleted file mode 100644 index 56728e9562003c676dab0a5d51ca4640b12df1e8..0000000000000000000000000000000000000000 --- a/doc/visualization.rst +++ /dev/null @@ -1,30 +0,0 @@ -.. -*- coding: utf-8 -*- -.. _bob.ip.binseg.visualization: - -============= -Visualization -============= - -Two visualization are generated via the ``bob binseg visualize`` command: - -1. Visualizations of true positives, false positives and false negatives -overlayed over the test images -2. Visualizations of the probability map outputs overlayed over the test images - -The following directory structure is expected: - -.. code-block:: bash - - ├── DATABASE - ├── MODEL - ├── images - └── results - -Example to generate visualization for outputs for the DRIVE dataset: - -.. code-block:: bash - - # Visualizations are stored in the same output folder. - bob binseg visualize DRIVETEST -o /DRIVE/M2UNet/output - -Use ``bob binseg visualize --help`` for more information. diff --git a/setup.py b/setup.py index 452c77c099d133454e8376cbc23094ddc425bff4..5a234f7c70c84706a1ee531d23954f799754db46 100644 --- a/setup.py +++ b/setup.py @@ -31,14 +31,11 @@ setup( "bob.cli": ["binseg = bob.ip.binseg.script.binseg:binseg"], # bob binseg sub-commands "bob.ip.binseg.cli": [ - "compare = bob.bin.binseg.script.binseg:compare", - "evalpred = bob.ip.binseg.script.binseg:evalpred", - "gridtable = bob.ip.binseg.script.binseg:testcheckpoints", - "visualize = bob.ip.binseg.script.binseg:visualize", "config = bob.ip.binseg.script.config:config", "train = bob.ip.binseg.script.train:train", "predict = bob.ip.binseg.script.predict:predict", "evaluate = bob.ip.binseg.script.evaluate:evaluate", + "compare = bob.ip.binseg.script.compare:compare", ], # bob train configurations "bob.ip.binseg.config": [
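
A quick illustration of the thresholding logic this patch factors into ``_posneg()`` in
``bob/ip/binseg/engine/evaluator.py``. This is a minimal, self-contained sketch for
reference only: the ``posneg_sketch`` name and the toy tensors are illustrative and are
not part of the patch.

    import torch

    def posneg_sketch(pred, gt, threshold):
        """Returns (tp, fp, tn, fn) uint8 tensors, mirroring the new helper."""
        gt = gt.byte()
        binary_pred = torch.gt(pred, threshold).byte()
        equals = torch.eq(binary_pred, gt).type(torch.uint8)
        notequals = torch.ne(binary_pred, gt).type(torch.uint8)
        tp = gt * binary_pred                                  # predicted 1, truth 1
        fp = torch.eq(binary_pred + tp, 1).type(torch.uint8)  # predicted 1, truth 0
        tn = equals - tp                                       # predicted 0, truth 0
        fn = notequals - fp                                    # predicted 0, truth 1
        return tp, fp, tn, fn

    # toy example: one TP, one FP, two TN, no FN at threshold 0.5
    pred = torch.tensor([[0.9, 0.2], [0.6, 0.1]])
    gt = torch.tensor([[1, 0], [0, 0]])
    tp, fp, tn, fn = posneg_sketch(pred, gt, 0.5)
    print(tp.sum().item(), fp.sum().item(), tn.sum().item(), fn.sum().item())

The per-threshold counts are what ``_sample_metrics()`` passes to ``base_metrics()`` to
compute precision, recall, specificity, accuracy, Jaccard and F1 for each sample over
thresholds 0.00 to 0.99 in steps of 0.01, and the same four tensors drive the colorized
TP/FP/FN overlays produced by ``_sample_analysis()``.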