diff --git a/bob/ip/binseg/engine/evaluator.py b/bob/ip/binseg/engine/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..af2c0a2d1e2cabe5b1eb6a226d87aebd89dca7cb
--- /dev/null
+++ b/bob/ip/binseg/engine/evaluator.py
@@ -0,0 +1,197 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""Defines functionality for the evaluation of predictions"""
+
+import os
+
+import numpy
+import pandas
+from tqdm import tqdm
+
+import torch
+import torchvision.transforms.functional as VF
+
+import bob.io.base
+
+from ..utils.metric import base_metrics
+from ..utils.plot import precision_recall_f1iso_confintval
+from ..utils.summary import summary
+
+import logging
+logger = logging.getLogger(__name__)
+
+
+def _sample_metrics(stem, pred, gt):
+    """
+    Calculates metrics on one single sample, for every threshold
+
+
+    Parameters
+    ----------
+
+    stem : str
+        original filename without extension and relative to its root-path
+
+    pred : torch.Tensor
+        pixel-wise predictions
+
+    gt : torch.Tensor
+        ground-truth (annotations)
+
+
+    Returns
+    -------
+
+    metrics : pandas.DataFrame
+
+        A pandas dataframe with the following columns:
+
+        * threshold: float
+        * precision: float
+        * recall: float
+        * specificity: float
+        * accuracy: float
+        * jaccard: float
+        * f1_score: float
+
+    """
+
+    step_size = 0.01
+    gts = gt.byte()
+
+    data = []
+
+    for threshold in numpy.arange(0.0, 1.0, step_size):
+
+        # threshold
+        binary_pred = torch.gt(pred, threshold).byte()
+
+        # equals and not-equals
+        equals = torch.eq(binary_pred, gts).type(torch.uint8)  # tensor
+        notequals = torch.ne(binary_pred, gts).type(torch.uint8)  # tensor
+
+        # true positives
+        tp_tensor = gts * binary_pred  # tensor
+        tp_count = torch.sum(tp_tensor).item()  # scalar
+
+        # false positives
+        fp_tensor = torch.eq((binary_pred + tp_tensor), 1)
+        fp_count = torch.sum(fp_tensor).item()
+
+        # true negatives
+        tn_tensor = equals - tp_tensor
+        tn_count = torch.sum(tn_tensor).item()
+
+        # false negatives
+        fn_tensor = notequals - fp_tensor.type(torch.uint8)
+        fn_count = torch.sum(fn_tensor).item()
+
+        # calc metrics
+        precision, recall, specificity, accuracy, jaccard, f1_score = \
+            base_metrics(tp_count, fp_count, tn_count, fn_count)
+
+        data.append([threshold, precision, recall, specificity,
+            accuracy, jaccard, f1_score])
+
+    return pandas.DataFrame(data, columns=(
+        "threshold",
+        "precision",
+        "recall",
+        "specificity",
+        "accuracy",
+        "jaccard",
+        "f1_score",
+    ))
+
+
+def run(data_loader, predictions_folder, output_folder):
+    """
+    Runs evaluation, calculating metrics from previously saved predictions
+
+
+    Parameters
+    ----------
+
+    data_loader : :py:class:`torch.utils.data.DataLoader`
+        an iterable over the transformed input dataset, containing ground-truth
+
+    predictions_folder : str
+        folder where predictions for the dataset images have been previously
+        stored
+
+    output_folder : str
+        folder where to store results
+
+    """
+
+    logger.info("Start evaluation")
+    logger.info(f"Output folder: {output_folder}")
+
+    if not os.path.exists(output_folder):
+        logger.info(f"Creating {output_folder}...")
+        os.makedirs(output_folder, exist_ok=True)
+
+    # Collect overall metrics
+    data = {}
+
+    for sample in tqdm(data_loader):
+        name = sample[0]
+        stem = os.path.splitext(name)[0]
+        image = sample[1].to("cpu")
+        gt = sample[2].to("cpu")
+        pred_fullpath = os.path.join(predictions_folder, stem + ".hdf5")
+        pred = bob.io.base.load(pred_fullpath).astype("float32")
+        pred = torch.from_numpy(pred)
+        if stem in data:
+            raise RuntimeError(f"{stem} entry already exists in data. 
" + f"Cannot overwrite.") + data[stem] = _sample_metrics(stem, pred, gt) + + # Merges all dataframes together + df_metrics = pandas.concat(data.values()) + + # Report and Averages + avg_metrics = df_metrics.groupby("threshold").mean() + std_metrics = df_metrics.groupby("threshold").std() + + # Uncomment below for F1-score calculation based on average precision and + # metrics instead of F1-scores of individual images. This method is in line + # with Maninis et. al. (2016) + # + # avg_metrics["f1_score"] = \ + # (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ + # (avg_metrics["precision"]+avg_metrics["recall"]) + + avg_metrics["std_pr"] = std_metrics["precision"] + avg_metrics["pr_upper"] = avg_metrics["precision"] + avg_metrics["std_pr"] + avg_metrics["pr_lower"] = avg_metrics["precision"] - avg_metrics["std_pr"] + avg_metrics["std_re"] = std_metrics["recall"] + avg_metrics["re_upper"] = avg_metrics["recall"] + avg_metrics["std_re"] + avg_metrics["re_lower"] = avg_metrics["recall"] - avg_metrics["std_re"] + avg_metrics["std_f1"] = std_metrics["f1_score"] + + metrics_path = os.path.join(output_folder, "metrics.csv") + logger.info(f"Saving averages over all input images at {metrics_path}...") + avg_metrics.to_csv(metrics_path) + + maxf1 = avg_metrics["f1_score"].max() + optimal_f1_threshold = avg_metrics["f1_score"].idxmax() + + logger.info(f"Highest F1-score of {maxf1:.5f}, achieved at " + f"threshold {optimal_f1_threshold:.2f}") + + # Plotting + np_avg_metrics = avg_metrics.to_numpy().T + figure_path = os.path.join(output_folder, "precision-recall.pdf") + logger.info(f"Saving overall precision-recall plot at {figure_path}...") + fig = precision_recall_f1iso_confintval( + [np_avg_metrics[0]], + [np_avg_metrics[1]], + [np_avg_metrics[7]], + [np_avg_metrics[8]], + [np_avg_metrics[10]], + [np_avg_metrics[11]], + ["data"], + ) + fig.savefig(figure_path) diff --git a/bob/ip/binseg/engine/inferencer.py b/bob/ip/binseg/engine/inferencer.py deleted file mode 100644 index feb78b3ec79550b91519a938c3e4dd96002f5442..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/engine/inferencer.py +++ /dev/null @@ -1,313 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import os -import time -import datetime -import numpy as np -import torch -import pandas as pd -import torchvision.transforms.functional as VF -from tqdm import tqdm - -import bob.io.base - -from bob.ip.binseg.utils.metric import base_metrics -from bob.ip.binseg.utils.plot import precision_recall_f1iso_confintval -from bob.ip.binseg.utils.summary import summary - -import logging -logger = logging.getLogger(__name__) - - -def batch_metrics(predictions, ground_truths, names, output_folder): - """ - Calculates metrics on the batch and saves it to disc - - - Parameters - ---------- - - predictions : :py:class:`torch.Tensor` - tensor with pixel-wise probabilities - - ground_truths : :py:class:`torch.Tensor` - tensor with binary ground-truth - - names : list - list of file names - - output_folder : str - output path - - - Returns - ------- - metrics : tuple - A tuple containing batch metrics: ``(name, threshold, precision, recall, specificity, accuracy, jaccard, f1_score)`` - """ - - step_size = 0.01 - batch_metrics = [] - - for j in range(predictions.size()[0]): - # ground truth byte - gts = ground_truths[j].byte() - - file_name = "{}.csv".format(names[j]) - logger.info("saving {}".format(file_name)) - - with open(os.path.join(output_folder, file_name), "w+") as outfile: - - outfile.write( - "threshold, precision, recall, 
specificity, accuracy, jaccard, f1_score\n" - ) - - for threshold in np.arange(0.0, 1.0, step_size): - # threshold - binary_pred = torch.gt(predictions[j], threshold).byte() - - # equals and not-equals - equals = torch.eq(binary_pred, gts).type(torch.uint8) # tensor - notequals = torch.ne(binary_pred, gts).type(torch.uint8) # tensor - - # true positives - tp_tensor = gts * binary_pred # tensor - tp_count = torch.sum(tp_tensor).item() # scalar - - # false positives - fp_tensor = torch.eq((binary_pred + tp_tensor), 1) - fp_count = torch.sum(fp_tensor).item() - - # true negatives - tn_tensor = equals - tp_tensor - tn_count = torch.sum(tn_tensor).item() - - # false negatives - fn_tensor = notequals - fp_tensor.type(torch.uint8) - fn_count = torch.sum(fn_tensor).item() - - # calc metrics - metrics = base_metrics(tp_count, fp_count, tn_count, fn_count) - - # write to disk - outfile.write( - "{:.2f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f} \n".format( - threshold, *metrics - ) - ) - - batch_metrics.append([names[j], threshold, *metrics]) - - return batch_metrics - - -def save_probability_images(predictions, names, output_folder): - """ - Saves probability maps as image in the same format as the test image - - - Parameters - ---------- - - predictions : :py:class:`torch.Tensor` - tensor with pixel-wise probabilities - - names : list - list of file names - - output_folder : str - output path - """ - - images_subfolder = os.path.join(output_folder, "images") - for j in range(predictions.size()[0]): - img = VF.to_pil_image(predictions.cpu().data[j]) - filename = "{}.png".format(names[j].split(".")[0]) - fullpath = os.path.join(images_subfolder, filename) - logger.info("saving {}".format(fullpath)) - fulldir = os.path.dirname(fullpath) - if not os.path.exists(fulldir): - os.makedirs(fulldir) - img.save(fullpath) - - -def save_hdf(predictions, names, output_folder): - """ - Saves probability maps as image in the same format as the test image - - Parameters - ---------- - predictions : :py:class:`torch.Tensor` - tensor with pixel-wise probabilities - names : list - list of file names - output_folder : str - output path - """ - hdf5_subfolder = os.path.join(output_folder, "hdf5") - if not os.path.exists(hdf5_subfolder): - os.makedirs(hdf5_subfolder) - for j in range(predictions.size()[0]): - img = predictions.cpu().data[j].squeeze(0).numpy() - filename = "{}.hdf5".format(names[j].split(".")[0]) - fullpath = os.path.join(hdf5_subfolder, filename) - logger.info("saving {}".format(filename)) - fulldir = os.path.dirname(fullpath) - if not os.path.exists(fulldir): - os.makedirs(fulldir) - bob.io.base.save(img, fullpath) - - -def do_inference(model, data_loader, device, output_folder=None): - """ - Runs inference and calculate metrics - - Parameters - --------- - model : :py:class:`torch.nn.Module` - neural network model (e.g. 
DRIU, HED, UNet) - data_loader : py:class:`torch.torch.utils.data.DataLoader` - device : str - device to use ``'cpu'`` or ``'cuda'`` - output_folder : str - """ - - logger.info("Start evaluation") - logger.info("Output folder: {}, Device: {}".format(output_folder, device)) - results_subfolder = os.path.join(output_folder, "results") - os.makedirs(results_subfolder, exist_ok=True) - - model.eval().to(device) - # Sigmoid for probabilities - sigmoid = torch.nn.Sigmoid() - - # Setup timers - start_total_time = time.time() - times = [] - - # Collect overall metrics - metrics = [] - - for samples in tqdm(data_loader): - names = samples[0] - images = samples[1].to(device) - ground_truths = samples[2].to(device) - with torch.no_grad(): - start_time = time.perf_counter() - - outputs = model(images) - - # necessary check for hed architecture that uses several outputs - # for loss calculation instead of just the last concatfuse block - if isinstance(outputs, list): - outputs = outputs[-1] - - probabilities = sigmoid(outputs) - - batch_time = time.perf_counter() - start_time - times.append(batch_time) - logger.info("Batch time: {:.5f} s".format(batch_time)) - - b_metrics = batch_metrics( - probabilities, ground_truths, names, results_subfolder - ) - metrics.extend(b_metrics) - - # Create probability images - save_probability_images(probabilities, names, output_folder) - # save hdf5 - save_hdf(probabilities, names, output_folder) - - # DataFrame - df_metrics = pd.DataFrame( - metrics, - columns=[ - "name", - "threshold", - "precision", - "recall", - "specificity", - "accuracy", - "jaccard", - "f1_score", - ], - ) - - # Report and Averages - metrics_file = "Metrics.csv".format(model.name) - metrics_path = os.path.join(results_subfolder, metrics_file) - logger.info("Saving average over all input images: {}".format(metrics_file)) - - avg_metrics = df_metrics.groupby("threshold").mean() - std_metrics = df_metrics.groupby("threshold").std() - - # Uncomment below for F1-score calculation based on average precision and metrics instead of - # F1-scores of individual images. This method is in line with Maninis et. al. 
(2016) - # avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ - # (avg_metrics["precision"]+avg_metrics["recall"]) - - avg_metrics["std_pr"] = std_metrics["precision"] - avg_metrics["pr_upper"] = avg_metrics["precision"] + avg_metrics["std_pr"] - avg_metrics["pr_lower"] = avg_metrics["precision"] - avg_metrics["std_pr"] - avg_metrics["std_re"] = std_metrics["recall"] - avg_metrics["re_upper"] = avg_metrics["recall"] + avg_metrics["std_re"] - avg_metrics["re_lower"] = avg_metrics["recall"] - avg_metrics["std_re"] - avg_metrics["std_f1"] = std_metrics["f1_score"] - - avg_metrics.to_csv(metrics_path) - maxf1 = avg_metrics["f1_score"].max() - optimal_f1_threshold = avg_metrics["f1_score"].idxmax() - - logger.info( - "Highest F1-score of {:.5f}, achieved at threshold {}".format( - maxf1, optimal_f1_threshold - ) - ) - - # Plotting - np_avg_metrics = avg_metrics.to_numpy().T - fig_name = "precision_recall.pdf" - logger.info("saving {}".format(fig_name)) - fig = precision_recall_f1iso_confintval( - [np_avg_metrics[0]], - [np_avg_metrics[1]], - [np_avg_metrics[7]], - [np_avg_metrics[8]], - [np_avg_metrics[10]], - [np_avg_metrics[11]], - [model.name, None], - title=output_folder, - ) - fig_filename = os.path.join(results_subfolder, fig_name) - fig.savefig(fig_filename) - - # Report times - total_inference_time = str(datetime.timedelta(seconds=int(sum(times)))) - average_batch_inference_time = np.mean(times) - total_evalution_time = str( - datetime.timedelta(seconds=int(time.time() - start_total_time)) - ) - - logger.info( - "Average batch inference time: {:.5f}s".format(average_batch_inference_time) - ) - - times_file = "Times.txt" - logger.info("saving {}".format(times_file)) - - with open(os.path.join(results_subfolder, times_file), "w+") as outfile: - date = datetime.datetime.now() - outfile.write("Date: {} \n".format(date.strftime("%Y-%m-%d %H:%M:%S"))) - outfile.write("Total evaluation run-time: {} \n".format(total_evalution_time)) - outfile.write( - "Average batch inference time: {} \n".format(average_batch_inference_time) - ) - outfile.write("Total inference time: {} \n".format(total_inference_time)) - - # Save model summary - summary_file = "ModelSummary.txt" - logger.info("saving {}".format(summary_file)) - - with open(os.path.join(results_subfolder, summary_file), "w+") as outfile: - summary(model, outfile) diff --git a/bob/ip/binseg/engine/predictor.py b/bob/ip/binseg/engine/predictor.py index 093c6c676fb03c38c6ce50cadf692eccb111d54d..0a63d74a4a9501a2e4a54118491c52d918d8d7a6 100644 --- a/bob/ip/binseg/engine/predictor.py +++ b/bob/ip/binseg/engine/predictor.py @@ -5,49 +5,128 @@ import os import time import datetime +import PIL import numpy -import torch from tqdm import tqdm +import torch +import torchvision.transforms.functional as VF + import bob.io.base import logging logger = logging.getLogger(__name__) -def save_hdf5(predictions, names, output_folder): +def _save_hdf5(stem, prob, output_folder): """ - Saves probability maps as image in the same format as the test image + Saves prediction maps as image in the same format as the test image Parameters ---------- - predictions : :py:class:`torch.Tensor` - tensor with pixel-wise probabilities + stem : str + the name of the file without extension on the original dataset - names : list - list of file names + prob : PIL.Image.Image + Monochrome Image with prediction maps output_folder : str - output path + path where to store overlayed results """ - for j in range(predictions.size()[0]): + fullpath = 
os.path.join(output_folder, f"{stem}.hdf5")
+    tqdm.write(f"Saving {fullpath}...")
+    fulldir = os.path.dirname(fullpath)
+    if not os.path.exists(fulldir):
+        tqdm.write(f"Creating directory {fulldir}...")
+        os.makedirs(fulldir, exist_ok=True)
+    bob.io.base.save(prob.cpu().squeeze(0).numpy(), fullpath)
+
+
+def _save_image(stem, extension, data, output_folder):
+    """Saves a PIL image into a file
+
+    Parameters
+    ----------
+
+    stem : str
+        the name of the file without extension on the original dataset
-    names : list
-        list of file names
+    extension : str
+        an extension for the file to be saved (e.g. ``.png``)
-    output_folder : str
-        output path
+    data : PIL.Image.Image
+        PIL image to be saved
+
+    output_folder : str
+        path where to store results
-def run(model, data_loader, device, output_folder):
+    """
+
+    fullpath = os.path.join(output_folder, stem + extension)
+    tqdm.write(f"Saving {fullpath}...")
+    fulldir = os.path.dirname(fullpath)
+    if not os.path.exists(fulldir):
+        tqdm.write(f"Creating directory {fulldir}...")
+        os.makedirs(fulldir, exist_ok=True)
+    data.save(fullpath)
+
+
+def _save_overlayed_png(stem, image, prob, output_folder):
+    """Overlays prediction maps (vessel tree) on top of the original test image
+
+
+    Parameters
+    ----------
+
+    stem : str
+        the name of the file without extension on the original dataset
+
+    image : torch.Tensor
+        Tensor with RGB input image
+
+    prob : torch.Tensor
+        Tensor with 1-D prediction map
+
+    output_folder : str
+        path where to store results
+
+    """
+
+    image = VF.to_pil_image(image)
+    prob = VF.to_pil_image(prob.cpu().squeeze(0))
+
+    # color and overlay
+    prob_green = PIL.ImageOps.colorize(prob, (0, 0, 0), (0, 255, 0))
+    overlayed = PIL.Image.blend(image, prob_green, 0.4)
+    _save_image(stem, '.png', overlayed, output_folder)
+
+
+def _save_transformed_png(stem, image, output_folder):
+    """Saves a PNG copy of the transformed input image to a folder
+
+
+    Parameters
+    ----------
+
+    stem : str
+        the name of the file without extension on the original dataset
+
+    image : torch.Tensor
+        Tensor with RGB input image
+
+    output_folder : str
+        path where to store results
+
+    """
+
+    _save_image(stem, '.png', VF.to_pil_image(image), output_folder)
+
+
+def run(model, data_loader, device, output_folder, overlayed_folder,
+        transformed_input_folder):
     """
     Runs inference on input data, outputs HDF5 files with predictions
@@ -62,7 +141,15 @@ def run(model, data_loader, device, output_folder):
         device to use ``cpu`` or ``cuda:0``
 
     output_folder : str
-        folder where to store output images (HDF5 files)
+        folder where to store output prediction maps (HDF5 files) and model
+        summary
+
+    overlayed_folder : str
+        folder where to store overlayed output images (PNG files)
+
+    transformed_input_folder : str
+        folder where to store input images, transformed through the input
+        pipeline (PNG files)
 
     """
 
@@ -76,7 +163,7 @@ def run(model, data_loader, device, output_folder):
     os.makedirs(output_folder, exist_ok=True)
 
     model.eval().to(device)
-    # Sigmoid for probabilities
+    # Sigmoid for predictions
    sigmoid = torch.nn.Sigmoid()
 
     # Setup timers
@@ -101,13 +188,19 @@ def run(model, data_loader, 
device, output_folder): if isinstance(outputs, list): outputs = outputs[-1] - probabilities = sigmoid(outputs) + predictions = sigmoid(outputs) batch_time = time.perf_counter() - start_time times.append(batch_time) len_samples.append(len(images)) - save_hdf5(probabilities, names, output_folder) + for name, img, prob in zip(names, images, predictions): + stem = os.path.splitext(name)[0] + _save_hdf5(stem, prob, output_folder) + if overlayed_folder is not None: + _save_overlayed_png(stem, img, prob, overlayed_folder) + if transformed_input_folder is not None: + _save_transformed_png(stem, img, transformed_input_folder) logger.info("End prediction") @@ -116,7 +209,13 @@ def run(model, data_loader, device, output_folder): logger.info(f"Total time: {total_time}") average_batch_time = numpy.mean(times) - logger.info(f"Average batch time: {average_batch_time:g}s\n") + logger.info(f"Average batch time: {average_batch_time:g}s") average_image_time = numpy.sum(numpy.array(times) * len_samples) / float(sum(len_samples)) - logger.info(f"Average image time: {average_image_time:g}s\n") + logger.info(f"Average image time: {average_image_time:g}s") + + # Save model summary + summary_path = os.path.join(output_folder, "model-info.txt") + logger.info(f"Saving model summary at {summary_path}...") + + with open(summary_path, "w") as f: summary(model, f) diff --git a/bob/ip/binseg/engine/trainer.py b/bob/ip/binseg/engine/trainer.py index 0d575bc99759cc52c479e58da4485857604778bb..da49327b605f9f3362ccc98724dc81dfc129a935 100644 --- a/bob/ip/binseg/engine/trainer.py +++ b/bob/ip/binseg/engine/trainer.py @@ -5,8 +5,9 @@ import os import csv import time import datetime + import torch -import pandas as pd +import pandas from tqdm import tqdm from bob.ip.binseg.utils.metric import SmoothedValue @@ -178,7 +179,7 @@ def run( ) # plots a version of the CSV trainlog into a PDF - logdf = pd.read_csv(logfile_name, header=0, names=logfile_fields) + logdf = pandas.read_csv(logfile_name, header=0, names=logfile_fields) fig = loss_curve(logdf, title="Loss Evolution") figurefile_name = os.path.join(output_folder, "trainlog.pdf") logger.info(f"Saving {figurefile_name}") diff --git a/bob/ip/binseg/script/binseg.py b/bob/ip/binseg/script/binseg.py index 8f63c83c4ba283f19e5ed353ee861cff601398d4..65a9b1664dd9dce2a70b215e4414c44cb1040844 100644 --- a/bob/ip/binseg/script/binseg.py +++ b/bob/ip/binseg/script/binseg.py @@ -25,9 +25,8 @@ from torch.utils.data import DataLoader from bob.ip.binseg.utils.plot import plot_overview from bob.ip.binseg.utils.click import OptionEatAll from bob.ip.binseg.utils.rsttable import create_overview_grid -from bob.ip.binseg.utils.plot import metricsviz, savetransformedtest +from bob.ip.binseg.utils.plot import metricsviz from bob.ip.binseg.utils.transformfolder import transformfolder as transfld -from bob.ip.binseg.utils.evaluate import do_eval logger = logging.getLogger(__name__) @@ -104,10 +103,6 @@ def visualize(dataset, output_path, **kwargs): """ logger.info("Creating TP, FP, FN visualizations for {}".format(output_path)) metricsviz(dataset=dataset, output_path=output_path) - logger.info("Creating overlay visualizations for {}".format(output_path)) - overlay(dataset=dataset, output_path=output_path) - logger.info("Saving transformed test images {}".format(output_path)) - savetransformedtest(dataset=dataset, output_path=output_path) # Apply image transforms to a folder containing images @binseg.command(entry_point_group="bob.ip.binseg.config", cls=ConfigCommand) diff --git 
a/bob/ip/binseg/script/evaluate.py b/bob/ip/binseg/script/evaluate.py index a036084c6c3d82fe8cefafa7a70be7d6810c1486..11ca9719aa98cb122f142e7ff73ca0d317b84abe 100644 --- a/bob/ip/binseg/script/evaluate.py +++ b/bob/ip/binseg/script/evaluate.py @@ -4,7 +4,6 @@ import click from click_plugins import with_plugins -import torch from torch.utils.data import DataLoader from bob.extension.scripts.click_helper import ( @@ -14,8 +13,7 @@ from bob.extension.scripts.click_helper import ( AliasedGroup, ) -from ..utils.checkpointer import DetectronCheckpointer -from ..engine.inferencer import do_inference +from ..engine.evaluator import run import logging logger = logging.getLogger(__name__) @@ -27,75 +25,62 @@ logger = logging.getLogger(__name__) epilog="""Examples: \b - 1. Evaluates a M2U-Net model on the DRIVE test set: - - $ bob binseg evaluate -vv m2unet drive-test --weight=results/model_final.pth - + 1. Runs evaluation on an existing dataset configuration: +\b + $ bob binseg evaluate -vv m2unet drive-test --predictions-folder=path/to/predictions --output-folder=path/to/results +\b + 2. To run evaluation on a folder with your own images and annotations, you + must first specify resizing, cropping, etc, so that the image can be + correctly input to the model. Failing to do so will likely result in + poor performance. To figure out such specifications, you must consult + the dataset configuration used for **training** the provided model. + Once you figured this out, do the following: +\b + $ bob binseg config copy csv-dataset-example mydataset.py + # modify "mydataset.py" to your liking + $ bob binseg evaluate -vv m2unet mydataset.py --predictions-folder=path/to/predictions --output-folder=path/to/results """, ) @click.option( - "--output-path", + "--output-folder", "-o", - help="Path where to store the generated model (created if does not exist)", + help="Path where to store the analysis result (created if does not exist)", required=True, default="results", cls=ResourceOption, ) @click.option( - "--model", - "-m", - help="A torch.nn.Module instance implementing the network to be evaluated", + "--predictions-folder", + "-p", + help="Path where predictions are currently stored", required=True, cls=ResourceOption, ) @click.option( "--dataset", "-d", - help="A torch.utils.data.dataset.Dataset instance implementing a dataset to be used for evaluating the model, possibly including all pre-processing pipelines required", - required=True, - cls=ResourceOption, -) -@click.option( - "--batch-size", - "-b", - help="Number of samples in every batch (this parameter affects memory requirements for the network)", + help="A torch.utils.data.dataset.Dataset instance implementing a dataset to be used for evaluating predictions, possibly including all pre-processing pipelines required", required=True, - show_default=True, - default=1, cls=ResourceOption, ) @click.option( - "--device", - "-d", - help='A string indicating the device to use (e.g. "cpu" or "cuda:0")', + "--overlayed", + "-A", + help="Creates overlayed representations of the output probability maps, " + "similar to --overlayed in prediction-mode, except it includes " + "distinctive colours for true and false positives and false negatives. " + "If not set, or empty then do **NOT** output overlayed images. 
" + "Otherwise, the parameter represents the name of a folder where to " + "store those", show_default=True, - required=True, - default="cpu", - cls=ResourceOption, -) -@click.option( - "--weight", - "-w", - help="Path or URL to pretrained model file (.pth extension)", - required=True, + default=None, + required=False, cls=ResourceOption, ) @verbosity_option(cls=ResourceOption) -def evaluate(output_path, model, dataset, batch_size, device, weight, **kwargs): +def evaluate(output_folder, predictions_folder, dataset, overlayed, **kwargs): """Evaluates an FCN on a binary segmentation task. """ - - # PyTorch dataloader - data_loader = DataLoader( - dataset=dataset, - batch_size=batch_size, - shuffle=False, - pin_memory=torch.cuda.is_available(), - ) - - # checkpointer, load last model in dir - checkpointer = DetectronCheckpointer( - model, save_dir=output_path, save_to_disk=False - ) - checkpointer.load(weight) - do_inference(model, data_loader, device, output_path) + data_loader = DataLoader(dataset=dataset, batch_size=1, shuffle=False, + pin_memory=False) + run(dataset, predictions_folder, output_folder) diff --git a/bob/ip/binseg/script/predict.py b/bob/ip/binseg/script/predict.py index 560a8a197fad525f94ce290afebcd342d1509583..2c7929ec2127f76f3afaeaae34287fcfe8b6e862 100644 --- a/bob/ip/binseg/script/predict.py +++ b/bob/ip/binseg/script/predict.py @@ -31,7 +31,7 @@ logger = logging.getLogger(__name__) \b 1. Runs prediction on an existing dataset configuration: \b - $ bob binseg predict -vv m2unet drive-test --weight=path/to/model_final.pth --output-path=path/to/predictions + $ bob binseg predict -vv m2unet drive-test --weight=path/to/model_final.pth --output-folder=path/to/predictions \b 2. To run prediction on a folder with your own images, you must first specify resizing, cropping, etc, so that the image can be correctly @@ -42,13 +42,13 @@ logger = logging.getLogger(__name__) \b $ bob binseg config copy folder-dataset-example mydataset.py # modify "mydataset.py" to include the base path and required transforms - $ bob binseg predict -vv m2unet mydataset.py --weight=path/to/model_final.pth --output-path=path/to/predictions + $ bob binseg predict -vv m2unet mydataset.py --weight=path/to/model_final.pth --output-folder=path/to/predictions """, ) @click.option( - "--output-path", + "--output-folder", "-o", - help="Path where to store the generated model (created if does not exist)", + help="Path where to store the predictions (created if does not exist)", required=True, default="results", cls=ResourceOption, @@ -92,8 +92,33 @@ logger = logging.getLogger(__name__) required=True, cls=ResourceOption, ) +@click.option( + "--overlayed", + "-O", + help="Creates overlayed representations of the output probability maps on " + "top of input images (store results as PNG files). If not set, or empty " + "then do **NOT** output overlayed images. Otherwise, the parameter " + "represents the name of a folder where to store those", + show_default=True, + default=None, + required=False, + cls=ResourceOption, +) +@click.option( + "--transformed", + "-T", + help="Creates a version of the input dataset with transformations applied, " + "but before feeding to FCN. If not set, or empty then do **NOT** output " + "transformed images. 
Otherwise, the parameter represents the name of a " + "folder where to store those", + show_default=True, + default=None, + required=False, + cls=ResourceOption, +) @verbosity_option(cls=ResourceOption) -def predict(output_path, model, dataset, batch_size, device, weight, **kwargs): +def predict(output_folder, model, dataset, batch_size, device, weight, + overlayed, transformed, **kwargs): """Predicts vessel map (probabilities) on input images""" # PyTorch dataloader @@ -112,4 +137,8 @@ def predict(output_path, model, dataset, batch_size, device, weight, **kwargs): save_to_disk=False) checkpointer.load(weight_name) - run(model, data_loader, device, output_path) + # clean-up the overlayed path + if overlayed is not None: + overlayed = overlayed.strip() + + run(model, data_loader, device, output_folder, overlayed, transformed) diff --git a/bob/ip/binseg/test/test_batchmetrics.py b/bob/ip/binseg/test/test_batchmetrics.py index 045b36385cb1a5edff0e734fc1656414ff6560ef..76b1313d4ed819cecfc8b2dd7e0f5125fd9723d9 100644 --- a/bob/ip/binseg/test/test_batchmetrics.py +++ b/bob/ip/binseg/test/test_batchmetrics.py @@ -2,11 +2,17 @@ # -*- coding: utf-8 -*- import unittest -from bob.ip.binseg.engine.inferencer import batch_metrics import random -import shutil, tempfile -import logging +import shutil + import torch +import pandas +import numpy + +from ..engine.evaluator import _sample_metrics + +import logging +logger = logging.getLogger(__name__) class Tester(unittest.TestCase): @@ -22,26 +28,19 @@ class Tester(unittest.TestCase): self.predictions = torch.rand(size=(2, 1, 420, 420)) self.ground_truths = torch.randint(low=0, high=2, size=(2, 1, 420, 420)) self.names = ["Bob", "Tim"] - self.output_folder = tempfile.mkdtemp() - self.logger = logging.getLogger(__name__) - - def tearDown(self): - # Remove the temporary folder after the test - shutil.rmtree(self.output_folder) def test_batch_metrics(self): - bm = batch_metrics( - self.predictions, - self.ground_truths, - self.names, - self.output_folder, - ) + dfs = [] + for stem, pred, gt in zip(self.names, self.predictions, + self.ground_truths): + dfs.append(_sample_metrics(stem, pred, gt)) + bm = pandas.concat(dfs) + self.assertEqual(len(bm), 2 * 100) - for metric in bm: - # check whether f1 score agree - self.assertAlmostEqual( - metric[-1], 2 * (metric[-6] * metric[-5]) / (metric[-6] + metric[-5]) - ) + # check whether f1 score agree + calculated = bm.f1_score.to_numpy() + ours = (2*(bm.precision*bm.recall)/(bm.precision+bm.recall)).to_numpy() + assert numpy.isclose(calculated, ours).all() if __name__ == "__main__": diff --git a/bob/ip/binseg/utils/evaluate.py b/bob/ip/binseg/utils/evaluate.py deleted file mode 100644 index a921f61f96b63d03964dcfe7c06a4c06ad567304..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/utils/evaluate.py +++ /dev/null @@ -1,208 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# only used to evaluate 2nd human annotator - -import os -import numpy as np -import torch -import pandas as pd -from tqdm import tqdm - -from bob.ip.binseg.utils.metric import base_metrics -from bob.ip.binseg.utils.plot import ( - precision_recall_f1iso, - precision_recall_f1iso_confintval, -) -from PIL import Image -from torchvision.transforms.functional import to_tensor - -import logging -logger = logging.getLogger(__name__) - - -def batch_metrics(predictions, ground_truths, names, output_folder): - """ - Calculates metrics on the batch and saves it to disc - - Parameters - ---------- - predictions : :py:class:`torch.Tensor` - 
tensor with pixel-wise probabilities - ground_truths : :py:class:`torch.Tensor` - tensor with binary ground-truth - names : list - list of file names - output_folder : str - output path - - Returns - ------- - list - list containing batch metrics: ``[name, threshold, precision, recall, specificity, accuracy, jaccard, f1_score]`` - """ - step_size = 0.01 - batch_metrics = [] - - for j in range(predictions.size()[0]): - # ground truth byte - gts = ground_truths[j].byte() - - file_name = "{}.csv".format(names[j]) - logger.info("saving {}".format(file_name)) - - with open(os.path.join(output_folder, file_name), "w+") as outfile: - - outfile.write( - "threshold, precision, recall, specificity, accuracy, jaccard, f1_score\n" - ) - - for threshold in np.arange(0.0, 1.0, step_size): - # threshold - binary_pred = torch.gt(predictions[j], threshold).byte() - - # equals and not-equals - equals = torch.eq(binary_pred, gts) # tensor - notequals = torch.ne(binary_pred, gts) # tensor - - # true positives - tp_tensor = gts * binary_pred # tensor - tp_count = torch.sum(tp_tensor).item() # scalar - - # false positives - fp_tensor = torch.eq((binary_pred + tp_tensor), 1) - fp_count = torch.sum(fp_tensor).item() - - # true negatives - tn_tensor = equals - tp_tensor - tn_count = torch.sum(tn_tensor).item() - - # false negatives - fn_tensor = notequals - fp_tensor - fn_count = torch.sum(fn_tensor).item() - - # calc metrics - metrics = base_metrics(tp_count, fp_count, tn_count, fn_count) - - # write to disk - outfile.write( - "{:.2f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f} \n".format( - threshold, *metrics - ) - ) - - batch_metrics.append([names[j], threshold, *metrics]) - - return batch_metrics - - -def do_eval( - prediction_folder, - data_loader, - output_folder=None, - title="2nd human", - legend="2nd human", - prediction_extension=None, -): - - """ - Calculate metrics on saved prediction images (needs batch_size = 1 !) - - Parameters - --------- - model : :py:class:`torch.nn.Module` - neural network model (e.g. 
DRIU, HED, UNet) - data_loader : py:class:`torch.torch.utils.data.DataLoader` - device : str - device to use ``'cpu'`` or ``'cuda'`` - output_folder : str - """ - logger.info("Start evaluation") - logger.info("Prediction folder {}".format(prediction_folder)) - results_subfolder = os.path.join(output_folder, "results") - os.makedirs(results_subfolder, exist_ok=True) - - # Collect overall metrics - metrics = [] - for samples in tqdm(data_loader): - names = samples[0] - ground_truths = samples[2] - - if prediction_extension is None: - pred_file = os.path.join(prediction_folder, names[0]) - else: - pred_file = os.path.join( - prediction_folder, os.path.splitext(names[0])[0] + ".png" - ) - probabilities = Image.open(pred_file) - probabilities = probabilities.convert(mode="L") - probabilities = to_tensor(probabilities) - - b_metrics = batch_metrics( - probabilities, ground_truths, names, results_subfolder - ) - metrics.extend(b_metrics) - - # DataFrame - df_metrics = pd.DataFrame( - metrics, - columns=[ - "name", - "threshold", - "precision", - "recall", - "specificity", - "accuracy", - "jaccard", - "f1_score", - ], - ) - - # Report and Averages - metrics_file = "Metrics.csv" - metrics_path = os.path.join(results_subfolder, metrics_file) - logger.info("Saving average over all input images: {}".format(metrics_file)) - - avg_metrics = df_metrics.groupby("threshold").mean() - std_metrics = df_metrics.groupby("threshold").std() - - # Uncomment below for F1-score calculation based on average precision and metrics instead of - # F1-scores of individual images. This method is in line with Maninis et. al. (2016) - # avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ - # (avg_metrics["precision"]+avg_metrics["recall"]) - - avg_metrics["std_pr"] = std_metrics["precision"] - avg_metrics["pr_upper"] = avg_metrics["precision"] + avg_metrics["std_pr"] - avg_metrics["pr_lower"] = avg_metrics["precision"] - avg_metrics["std_pr"] - avg_metrics["std_re"] = std_metrics["recall"] - avg_metrics["re_upper"] = avg_metrics["recall"] + avg_metrics["std_re"] - avg_metrics["re_lower"] = avg_metrics["recall"] - avg_metrics["std_re"] - avg_metrics["std_f1"] = std_metrics["f1_score"] - - avg_metrics.to_csv(metrics_path) - maxf1 = avg_metrics["f1_score"].max() - optimal_f1_threshold = avg_metrics["f1_score"].idxmax() - - logger.info( - "Highest F1-score of {:.5f}, achieved at threshold {}".format( - maxf1, optimal_f1_threshold - ) - ) - - # Plotting - # print(avg_metrics) - np_avg_metrics = avg_metrics.to_numpy().T - fig_name = "precision_recall.pdf" - logger.info("saving {}".format(fig_name)) - fig = precision_recall_f1iso_confintval( - [np_avg_metrics[0]], - [np_avg_metrics[1]], - [np_avg_metrics[7]], - [np_avg_metrics[8]], - [np_avg_metrics[10]], - [np_avg_metrics[11]], - [legend, None], - title=title, - ) - fig_filename = os.path.join(results_subfolder, fig_name) - fig.savefig(fig_filename) diff --git a/bob/ip/binseg/utils/plot.py b/bob/ip/binseg/utils/plot.py index 2873b71565d3c667331e6fb0162ffc52a418ea0e..ecfbe92bbcb9b438f2d9a7cb8d06fa777f7a6584 100644 --- a/bob/ip/binseg/utils/plot.py +++ b/bob/ip/binseg/utils/plot.py @@ -120,8 +120,6 @@ def precision_recall_f1iso_confintval( precision, recall, pr_upper, pr_lower, re_upper, re_lower, names, title=None ): """ - Author: Andre Anjos (andre.anjos@idiap.ch). - Creates a precision-recall plot of the given data. 
The plot will be annotated with F1-score iso-lines (in which the F1-score maintains the same value) @@ -132,13 +130,16 @@ def precision_recall_f1iso_confintval( A list of 1D np arrays containing the Y coordinates of the plot, or the precision, or a 2D np array in which the rows correspond to each of the system's precision coordinates. + recall : :py:class:`numpy.ndarray` or :py:class:`list` A list of 1D np arrays containing the X coordinates of the plot, or the recall, or a 2D np array in which the rows correspond to each of the system's recall coordinates. + names : :py:class:`list` An iterable over the names of each of the systems along the rows of ``precision`` and ``recall`` + title : :py:class:`str`, optional A title for the plot. If not set, omits the title @@ -462,63 +463,3 @@ def metricsviz( if not os.path.exists(fulldir): os.makedirs(fulldir) tp_pil_colored.save(fullpath) - - -def overlay(dataset, output_path): - """Overlays prediction probabilities vessel tree with original test image. - - Parameters - ---------- - dataset : :py:class:`torch.utils.data.Dataset` - output_path : str - path where results and probability output images are stored. E.g. ``'DRIVE/MODEL'`` - """ - - for sample in dataset: - # get sample - name = sample[0] - img = VF.to_pil_image(sample[1]) # PIL Image - - # read probability output - pred = PIL.Image.open(os.path.join(output_path, "images", name)).convert(mode="L") - # color and overlay - pred_green = PIL.ImageOps.colorize(pred, (0, 0, 0), (0, 255, 0)) - overlayed = PIL.Image.blend(img, pred_green, 0.4) - - # add f1-score - # fnt_size = overlayed.size[1]//25 - # draw = PIL.ImageDraw.Draw(overlayed) - # fnt = PIL.ImageFont.truetype('FreeMono.ttf', fnt_size) - # draw.text((0, 0),"F1: {:.4f}".format(f1),(255,255,255),font=fnt) - # save to disk - overlayed_path = os.path.join(output_path, "overlayed") - fullpath = os.path.join(overlayed_path, name) - fulldir = os.path.dirname(fullpath) - if not os.path.exists(fulldir): - os.makedirs(fulldir) - overlayed.save(fullpath) - - -def savetransformedtest(dataset, output_path): - """Save the test images as they are fed into the neural network. - Makes it easier to create overlay animations (e.g. slide) - - Parameters - ---------- - dataset : :py:class:`torch.utils.data.Dataset` - output_path : str - path where results and probability output images are stored. E.g. 
``'DRIVE/MODEL'``
-    """
-
-    for sample in dataset:
-        # get sample
-        name = sample[0]
-        img = VF.to_pil_image(sample[1])  # PIL Image
-
-        # save to disk
-        testimg_path = os.path.join(output_path, "transformedtestimages")
-        fullpath = os.path.join(testimg_path, name)
-        fulldir = os.path.dirname(fullpath)
-        if not os.path.exists(fulldir):
-            os.makedirs(fulldir)
-        img.save(fullpath)
diff --git a/doc/api.rst b/doc/api.rst
index 3643295bd9f07e23bc874a23cbff45fa370c5b9e..f90f6af99d64cce705fc2cac4ba7121e94f80f3e 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -29,11 +29,11 @@ Engines
    :toctree: api/engine
 
    bob.ip.binseg.engine
-   bob.ip.binseg.engine.adabound
-   bob.ip.binseg.engine.inferencer
-   bob.ip.binseg.engine.predictor
-   bob.ip.binseg.engine.ssltrainer
    bob.ip.binseg.engine.trainer
+   bob.ip.binseg.engine.ssltrainer
+   bob.ip.binseg.engine.predictor
+   bob.ip.binseg.engine.evaluator
+   bob.ip.binseg.engine.adabound
 
 
 Neural Network Models
@@ -68,7 +68,6 @@ Toolbox
    bob.ip.binseg.utils
    bob.ip.binseg.utils.checkpointer
    bob.ip.binseg.utils.click
-   bob.ip.binseg.utils.evaluate
    bob.ip.binseg.utils.metric
    bob.ip.binseg.utils.model_serialization
    bob.ip.binseg.utils.model_zoo
diff --git a/doc/evaluation.rst b/doc/evaluation.rst
index 0329dabdac4b888cc6faa9fb1822ad01e902735a..95ab253844b6f21c13449eb0b1fb98054947ac25 100644
--- a/doc/evaluation.rst
+++ b/doc/evaluation.rst
@@ -68,54 +68,20 @@ things up using ``--device='cuda:0'`` in case you have a GPU.
 Evaluation
 ----------
 
-In evaluation we input an **annotated** dataset and a pre-trained model to
-output a complete set of performance figures that can help analysis of model
-performance. Evaluation is done using ``bob binseg evaluate`` followed by the
-model and the dataset configuration, and the path to the pretrained model via
-the ``--weight`` argument.
+In evaluation, we input an **annotated** dataset and previously generated
+predictions to produce performance figures that can help the analysis of a
+trained model. Evaluation is done using ``bob binseg evaluate`` followed by
+the annotated dataset configuration and the path to the folder holding the
+saved predictions, via the ``--predictions-folder`` argument.
 
 Use ``bob binseg evaluate --help`` for more information.
 
-E.g. run inference on model M2U-Net on the DRIVE test set:
+E.g., to run evaluation on predictions from the DRIVE test set, do the following:
 
 .. code-block:: bash
 
-    # Point directly to saved model via -w argument:
-    bob binseg evaluate m2unet drive-test -o /outputfolder/for/results -w /direct/path/to/weight/model_final.pth
-
-    # Use training output path (requries last_checkpoint file to be present)
-    # The evaluation results will be stored in the same folder
-    bob binseg test m2unet drive-test -o /outputfolder/for/results
-
-
-Outputs
-========
-The inference run generates the following output files:
-
-.. code-block:: text
-
-    .
-    ├── images           # the predicted probabilities as grayscale images in .png format
-    ├── hdf5             # the predicted probabilties in hdf5 format
-    ├── last_checkpoint  # text file that keeps track of the last checkpoint
-    ├── trainlog.csv     # training log
-    ├── trainlog.pdf     # training log plot
-    ├── model_*.pth      # model checkpoints
-    └── results
-        ├── image*.jpg.csv        # evaluation metrics for each image
-        ├── Metrics.csv           # average evaluation metrics
-        ├── ModelSummary.txt      # model summary and parameter count
-        ├── precision_recall.pdf  # precision vs recall plot
-        └── Times.txt             # inference times
-
-
-To run evaluation of pretrained models pass url as ``-w`` argument. E.g.:
-
-.. code-block:: bash
-
-    bob binseg test DRIU DRIVETEST -o Evaluation_DRIU_DRIVE -w https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/DRIU_DRIVE.pth
-    bob binseg test M2UNet DRIVETEST -o Evaluation_M2UNet_DRIVE -w https://www.idiap.ch/software/bob/data/bob/bob.ip.binseg/master/M2UNet_DRIVE.pth
-
+    # Point to the folder with saved predictions (-p) and an output folder (-o):
+    bob binseg evaluate -vv drive-test -p /predictions/folder -o /eval/results/folder
 
 .. include:: links.rst
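
For reference, a minimal sketch of the two-step workflow after this change, assuming the stock ``m2unet`` and ``drive-test`` configurations shipped with the package and purely illustrative paths (``/path/to/model_final.pth``, ``predictions/``, ``eval/``); prediction writes the HDF5 probability maps that evaluation then reads back via ``--predictions-folder``:

    # 1) run inference with a trained model; one HDF5 prediction map is written
    #    per input image under "predictions/" (-O/-T optionally also store
    #    overlayed / transformed PNG copies)
    bob binseg predict -vv m2unet drive-test --weight=/path/to/model_final.pth --output-folder=predictions

    # 2) compute per-threshold metrics against the ground-truth annotations;
    #    writes metrics.csv and precision-recall.pdf under "eval/"
    bob binseg evaluate -vv drive-test --predictions-folder=predictions --output-folder=eval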