diff --git a/bob/ip/binseg/engine/inferencer.py b/bob/ip/binseg/engine/inferencer.py index ccff70d4019c6856334e982fb861d315c2d60196..26de6cf733e301f2ec60a76e275822ec85491be3 100644 --- a/bob/ip/binseg/engine/inferencer.py +++ b/bob/ip/binseg/engine/inferencer.py @@ -194,8 +194,10 @@ def do_inference( avg_metrics = df_metrics.groupby('threshold').mean() std_metrics = df_metrics.groupby('threshold').std() - avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ - (avg_metrics["precision"]+avg_metrics["recall"]) + # Uncomment below for F1-score calculation based on average precision and recall instead of + # F1-scores of individual images. This method is in line with Maninis et al. (2016) + #avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ + # (avg_metrics["precision"]+avg_metrics["recall"]) avg_metrics["std_f1"] = std_metrics["f1_score"] diff --git a/bob/ip/binseg/script/binseg.py b/bob/ip/binseg/script/binseg.py index 780d85a30f13078af47e9e3ab79aa451b14f108e..44166070d13eeea92d77b6b54c4e1b49e62241c5 100644 --- a/bob/ip/binseg/script/binseg.py +++ b/bob/ip/binseg/script/binseg.py @@ -337,28 +337,6 @@ def compare(output_path_list, output_path, title, **kwargs): fig.savefig(fig_filename) -# Plot overviews -@binseg.command(entry_point_group='bob.ip.binseg.config', cls=ConfigCommand) -@click.option( - '--output-path', - '-o', - required=True, - ) -@verbosity_option(cls=ResourceOption) -def pdfoverview(output_path, **kwargs): - """ Creates an overview pdf with all precision vs recall curves present in the output directory. - Requires pdflatex to be available on the host.""" - # PR curves - pr_filename = "precision_recall_comparison.pdf" - pr_filenames = get_paths(output_path,pr_filename) - create_pdf(output_path, pr_filenames, title='Precision vs Recall', tex_filename='pr_overview.tex') - - # Training curves - trainlog_filename = "*trainlog.pdf" - tl_file_names = get_paths(output_path,trainlog_filename) - create_pdf(output_path, tl_file_names, title='Training', tex_filename='training_overview.tex') - - # Create grid table with results @binseg.command(entry_point_group='bob.ip.binseg.config', cls=ConfigCommand) @click.option( @@ -552,7 +530,7 @@ def ssltrain(model , rampup ) -# Apple image transforms to a fodler +# Apply image transforms to a folder containing images @binseg.command(entry_point_group='bob.ip.binseg.config', cls=ConfigCommand) @click.option( '--source-path', @@ -579,7 +557,7 @@ def transformfolder(source_path ,target_path,transforms,**kwargs): transfld(source_path,target_path,transforms) -# Eval only +# Evaluate only.
Runs evaluation on predicted probability maps (--prediction-folder) @binseg.command(entry_point_group='bob.ip.binseg.config', cls=ConfigCommand) @click.option( '--output-path', '-o', required=True, cls=ResourceOption ) @@ -591,6 +569,7 @@ def transformfolder(source_path ,target_path,transforms,**kwargs): @click.option( '--prediction-folder', '-p', + help = 'Path containing output probability maps', required=True, cls=ResourceOption ) @@ -600,12 +579,23 @@ def transformfolder(source_path ,target_path,transforms,**kwargs): required=True, cls=ResourceOption ) +@click.option( + '--title', + required=False, + cls=ResourceOption + ) +@click.option( + '--legend', + cls=ResourceOption + ) @verbosity_option(cls=ResourceOption) def evalpred( output_path ,prediction_folder ,dataset + ,title + ,legend , **kwargs): """ Run inference and evaluate the model performance """ @@ -617,5 +607,5 @@ def evalpred( ,pin_memory = torch.cuda.is_available() ) - # checkpointer, load last model in dir - do_eval(prediction_folder, data_loader, output_folder = output_path) \ No newline at end of file + # Run eval + do_eval(prediction_folder, data_loader, output_folder = output_path, title= title, legend=legend) \ No newline at end of file diff --git a/bob/ip/binseg/utils/evaluate.py b/bob/ip/binseg/utils/evaluate.py index 620b62db87a0e001151e0a00eeb17fe9829886a7..68257128d64ca899efbd98d2aa8cc3888d932755 100644 --- a/bob/ip/binseg/utils/evaluate.py +++ b/bob/ip/binseg/utils/evaluate.py @@ -13,7 +13,7 @@ import torchvision.transforms.functional as VF from tqdm import tqdm from bob.ip.binseg.utils.metric import SmoothedValue, base_metrics -from bob.ip.binseg.utils.plot import precision_recall_f1iso +from bob.ip.binseg.utils.plot import precision_recall_f1iso, precision_recall_f1iso_confintval from bob.ip.binseg.utils.summary import summary from PIL import Image from torchvision.transforms.functional import to_tensor @@ -95,7 +95,9 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger): def do_eval( prediction_folder, data_loader, - output_folder = None + output_folder = None, + title = '2nd human', + legend = '2nd human' ): """ @@ -119,7 +121,7 @@ def do_eval( # Collect overall metrics metrics = [] - + num_images = len(data_loader) for samples in tqdm(data_loader): names = samples[0] images = samples[1] @@ -156,9 +158,18 @@ def do_eval( avg_metrics = df_metrics.groupby('threshold').mean() std_metrics = df_metrics.groupby('threshold').std() - avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ - (avg_metrics["precision"]+avg_metrics["recall"]) + # Uncomment below for F1-score calculation based on average precision and recall instead of + # F1-scores of individual images. This method is in line with Maninis et al.
(2016) + #avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ + # (avg_metrics["precision"]+avg_metrics["recall"]) + + avg_metrics["std_pr"] = std_metrics["precision"] + avg_metrics["pr_upper"] = avg_metrics['precision'] + avg_metrics["std_pr"] + avg_metrics["pr_lower"] = avg_metrics['precision'] - avg_metrics["std_pr"] + avg_metrics["std_re"] = std_metrics["recall"] + avg_metrics["re_upper"] = avg_metrics['recall'] + avg_metrics["std_re"] + avg_metrics["re_lower"] = avg_metrics['recall'] - avg_metrics["std_re"] avg_metrics["std_f1"] = std_metrics["f1_score"] avg_metrics.to_csv(metrics_path) @@ -168,10 +179,11 @@ def do_eval( logger.info("Highest F1-score of {:.5f}, achieved at threshold {}".format(maxf1, optimal_f1_threshold)) # Plotting + #print(avg_metrics) np_avg_metrics = avg_metrics.to_numpy().T fig_name = "precision_recall.pdf" logger.info("saving {}".format(fig_name)) - fig = precision_recall_f1iso([np_avg_metrics[0]],[np_avg_metrics[1]], ['2nd Human',None], title='2nd Human') + fig = precision_recall_f1iso_confintval([np_avg_metrics[0]],[np_avg_metrics[1]],[np_avg_metrics[7]],[np_avg_metrics[8]],[np_avg_metrics[10]],[np_avg_metrics[11]], [legend ,None], title=title) fig_filename = os.path.join(results_subfolder, fig_name) fig.savefig(fig_filename) diff --git a/bob/ip/binseg/utils/metric.py b/bob/ip/binseg/utils/metric.py index d1f5ec63fd1b1d7ff3818a976af98562e676f85b..bcb91511f533a8ed69843487ef8cbe793e42a925 100644 --- a/bob/ip/binseg/utils/metric.py +++ b/bob/ip/binseg/utils/metric.py @@ -60,5 +60,5 @@ def base_metrics(tp, fp, tn, fn): accuracy = (tp + tn) / (tp+fp+fn+tn) jaccard = tp / (tp+fp+fn + ( (tp+fp+fn) == 0) ) f1_score = (2.0 * tp ) / (2.0 * tp + fp + fn + ( (2.0 * tp + fp + fn) == 0) ) - + #f1_score = (2.0 * precision * recall) / (precision + recall) return [precision, recall, specificity, accuracy, jaccard, f1_score] \ No newline at end of file diff --git a/bob/ip/binseg/utils/pdfcreator.py b/bob/ip/binseg/utils/pdfcreator.py deleted file mode 100644 index fdd1acd393e40e2aa103e9e5f5484f603c1bb350..0000000000000000000000000000000000000000 --- a/bob/ip/binseg/utils/pdfcreator.py +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -from pathlib import Path -import os - - -def get_paths(output_path, filename): - """ - Parameters - ---------- - output_path : str - path in which to look for files - filename : str - - Returns - ------- - list - list of file paths - """ - datadir = Path(output_path) - file_paths = sorted(list(datadir.glob('**/{}'.format(filename)))) - file_paths = [f.as_posix() for f in file_paths] - return file_paths - - -def create_pdf(output_path, file_paths, title, tex_filename): - # setup tex doc - textitle = "\\section*{{{}}} \n".format(title, 42) - texinit = "\\documentclass{{article}} \\usepackage[utf8]{{inputenc}} \\usepackage[a4paper, margin=2cm]{{geometry}} \\usepackage{{graphicx}} \\begin{{document}} \n".format(42) - texclose = "\\end{{document}} \n".format(42) - with open (os.path.join(output_path,tex_filename), "w+") as outfile: - outfile.write(texinit) - outfile.write(textitle) - for f in file_paths: - outfile.write("\\includegraphics[width=0.5\\textwidth]{{{}}} \n".format(f,42)) - outfile.write(texclose) - # create pdf - os.system("pdflatex -output-directory {} {}".format(output_path, os.path.join(output_path,tex_filename))) \ No newline at end of file diff --git a/bob/ip/binseg/utils/plot.py b/bob/ip/binseg/utils/plot.py index 
0e6169b41cf3361e7388b7ed2a184ae2abc6f236..b5943e9dc75c3ce034bdc70a8e49c4d231038880 100644 --- a/bob/ip/binseg/utils/plot.py +++ b/bob/ip/binseg/utils/plot.py @@ -102,7 +102,121 @@ def precision_recall_f1iso(precision, recall, names, title=None): plt.tight_layout() return fig +def precision_recall_f1iso_confintval(precision, recall, pr_upper, pr_lower, re_upper, re_lower, names, title=None): + """ + Author: Andre Anjos (andre.anjos@idiap.ch). + + Creates a precision-recall plot of the given data. + The plot will be annotated with F1-score iso-lines (in which the F1-score + maintains the same value). + + Parameters + ---------- + precision : :py:class:`numpy.ndarray` or :py:class:`list` + A list of 1D np arrays containing the Y coordinates of the plot, or + the precision, or a 2D np array in which the rows correspond to each + of the system's precision coordinates. + recall : :py:class:`numpy.ndarray` or :py:class:`list` + A list of 1D np arrays containing the X coordinates of the plot, or + the recall, or a 2D np array in which the rows correspond to each + of the system's recall coordinates. + pr_upper : :py:class:`numpy.ndarray` or :py:class:`list` + Upper bounds of the precision confidence intervals, in the same format + as ``precision``. + pr_lower : :py:class:`numpy.ndarray` or :py:class:`list` + Lower bounds of the precision confidence intervals, in the same format + as ``precision``. + re_upper : :py:class:`numpy.ndarray` or :py:class:`list` + Upper bounds of the recall confidence intervals, in the same format as + ``recall``. + re_lower : :py:class:`numpy.ndarray` or :py:class:`list` + Lower bounds of the recall confidence intervals, in the same format as + ``recall``. + names : :py:class:`list` + An iterable over the names of each of the systems along the rows of + ``precision`` and ``recall`` + title : :py:class:`str`, optional + A title for the plot. If not set, omits the title + + Returns + ------- + matplotlib.figure.Figure + A matplotlib figure you can save or display + """ + import matplotlib + matplotlib.use('agg') + import matplotlib.pyplot as plt + from itertools import cycle + fig, ax1 = plt.subplots(1) + lines = ["-","--","-.",":"] + colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', + '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', + '#bcbd22', '#17becf'] + colorcycler = cycle(colors) + linecycler = cycle(lines) + for p, r, pu, pl, ru, rl, n in zip(precision, recall, pr_upper, pr_lower, re_upper, re_lower, names): + # Plots only from the point where recall reaches its maximum, otherwise, we + # don't see a curve...
+ i = r.argmax() + pi = p[i:] + ri = r[i:] + pui = pu[i:] + pli = pl[i:] + rui = ru[i:] + rli = rl[i:] + valid = (pi+ri) > 0 + f1 = 2 * (pi[valid]*ri[valid]) / (pi[valid]+ri[valid]) + # optimal point along the curve + argmax = f1.argmax() + opi = pi[argmax] + ori = ri[argmax] + # Plot Recall/Precision as threshold changes + ax1.plot(ri[pi>0], pi[pi>0], next(linecycler), label='[F={:.4f}] {}'.format(f1.max(), n),) + ax1.plot(ori,opi, marker='o', linestyle=None, markersize=3, color='black') + # Plot confidence + # Upper bound + #ax1.plot(rui[pui>0], pui[pui>0]) + # Lower bound + #ax1.plot(rli[pli>0], pli[pli>0]) + # create the limiting polygon + vert_x = np.concatenate((rui[pui>0], rli[pli>0][::-1])) + vert_y = np.concatenate((pui[pui>0], pli[pli>0][::-1])) + # hacky workaround to detect the 2nd-human annotation (its recall upper bound is near-constant) and draw only the outline of its confidence region + if np.isclose(np.mean(rui), rui[1], rtol=1e-05): + print('found human') + p = plt.Polygon(np.column_stack((vert_x, vert_y)), facecolor='none', alpha=.2, edgecolor=next(colorcycler),lw=2) + else: + p = plt.Polygon(np.column_stack((vert_x, vert_y)), facecolor=next(colorcycler), alpha=.2, edgecolor='none',lw=.2) + ax1.add_artist(p) + ax1.grid(linestyle='--', linewidth=1, color='gray', alpha=0.2) + if len(names) > 1: + plt.legend(loc='lower left', framealpha=0.5) + ax1.set_xlabel('Recall') + ax1.set_ylabel('Precision') + ax1.set_xlim([0.0, 1.0]) + ax1.set_ylim([0.0, 1.0]) + if title is not None: ax1.set_title(title) + # Annotates plot with F1-score iso-lines + ax2 = ax1.twinx() + f_scores = np.linspace(0.1, 0.9, num=9) + tick_locs = [] + tick_labels = [] + for f_score in f_scores: + x = np.linspace(0.01, 1) + y = f_score * x / (2 * x - f_score) + l, = plt.plot(x[y >= 0], y[y >= 0], color='green', alpha=0.1) + tick_locs.append(y[-1]) + tick_labels.append('%.1f' % f_score) + ax2.tick_params(axis='y', which='both', pad=0, right=False, left=False) + ax2.set_ylabel('iso-F', color='green', alpha=0.3) + ax2.set_ylim([0.0, 1.0]) + ax2.yaxis.set_label_coords(1.015, 0.97) + ax2.set_yticks(tick_locs) #notice these are invisible + for k in ax2.set_yticklabels(tick_labels): + k.set_color('green') + k.set_alpha(0.3) + k.set_size(8) + # we should see some of axes 1 axes + ax1.spines['right'].set_visible(False) + ax1.spines['top'].set_visible(False) + ax1.spines['left'].set_position(('data', -0.015)) + ax1.spines['bottom'].set_position(('data', -0.015)) + # we shouldn't see any of axes 2 axes + ax2.spines['right'].set_visible(False) + ax2.spines['top'].set_visible(False) + ax2.spines['left'].set_visible(False) + ax2.spines['bottom'].set_visible(False) + plt.tight_layout() + return fig def loss_curve(df, title): """ Creates a loss curve given a Dataframe with column names: @@ -152,10 +266,18 @@ def read_metricscsv(file): next(metricsreader) precision = [] recall = [] + pr_upper = [] + pr_lower = [] + re_upper = [] + re_lower = [] for row in metricsreader: precision.append(float(row[1])) recall.append(float(row[2])) - return np.array(precision), np.array(recall) + pr_upper.append(float(row[8])) + pr_lower.append(float(row[9])) + re_upper.append(float(row[11])) + re_lower.append(float(row[12])) + return np.array(precision), np.array(recall), np.array(pr_upper), np.array(pr_lower), np.array(re_upper), np.array(re_lower) def plot_overview(outputfolders,title): @@ -174,14 +296,22 @@ def plot_overview(outputfolders,title): """ precisions = [] recalls = [] + pr_ups = [] + pr_lows = [] + re_ups = [] + re_lows = [] names = [] params = [] for folder in outputfolders: # metrics metrics_path =
os.path.join(folder,'results/Metrics.csv') - pr, re = read_metricscsv(metrics_path) + pr, re, pr_upper, pr_lower, re_upper, re_lower = read_metricscsv(metrics_path) precisions.append(pr) recalls.append(re) + pr_ups.append(pr_upper) + pr_lows.append(pr_lower) + re_ups.append(re_upper) + re_lows.append(re_lower) modelname = folder.split('/')[-1] datasetname = folder.split('/')[-2] # parameters @@ -190,10 +320,11 @@ def plot_overview(outputfolders,title): rows = outfile.readlines() lastrow = rows[-1] parameter = int(lastrow.split()[1].replace(',','')) - name = '[P={:.2f}M] {} {}'.format(parameter/100**3, modelname, "") + #name = '[P={:.2f}M] {} {}'.format(parameter/100**3, modelname, "") + name = '{} '.format(modelname) names.append(name) #title = folder.split('/')[-4] - fig = precision_recall_f1iso(precisions,recalls,names,title) + fig = precision_recall_f1iso_confintval(precisions,recalls, pr_ups, pr_lows, re_ups, re_lows, names,title) return fig def metricsviz(dataset diff --git a/bob/ip/binseg/utils/rsttable.py b/bob/ip/binseg/utils/rsttable.py index 15a0b68d0f8a6decd08b80710ebd5d57b62a6dd9..1db04e45892c5993e711529cb6ebc4b4ea41a43a 100644 --- a/bob/ip/binseg/utils/rsttable.py +++ b/bob/ip/binseg/utils/rsttable.py @@ -8,11 +8,15 @@ def create_overview_grid(output_path): filename = 'Metrics.csv' metrics = get_paths(output_path,filename) f1s = [] + stds = [] models = [] databases = [] for m in metrics: metrics = pd.read_csv(m) maxf1 = metrics['f1_score'].max() + idmaxf1 = metrics['f1_score'].idxmax() + std = metrics['std_f1'][idmaxf1] + stds.append(std) f1s.append(maxf1) model = m.split('/')[-3] models.append(model) @@ -22,7 +26,11 @@ def create_overview_grid(output_path): df['database'] = databases df['model'] = models df['f1'] = f1s + df['std'] = stds pivot = df.pivot(index='database',columns='model',values='f1') + pivot2 = df.pivot(index='database',columns='model',values='std') with open (os.path.join(output_path,'Metrics_overview.rst'), "w+") as outfile: - outfile.write(tabulate(pivot,headers=pivot.columns, tablefmt="grid")) \ No newline at end of file + outfile.write(tabulate(pivot,headers=pivot.columns, tablefmt="grid")) + with open (os.path.join(output_path,'Metrics_overview_std.rst'), "w+") as outfile: + outfile.write(tabulate(pivot2,headers=pivot2.columns, tablefmt="grid")) \ No newline at end of file diff --git a/doc/benchmarkresults.rst b/doc/benchmarkresults.rst index c0d6ae0a689c16f19197352735aace7c38a58be7..2f391611db75358ffeb809058e5bf2242a474b93 100644 --- a/doc/benchmarkresults.rst +++ b/doc/benchmarkresults.rst @@ -16,55 +16,15 @@ F1 Scores +--------------------------------------------+------------------------------------------------+---------------------------------------------+-------------------------------------------+----------------------------------------------+---------------------------------------------+ | F1 (std) | :ref:`bob.ip.binseg.configs.datasets.chasedb1` | :ref:`bob.ip.binseg.configs.datasets.drive` | :ref:`bob.ip.binseg.configs.datasets.hrf` | :ref:`bob.ip.binseg.configs.datasets.iostar` | :ref:`bob.ip.binseg.configs.datasets.stare` | +--------------------------------------------+------------------------------------------------+---------------------------------------------+-------------------------------------------+----------------------------------------------+---------------------------------------------+ -| :ref:`bob.ip.binseg.configs.models.driu` | `0.8114 (0.0206) <driu_chasedb1.pth_>`_ | `0.8226 (0.0142) <driu_drive.pth_>`_ | `0.7865 (0.0545) 
<driu_hrf.pth_>`_ | `0.8273 (0.0199) <driu_iostar.pth_>`_ | `0.8286 (0.0368) <driu_stare.pth_>`_ | +| :ref:`bob.ip.binseg.configs.models.driu` | `0.810 (0.021) <driu_chasedb1.pth_>`_ | `0.820 (0.014) <driu_drive.pth_>`_ | `0.783 (0.055) <driu_hrf.pth_>`_ | `0.825 (0.020) <driu_iostar.pth_>`_ | `0.827 (0.037) <driu_stare.pth_>`_ | +--------------------------------------------+------------------------------------------------+---------------------------------------------+-------------------------------------------+----------------------------------------------+---------------------------------------------+ -| :ref:`bob.ip.binseg.configs.models.hed` | 0.8111 (0.0214) | 0.8192 (0.0136) | 0.7868 (0.0576) | 0.8275 (0.0201) | 0.8250 (0.0375) | +| :ref:`bob.ip.binseg.configs.models.hed` | 0.810 (0.022) | 0.817 (0.013) | 0.783 (0.058) | 0.825 (0.020) | 0.823 (0.037) | +--------------------------------------------+------------------------------------------------+---------------------------------------------+-------------------------------------------+----------------------------------------------+---------------------------------------------+ -| :ref:`bob.ip.binseg.configs.models.m2unet` | `0.8035 (0.0195) <m2unet_chasedb1.pth_>`_ | `0.8051 (0.0141) <m2unet_drive.pth_>`_ | `0.7838 (0.0572) <m2unet_hrf.pth_>`_ | `0.8194 (0.0201) <m2unet_iostar.pth_>`_ | `0.8174 (0.0409) <m2unet_stare.pth_>`_ | +| :ref:`bob.ip.binseg.configs.models.m2unet` | `0.802 (0.019) <m2unet_chasedb1.pth_>`_ | `0.803 (0.014) <m2unet_drive.pth_>`_ | `0.780 (0.057) <m2unet_hrf.pth_>`_ | `0.817 (0.020) <m2unet_iostar.pth_>`_ | `0.815 (0.041) <m2unet_stare.pth_>`_ | +--------------------------------------------+------------------------------------------------+---------------------------------------------+-------------------------------------------+----------------------------------------------+---------------------------------------------+ -| :ref:`bob.ip.binseg.configs.models.unet` | 0.8136 (0.0209) | 0.8237 (0.0145) | 0.7914 (0.0516) | 0.8203 (0.0190) | 0.8306 (0.0421) | +| :ref:`bob.ip.binseg.configs.models.unet` | 0.812 (0.020) | 0.822 (0.015) | 0.788 (0.051) | 0.818 (0.019) | 0.829 (0.042) | +--------------------------------------------+------------------------------------------------+---------------------------------------------+-------------------------------------------+----------------------------------------------+---------------------------------------------+ -.. figure:: img/pr_CHASEDB1.png - :scale: 30 % - :align: center - :alt: model comparisons - - CHASE_DB1: Precision vs Recall curve, F1 scores and - number of parameter of each model. - -.. figure:: img/pr_DRIVE.png - :scale: 30 % - :align: center - :alt: model comparisons - - DRIVE: Precision vs Recall curve, F1 scores and - number of parameter of each model. - -.. figure:: img/pr_HRF.png - :scale: 30 % - :align: center - :alt: model comparisons - - HRF: Precision vs Recall curve, F1 scores and - number of parameter of each model. - -.. figure:: img/pr_IOSTARVESSEL.png - :scale: 30 % - :align: center - :alt: model comparisons - - IOSTAR: Precision vs Recall curve, F1 scores and - number of parameter of each model. - -.. figure:: img/pr_STARE.png - :scale: 30 % - :align: center - :alt: model comparisons - - STARE: Precision vs Recall curve, F1 scores and - number of parameter of each model. - .. 
include:: links.rst diff --git a/doc/covdresults.rst b/doc/covdresults.rst index b3c5541408faf071f352865ef380fb0eef905aa0..95f7d7fe40f860b1c8af32f6beca365ee14e9eb1 100644 --- a/doc/covdresults.rst +++ b/doc/covdresults.rst @@ -12,26 +12,66 @@ F1 Scores F1 score together with standard deviation across test images. +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| F1 score | :ref:`bob.ip.binseg.configs.models.driussl` | :ref:`bob.ip.binseg.configs.models.driubnssl` | :ref:`bob.ip.binseg.configs.models.m2unetssl` | +| F1 score | :ref:`bob.ip.binseg.configs.models.driu` | :ref:`bob.ip.binseg.configs.models.driubn` | :ref:`bob.ip.binseg.configs.models.m2unet` | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-drive` | 0.7896 (0.0178) | 0.8000 (0.0182) | `0.7906 (0.0179) <m2unet_covd-drive.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-drive` | 0.788 (0.018) | 0.797 (0.019) | `0.789 (0.018) <m2unet_covd-drive.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-drive_ssl` | 0.7870 (0.0176) | 0.8020 (0.0179) | `0.7938 (0.0142) <m2unet_covd-drive_ssl.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-drive_ssl` | 0.785 (0.018) | 0.783 (0.019) | `0.791 (0.014) <m2unet_covd-drive_ssl.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-stare` | 0.7979 (0.1254) | 0.8129 (0.0986) | `0.8120 (0.0457) <m2unet_covd-stare.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-stare` | 0.778 (0.117) | 0.778 (0.122) | `0.812 (0.046) <m2unet_covd-stare.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-stare_ssl` | 0.8062 (0.1033) | 0.8221 (0.0784) | `0.8222 (0.0441) <m2unet_covd-stare_ssl.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-stare_ssl` | 0.788 (0.102) | 0.811 (0.074) | `0.820 (0.044) <m2unet_covd-stare_ssl.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-chasedb1` | 0.7979 (0.0284) | 0.7923 (0.0240) | `0.7898 (0.0236) <m2unet_covd-chasedb1.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-chasedb1` | 0.796 (0.027) | 0.791 (0.025) | `0.788 (0.024) <m2unet_covd-chasedb1.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-chasedb1_ssl` | 0.7976 (0.0242) | 0.7992 (0.0235) | `0.8000 (0.0268) 
<m2unet_covd-chasedb1_ssl.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-chasedb1_ssl` | 0.796 (0.024) | 0.798 (0.025) | `0.799 (0.026) <m2unet_covd-chasedb1_ssl.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-hrf` | 0.8013 (0.0436) | 0.8027 (0.0452) | `0.8036 (0.0442) <m2unet_covd-hrf.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-hrf` | 0.799 (0.044) | 0.800 (0.045) | `0.802 (0.045) <m2unet_covd-hrf.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-hrf_ssl` | 0.8002 (0.0421) | 0.7916 (0.0468) | `0.7987 (0.0436) <m2unet_covd-hrf_ssl.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-hrf_ssl` | 0.799 (0.044) | 0.784 (0.048) | `0.797 (0.044) <m2unet_covd-hrf_ssl.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-iostar` | 0.7934 (0.0206) | 0.7763 (0.0311) | `0.7953 (0.0152) <m2unet_covd-iostar.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-iostar` | 0.791 (0.021) | 0.777 (0.032) | `0.793 (0.015) <m2unet_covd-iostar.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ -| :ref:`bob.ip.binseg.configs.datasets.covd-iostar_ssl` | 0.7995 (0.0174) | 0.7904 (0.0215) | `0.7868 (0.0182) <m2unet_covd-iostar_ssl.pth>`_ | +| :ref:`bob.ip.binseg.configs.datasets.covd-iostar_ssl` | 0.797 (0.017) | 0.811 (0.074) | `0.785 (0.018) <m2unet_covd-iostar_ssl.pth>`_ | +---------------------------------------------------------+---------------------------------------------+-----------------------------------------------+---------------------------------------------------+ +M2U-Net Precision vs. Recall Curves +=================================== + +Note that here the F1-score is calculated on a macro level (see paper for more details). + +.. figure:: img/pr_CHASEDB1.png + :scale: 50 % + :align: center + :alt: model comparisons + + CHASE_DB1: Precision vs Recall curve and F1 scores + +.. figure:: img/pr_DRIVE.png + :scale: 50 % + :align: center + :alt: model comparisons + + DRIVE: Precision vs Recall curve and F1 scores + +.. figure:: img/pr_HRF.png + :scale: 50 % + :align: center + :alt: model comparisons + + HRF: Precision vs Recall curve and F1 scores + +.. figure:: img/pr_IOSTARVESSEL.png + :scale: 50 % + :align: center + :alt: model comparisons + + IOSTAR: Precision vs Recall curve and F1 scores + +.. 
figure:: img/pr_STARE.png + :scale: 50 % + :align: center + :alt: model comparisons + + STARE: Precision vs Recall curve and F1 scores + diff --git a/doc/img/pr_CHASEDB1.png b/doc/img/pr_CHASEDB1.png index 7fe74f4e6178af9abc8fdda8c3d1142c992110c8..923a4af3445926fe46cb3ac58a3a73d28b12d9fc 100644 Binary files a/doc/img/pr_CHASEDB1.png and b/doc/img/pr_CHASEDB1.png differ diff --git a/doc/img/pr_DRIVE.png b/doc/img/pr_DRIVE.png index fc9e739e31c47bf319981dc6a561e335acfb261b..2aee69db83b3caaabcaa71e7c24d2eab7ca0eb7e 100644 Binary files a/doc/img/pr_DRIVE.png and b/doc/img/pr_DRIVE.png differ diff --git a/doc/img/pr_HRF.png b/doc/img/pr_HRF.png index ac6f870ece6c4fe9d439ba5c0d5e3914eea3bcbb..df479805f32dcd60770facfab285322d02534c68 100644 Binary files a/doc/img/pr_HRF.png and b/doc/img/pr_HRF.png differ diff --git a/doc/img/pr_IOSTARVESSEL.png b/doc/img/pr_IOSTARVESSEL.png index 97ed5c7a6b8f0d7ab6c0786db55588d7b163e9bb..e7c1b9a9203473d50a21adf938c9f4ccb0abe034 100644 Binary files a/doc/img/pr_IOSTARVESSEL.png and b/doc/img/pr_IOSTARVESSEL.png differ diff --git a/doc/img/pr_STARE.png b/doc/img/pr_STARE.png index 14603d2d3782292e66c813685fc61bca60953976..c485243f5ecb3350685a327fd61e9a12d457e71d 100644 Binary files a/doc/img/pr_STARE.png and b/doc/img/pr_STARE.png differ diff --git a/doc/index.rst b/doc/index.rst index 97d792b87b1b2e40dd566f373220540955dbab48..18d8d561e8e5aff351f4e411a6496edad5458c07 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -23,6 +23,7 @@ Users Guide benchmarkresults covdresults configs + plotting visualization api diff --git a/doc/plotting.rst b/doc/plotting.rst new file mode 100644 index 0000000000000000000000000000000000000000..f05ee42d12572211ffe4037a1c7d706283042a63 --- /dev/null +++ b/doc/plotting.rst @@ -0,0 +1,20 @@ +.. -*- coding: utf-8 -*- +.. _bob.ip.binseg.plotting: + +======== +Plotting +======== + +Precision vs recall curves for each evaluation run are generated by default and +stored in the ``results`` subfolder of the model output directory. + +To generate a comparison chart of various models, use the ``compare`` command +and pass as arguments the output paths of the models you would like to plot. + +E.g.: + +.. code-block:: bash + + bob binseg compare -o myoutput -l myoutput/DRIVE/M2U-Net myoutput/DRIVE/U-Net myoutput/DRIVE/HED -t MyPlotTitle + +Use ``bob binseg compare --help`` for more information.
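
A minimal sketch of the two F1-score conventions toggled in the comments above: averaging the per-image F1-scores (the behaviour kept by this patch) versus recomputing F1 from the averaged precision and recall (the commented-out alternative, in line with Maninis et al., 2016). The numbers below are synthetic; only the ``threshold``/``precision``/``recall``/``f1_score`` column names mirror the per-image table that ``do_eval`` aggregates into ``Metrics.csv``:

.. code-block:: python

   import pandas as pd

   # Synthetic per-image metrics: two images, each evaluated at two thresholds
   # (a stand-in for the table behind Metrics.csv).
   df_metrics = pd.DataFrame({
       "threshold": [0.4, 0.4, 0.6, 0.6],
       "precision": [0.80, 0.90, 0.85, 0.95],
       "recall":    [0.70, 0.60, 0.65, 0.55],
   })
   df_metrics["f1_score"] = (2 * df_metrics["precision"] * df_metrics["recall"]
                             / (df_metrics["precision"] + df_metrics["recall"]))

   avg_metrics = df_metrics.groupby("threshold").mean()

   # Convention kept by the patch: mean of the per-image F1-scores.
   f1_per_image = avg_metrics["f1_score"]            # ~0.7333 at threshold 0.4

   # Commented-out alternative (Maninis et al., 2016): F1 recomputed from the
   # averaged precision and recall.
   f1_from_means = (2 * avg_metrics["precision"] * avg_metrics["recall"]
                    / (avg_metrics["precision"] + avg_metrics["recall"]))
                                                     # ~0.7367 at threshold 0.4

In general the two conventions agree only when precision and recall do not vary across images, which is why switching between them changes the reported F1 values.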