From c485510f099f888a58cbea3a6d25c32186b3bb8f Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.anjos@idiap.ch>
Date: Mon, 16 Mar 2020 18:21:09 +0100
Subject: [PATCH] [script/binseg] In evalpred, allow user to specify the
 prediction map extension, but defaults to .png

---
 bob/ip/binseg/script/binseg.py  | 35 ++++++++------
 bob/ip/binseg/utils/evaluate.py | 83 +++++++++++++++++----------------
 2 files changed, 65 insertions(+), 53 deletions(-)

diff --git a/bob/ip/binseg/script/binseg.py b/bob/ip/binseg/script/binseg.py
index f78fe404..944a9995 100644
--- a/bob/ip/binseg/script/binseg.py
+++ b/bob/ip/binseg/script/binseg.py
@@ -139,7 +139,7 @@ def train(model
         ,seed
         ,**kwargs):
     """ Train a model """
-    
+
     if not os.path.exists(output_path): os.makedirs(output_path)
     torch.manual_seed(seed)
     # PyTorch dataloader
@@ -153,11 +153,11 @@
     # Checkpointer
     checkpointer = DetectronCheckpointer(model, optimizer, scheduler,save_dir = output_path, save_to_disk=True)
     arguments = {}
-    arguments["epoch"] = 0 
+    arguments["epoch"] = 0
     extra_checkpoint_data = checkpointer.load(pretrained_backbone)
     arguments.update(extra_checkpoint_data)
     arguments["max_epoch"] = epochs
-    
+
     # Train
     logger.info("Training for {} epochs".format(arguments["max_epoch"]))
     logger.info("Continuing from epoch {}".format(arguments["epoch"]))
@@ -234,7 +234,7 @@ def test(model
         ,shuffle= False
         ,pin_memory = torch.cuda.is_available()
         )
-    
+
     # checkpointer, load last model in dir
     checkpointer = DetectronCheckpointer(model, save_dir = output_path, save_to_disk=False)
     checkpointer.load(weight)
@@ -283,7 +283,7 @@ def compare(output_path_list, output_path, title, **kwargs):
 @verbosity_option(cls=ResourceOption)
 def gridtable(output_path, **kwargs):
     """ Creates an overview table in grid rst format for all Metrics.csv in the output_path
-    tree structure: 
+    tree structure:
     ├── DATABASE
     ├── MODEL
     ├── images
@@ -312,7 +312,7 @@ def visualize(dataset, output_path, **kwargs):
     overlayed: test images overlayed with prediction probabilities vessel tree
     tpfnfpviz: highlights true positives, false negatives and false positives
-    Required tree structure: 
+    Required tree structure:
     ├── DATABASE
     ├── MODEL
     ├── images
@@ -431,7 +431,7 @@ def ssltrain(model
         ,seed
         ,**kwargs):
     """ Train a model """
-    
+
     if not os.path.exists(output_path): os.makedirs(output_path)
     torch.manual_seed(seed)
     # PyTorch dataloader
@@ -445,11 +445,11 @@
     # Checkpointer
     checkpointer = DetectronCheckpointer(model, optimizer, scheduler,save_dir = output_path, save_to_disk=True)
     arguments = {}
-    arguments["epoch"] = 0 
+    arguments["epoch"] = 0
     extra_checkpoint_data = checkpointer.load(pretrained_backbone)
     arguments.update(extra_checkpoint_data)
     arguments["max_epoch"] = epochs
-    
+
     # Train
     logger.info("Training for {} epochs".format(arguments["max_epoch"]))
     logger.info("Continuing from epoch {}".format(arguments["epoch"]))
@@ -553,7 +553,7 @@ def predict(model
         ,shuffle= False
         ,pin_memory = torch.cuda.is_available()
         )
-    
+
     # checkpointer, load last model in dir
     checkpointer = DetectronCheckpointer(model, save_dir = output_path, save_to_disk=False)
     checkpointer.load(weight)
@@ -580,6 +580,14 @@ def predict(model
     required=True,
     cls=ResourceOption
     )
+@click.option(
+    '--prediction-extension',
+    '-x',
+    help = 'Extension (e.g. ".png") for the prediction files',
+    default=".png",
+    required=False,
+    cls=ResourceOption
+    )
 @click.option(
     '--dataset',
     '-d',
@@ -600,6 +608,7 @@ def predict(model
 def evalpred(
         output_path
         ,prediction_folder
+        ,prediction_extension
         ,dataset
         ,title
         ,legend
@@ -613,9 +622,9 @@ def evalpred(
         ,shuffle= False
         ,pin_memory = torch.cuda.is_available()
         )
-    
+
     # Run eval
-    do_eval(prediction_folder, data_loader, output_folder = output_path, title= title, legend=legend)
+    do_eval(prediction_folder, data_loader, output_folder = output_path, title=title, legend=legend, prediction_extension=prediction_extension)
+
-    
\ No newline at end of file

diff --git a/bob/ip/binseg/utils/evaluate.py b/bob/ip/binseg/utils/evaluate.py
index 68257128..99259f41 100644
--- a/bob/ip/binseg/utils/evaluate.py
+++ b/bob/ip/binseg/utils/evaluate.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # only use to evaluate 2nd human annotator
-# 
-import os 
+#
+import os
 import logging
 import time
 import datetime
@@ -30,7 +30,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
     ground_truths : :py:class:`torch.Tensor`
         tensor with binary ground-truth
     names : list
-        list of file names 
+        list of file names
     output_folder : str
         output path
     logger : :py:class:`logging.Logger`
@@ -38,7 +38,7 @@

     Returns
     -------
-    list 
+    list
         list containing batch metrics: ``[name, threshold, precision, recall, specificity, accuracy, jaccard, f1_score]``
     """
     step_size = 0.01
@@ -50,25 +50,25 @@
         file_name = "{}.csv".format(names[j])
         logger.info("saving {}".format(file_name))
-        
+
         with open (os.path.join(output_folder,file_name), "w+") as outfile:
             outfile.write("threshold, precision, recall, specificity, accuracy, jaccard, f1_score\n")
-            for threshold in np.arange(0.0,1.0,step_size): 
+            for threshold in np.arange(0.0,1.0,step_size):
                 # threshold
                 binary_pred = torch.gt(predictions[j], threshold).byte()

                 # equals and not-equals
                 equals = torch.eq(binary_pred, gts) # tensor
                 notequals = torch.ne(binary_pred, gts) # tensor
-                
-                # true positives 
+
+                # true positives
                 tp_tensor = (gts * binary_pred ) # tensor
                 tp_count = torch.sum(tp_tensor).item() # scalar

-                # false positives 
-                fp_tensor = torch.eq((binary_pred + tp_tensor), 1) 
+                # false positives
+                fp_tensor = torch.eq((binary_pred + tp_tensor), 1)
                 fp_count = torch.sum(fp_tensor).item()

                 # true negatives
@@ -80,14 +80,14 @@
                 fn_count = torch.sum(fn_tensor).item()

                 # calc metrics
-                metrics = base_metrics(tp_count, fp_count, tn_count, fn_count) 
-                
-                # write to disk 
+                metrics = base_metrics(tp_count, fp_count, tn_count, fn_count)
+
+                # write to disk
                 outfile.write("{:.2f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f} \n".format(threshold, *metrics))
-            
+
             batch_metrics.append([names[j],threshold, *metrics ])
-    
+
     return batch_metrics
@@ -97,12 +97,13 @@ def do_eval(
     data_loader,
     output_folder = None,
     title = '2nd human',
-    legend = '2nd human'
+    legend = '2nd human',
+    prediction_extension = None,
     ):
     """ Calculate metrics on saved prediction images (needs batch_size = 1 !)
-    
+
     Parameters
     ---------
     model : :py:class:`torch.nn.Module`
@@ -115,55 +116,57 @@ def do_eval(
     logger = logging.getLogger("bob.ip.binseg.engine.evaluate")
     logger.info("Start evaluation")
     logger.info("Prediction folder {}".format(prediction_folder))
-    results_subfolder = os.path.join(output_folder,'results') 
+    results_subfolder = os.path.join(output_folder,'results')
     os.makedirs(results_subfolder,exist_ok=True)
-    
-    
-    # Collect overall metrics 
+
+
+    # Collect overall metrics
     metrics = []
     num_images = len(data_loader)
     for samples in tqdm(data_loader):
         names = samples[0]
         images = samples[1]
         ground_truths = samples[2]
-        
-        
-        pred_file = os.path.join(prediction_folder,names[0])
-        probabilities = Image.open(pred_file)
+
+        if prediction_extension is None:
+            pred_file = os.path.join(prediction_folder,names[0])
+        else:
+            pred_file = os.path.join(prediction_folder,os.path.splitext(names[0])[0] + prediction_extension)
+        probabilities = Image.open(pred_file)
         probabilities = probabilities.convert(mode='L')
         probabilities = to_tensor(probabilities)
-        
+
         b_metrics = batch_metrics(probabilities, ground_truths, names,results_subfolder, logger)
         metrics.extend(b_metrics)
-    
-    # DataFrame
+
+    # DataFrame
     df_metrics = pd.DataFrame(metrics,columns= \
                     ["name",
                     "threshold",
-                    "precision", 
-                    "recall", 
-                    "specificity", 
-                    "accuracy", 
-                    "jaccard", 
+                    "precision",
+                    "recall",
+                    "specificity",
+                    "accuracy",
+                    "jaccard",
                     "f1_score"])
     # Report and Averages
     metrics_file = "Metrics.csv"
     metrics_path = os.path.join(results_subfolder, metrics_file)
     logger.info("Saving average over all input images: {}".format(metrics_file))
-    
+
     avg_metrics = df_metrics.groupby('threshold').mean()
     std_metrics = df_metrics.groupby('threshold').std()
-    # Uncomment below for F1-score calculation based on average precision and metrics instead of 
+    # Uncomment below for F1-score calculation based on average precision and metrics instead of
     # F1-scores of individual images. This method is in line with Maninis et. al. (2016)
     #avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \
     #    (avg_metrics["precision"]+avg_metrics["recall"])
-    
-    
+
+
     avg_metrics["std_pr"] = std_metrics["precision"]
     avg_metrics["pr_upper"] = avg_metrics['precision'] + avg_metrics["std_pr"]
     avg_metrics["pr_lower"] = avg_metrics['precision'] - avg_metrics["std_pr"]
@@ -171,13 +174,13 @@ def do_eval(
     avg_metrics["re_upper"] = avg_metrics['recall'] + avg_metrics["std_re"]
     avg_metrics["re_lower"] = avg_metrics['recall'] - avg_metrics["std_re"]
     avg_metrics["std_f1"] = std_metrics["f1_score"]
-    
+
     avg_metrics.to_csv(metrics_path)
     maxf1 = avg_metrics['f1_score'].max()
     optimal_f1_threshold = avg_metrics['f1_score'].idxmax()
-    
+
     logger.info("Highest F1-score of {:.5f}, achieved at threshold {}".format(maxf1, optimal_f1_threshold))
-    
+
     # Plotting
     #print(avg_metrics)
     np_avg_metrics = avg_metrics.to_numpy().T
--
GitLab
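
Usage note: with the new -x/--prediction-extension option above, do_eval looks up each prediction map by swapping the dataset sample's own extension for the configured one (default ".png"); when do_eval is called directly with prediction_extension=None, the sample name is used verbatim. The snippet below is a minimal, self-contained sketch of that path-mapping rule only; the helper name prediction_path and the example file names are illustrative and not part of the package.

#!/usr/bin/env python
# Minimal sketch of the extension-swapping rule described above (illustrative
# helper, not package code); only the standard library is required.

import os


def prediction_path(prediction_folder, sample_name, prediction_extension=None):
    """Return the expected path of the prediction map for one dataset sample."""
    if prediction_extension is None:
        # no extension configured: use the sample name as-is
        return os.path.join(prediction_folder, sample_name)
    # otherwise swap the sample's extension for the configured one (e.g. ".png")
    stem = os.path.splitext(sample_name)[0]
    return os.path.join(prediction_folder, stem + prediction_extension)


if __name__ == "__main__":
    # ground truth stored as .gif, predictions written as .png by the predict step
    print(prediction_path("predictions", "im0001.gif", ".png"))  # predictions/im0001.png
    # no extension configured: keep the original file name
    print(prediction_path("predictions", "im0001.gif"))          # predictions/im0001.gif

Note that, through the command line, click always supplies the declared default of ".png", so the None fallback is only reachable when do_eval is imported and called directly.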