From c485510f099f888a58cbea3a6d25c32186b3bb8f Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.anjos@idiap.ch>
Date: Mon, 16 Mar 2020 18:21:09 +0100
Subject: [PATCH] [script/binseg] In evalpred, allow the user to specify the
 prediction map extension (defaults to .png)

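A minimal usage sketch of the changed API (hedged: the prediction folder,
output folder and data_loader below are placeholders, not part of this
patch):

    from bob.ip.binseg.utils.evaluate import do_eval

    # data_loader is assumed to be a torch.utils.data.DataLoader built with
    # batch_size=1 that yields (names, images, ground_truths) samples, which
    # is what do_eval expects.
    do_eval(
        "/path/to/predictions",          # folder with the saved probability maps
        data_loader,
        output_folder="/path/to/output",
        title="2nd human",
        legend="2nd human",
        prediction_extension=".png",     # new keyword; None keeps the old
                                         # behaviour of using the sample name
                                         # exactly as stored
    )

From the command line, the same value is exposed through the new
-x/--prediction-extension option of the evalpred command and defaults
to ".png".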
---
 bob/ip/binseg/script/binseg.py  | 35 ++++++++------
 bob/ip/binseg/utils/evaluate.py | 83 +++++++++++++++++----------------
 2 files changed, 65 insertions(+), 53 deletions(-)

diff --git a/bob/ip/binseg/script/binseg.py b/bob/ip/binseg/script/binseg.py
index f78fe404..944a9995 100644
--- a/bob/ip/binseg/script/binseg.py
+++ b/bob/ip/binseg/script/binseg.py
@@ -139,7 +139,7 @@ def train(model
         ,seed
         ,**kwargs):
     """ Train a model """
-    
+
     if not os.path.exists(output_path): os.makedirs(output_path)
     torch.manual_seed(seed)
     # PyTorch dataloader
@@ -153,11 +153,11 @@ def train(model
     # Checkpointer
     checkpointer = DetectronCheckpointer(model, optimizer, scheduler,save_dir = output_path, save_to_disk=True)
     arguments = {}
-    arguments["epoch"] = 0 
+    arguments["epoch"] = 0
     extra_checkpoint_data = checkpointer.load(pretrained_backbone)
     arguments.update(extra_checkpoint_data)
     arguments["max_epoch"] = epochs
-    
+
     # Train
     logger.info("Training for {} epochs".format(arguments["max_epoch"]))
     logger.info("Continuing from epoch {}".format(arguments["epoch"]))
@@ -234,7 +234,7 @@ def test(model
         ,shuffle= False
         ,pin_memory = torch.cuda.is_available()
         )
-    
+
     # checkpointer, load last model in dir
     checkpointer = DetectronCheckpointer(model, save_dir = output_path, save_to_disk=False)
     checkpointer.load(weight)
@@ -283,7 +283,7 @@ def compare(output_path_list, output_path, title, **kwargs):
 @verbosity_option(cls=ResourceOption)
 def gridtable(output_path, **kwargs):
     """ Creates an overview table in grid rst format for all Metrics.csv in the output_path
-    tree structure: 
+    tree structure:
         ├── DATABASE
         ├── MODEL
             ├── images
@@ -312,7 +312,7 @@ def visualize(dataset, output_path, **kwargs):
     overlayed: test images overlayed with prediction probabilities vessel tree
     tpfnfpviz: highlights true positives, false negatives and false positives
 
-    Required tree structure: 
+    Required tree structure:
     ├── DATABASE
         ├── MODEL
             ├── images
@@ -431,7 +431,7 @@ def ssltrain(model
         ,seed
         ,**kwargs):
     """ Train a model """
-    
+
     if not os.path.exists(output_path): os.makedirs(output_path)
     torch.manual_seed(seed)
     # PyTorch dataloader
@@ -445,11 +445,11 @@ def ssltrain(model
     # Checkpointer
     checkpointer = DetectronCheckpointer(model, optimizer, scheduler,save_dir = output_path, save_to_disk=True)
     arguments = {}
-    arguments["epoch"] = 0 
+    arguments["epoch"] = 0
     extra_checkpoint_data = checkpointer.load(pretrained_backbone)
     arguments.update(extra_checkpoint_data)
     arguments["max_epoch"] = epochs
-    
+
     # Train
     logger.info("Training for {} epochs".format(arguments["max_epoch"]))
     logger.info("Continuing from epoch {}".format(arguments["epoch"]))
@@ -553,7 +553,7 @@ def predict(model
         ,shuffle= False
         ,pin_memory = torch.cuda.is_available()
         )
-    
+
     # checkpointer, load last model in dir
     checkpointer = DetectronCheckpointer(model, save_dir = output_path, save_to_disk=False)
     checkpointer.load(weight)
@@ -580,6 +580,14 @@ def predict(model
     required=True,
     cls=ResourceOption
     )
+@click.option(
+    '--prediction-extension',
+    '-x',
+    help='Extension (e.g. ".png") for the prediction files',
+    default=".png",
+    required=False,
+    cls=ResourceOption
+    )
 @click.option(
     '--dataset',
     '-d',
@@ -600,6 +608,7 @@ def predict(model
 def evalpred(
         output_path
         ,prediction_folder
+        ,prediction_extension
         ,dataset
         ,title
         ,legend
@@ -613,9 +622,9 @@ def evalpred(
         ,shuffle= False
         ,pin_memory = torch.cuda.is_available()
         )
-    
+
     # Run eval
-    do_eval(prediction_folder, data_loader, output_folder = output_path, title= title, legend=legend)
+    do_eval(prediction_folder, data_loader, output_folder=output_path, title=title, legend=legend, prediction_extension=prediction_extension)
+
 
 
-    
\ No newline at end of file
diff --git a/bob/ip/binseg/utils/evaluate.py b/bob/ip/binseg/utils/evaluate.py
index 68257128..99259f41 100644
--- a/bob/ip/binseg/utils/evaluate.py
+++ b/bob/ip/binseg/utils/evaluate.py
@@ -1,8 +1,8 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # only use to evaluate 2nd human annotator
-#  
-import os 
+#
+import os
 import logging
 import time
 import datetime
@@ -30,7 +30,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
     ground_truths : :py:class:`torch.Tensor`
         tensor with binary ground-truth
     names : list
-        list of file names 
+        list of file names
     output_folder : str
         output path
     logger : :py:class:`logging.Logger`
@@ -38,7 +38,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
 
     Returns
     -------
-    list 
+    list
         list containing batch metrics: ``[name, threshold, precision, recall, specificity, accuracy, jaccard, f1_score]``
     """
     step_size = 0.01
@@ -50,25 +50,25 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
 
         file_name = "{}.csv".format(names[j])
         logger.info("saving {}".format(file_name))
-        
+
         with open (os.path.join(output_folder,file_name), "w+") as outfile:
 
             outfile.write("threshold, precision, recall, specificity, accuracy, jaccard, f1_score\n")
 
-            for threshold in np.arange(0.0,1.0,step_size):        
+            for threshold in np.arange(0.0,1.0,step_size):
                 # threshold
                 binary_pred = torch.gt(predictions[j], threshold).byte()
 
                 # equals and not-equals
                 equals = torch.eq(binary_pred, gts) # tensor
                 notequals = torch.ne(binary_pred, gts) # tensor
-                
-                # true positives 
+
+                # true positives
                 tp_tensor = (gts * binary_pred ) # tensor
                 tp_count = torch.sum(tp_tensor).item() # scalar
 
-                # false positives 
-                fp_tensor = torch.eq((binary_pred + tp_tensor), 1) 
+                # false positives
+                fp_tensor = torch.eq((binary_pred + tp_tensor), 1)
                 fp_count = torch.sum(fp_tensor).item()
 
                 # true negatives
@@ -80,14 +80,14 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
                 fn_count = torch.sum(fn_tensor).item()
 
                 # calc metrics
-                metrics = base_metrics(tp_count, fp_count, tn_count, fn_count)    
-                
-                # write to disk 
+                metrics = base_metrics(tp_count, fp_count, tn_count, fn_count)
+
+                # write to disk
                 outfile.write("{:.2f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f} \n".format(threshold, *metrics))
-                
+
                 batch_metrics.append([names[j],threshold, *metrics ])
 
-    
+
     return batch_metrics
 
 
@@ -97,12 +97,13 @@ def do_eval(
     data_loader,
     output_folder = None,
     title = '2nd human',
-    legend = '2nd human'
+    legend = '2nd human',
+    prediction_extension = None,
 ):
 
     """
     Calculate metrics on saved prediction images (needs batch_size = 1 !)
-    
+
     Parameters
     ---------
     model : :py:class:`torch.nn.Module`
@@ -115,55 +116,57 @@ def do_eval(
     logger = logging.getLogger("bob.ip.binseg.engine.evaluate")
     logger.info("Start evaluation")
     logger.info("Prediction folder {}".format(prediction_folder))
-    results_subfolder = os.path.join(output_folder,'results') 
+    results_subfolder = os.path.join(output_folder,'results')
     os.makedirs(results_subfolder,exist_ok=True)
-    
-    
-    # Collect overall metrics 
+
+
+    # Collect overall metrics
     metrics = []
     num_images = len(data_loader)
     for samples in tqdm(data_loader):
         names = samples[0]
         images = samples[1]
         ground_truths = samples[2]
-      
-    
-        pred_file = os.path.join(prediction_folder,names[0])
-        probabilities = Image.open(pred_file)    
+
+        if prediction_extension is None:
+            pred_file = os.path.join(prediction_folder,names[0])
+        else:
+            pred_file = os.path.join(prediction_folder,os.path.splitext(names[0])[0] + prediction_extension)
+        probabilities = Image.open(pred_file)
         probabilities = probabilities.convert(mode='L')
         probabilities = to_tensor(probabilities)
 
-            
+
         b_metrics = batch_metrics(probabilities, ground_truths, names,results_subfolder, logger)
         metrics.extend(b_metrics)
-            
 
 
-    # DataFrame 
+
+    # DataFrame
     df_metrics = pd.DataFrame(metrics,columns= \
                            ["name",
                             "threshold",
-                            "precision", 
-                            "recall", 
-                            "specificity", 
-                            "accuracy", 
-                            "jaccard", 
+                            "precision",
+                            "recall",
+                            "specificity",
+                            "accuracy",
+                            "jaccard",
                             "f1_score"])
 
     # Report and Averages
     metrics_file = "Metrics.csv"
     metrics_path = os.path.join(results_subfolder, metrics_file)
     logger.info("Saving average over all input images: {}".format(metrics_file))
-    
+
     avg_metrics = df_metrics.groupby('threshold').mean()
     std_metrics = df_metrics.groupby('threshold').std()
 
-    # Uncomment below for F1-score calculation based on average precision and metrics instead of 
+    # Uncomment below for F1-score calculation based on average precision and metrics instead of
     # F1-scores of individual images. This method is in line with Maninis et. al. (2016)
     #avg_metrics["f1_score"] =  (2* avg_metrics["precision"]*avg_metrics["recall"])/ \
     #    (avg_metrics["precision"]+avg_metrics["recall"])
-    
-    
+
+
     avg_metrics["std_pr"] = std_metrics["precision"]
     avg_metrics["pr_upper"] = avg_metrics['precision'] + avg_metrics["std_pr"]
     avg_metrics["pr_lower"] = avg_metrics['precision'] - avg_metrics["std_pr"]
@@ -171,13 +174,13 @@ def do_eval(
     avg_metrics["re_upper"] = avg_metrics['recall'] + avg_metrics["std_re"]
     avg_metrics["re_lower"] = avg_metrics['recall'] - avg_metrics["std_re"]
     avg_metrics["std_f1"] = std_metrics["f1_score"]
-    
+
     avg_metrics.to_csv(metrics_path)
     maxf1 = avg_metrics['f1_score'].max()
     optimal_f1_threshold = avg_metrics['f1_score'].idxmax()
-    
+
     logger.info("Highest F1-score of {:.5f}, achieved at threshold {}".format(maxf1, optimal_f1_threshold))
-    
+
     # Plotting
     #print(avg_metrics)
     np_avg_metrics = avg_metrics.to_numpy().T
-- 
GitLab