Commit f562a264 authored by André Anjos

Merge branch 'minor-eval-fixes' into 'master'

Minor evaluation fixes

See merge request bob/bob.ip.binseg!11
parents f2f202e7 f28cf5f6
Tags v2.0.2
1 merge request: !11 Minor evaluation fixes
Pipeline #38131 passed
@@ -139,7 +139,7 @@ def train(model
,seed
,**kwargs):
""" Train a model """
if not os.path.exists(output_path): os.makedirs(output_path)
torch.manual_seed(seed)
# PyTorch dataloader
@@ -153,11 +153,11 @@ def train(model
# Checkpointer
checkpointer = DetectronCheckpointer(model, optimizer, scheduler,save_dir = output_path, save_to_disk=True)
arguments = {}
arguments["epoch"] = 0
arguments["epoch"] = 0
extra_checkpoint_data = checkpointer.load(pretrained_backbone)
arguments.update(extra_checkpoint_data)
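# NOTE: checkpoints written by this trainer presumably carry an "epoch" entry;
# if present, update() overrides the initial 0 above so training resumes from
# that epoch (see the "Continuing from epoch" log message below), whereas a
# plain pretrained backbone leaves arguments["epoch"] at 0.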
arguments["max_epoch"] = epochs
# Train
logger.info("Training for {} epochs".format(arguments["max_epoch"]))
logger.info("Continuing from epoch {}".format(arguments["epoch"]))
@@ -234,7 +234,7 @@ def test(model
,shuffle= False
,pin_memory = torch.cuda.is_available()
)
# checkpointer, load last model in dir
checkpointer = DetectronCheckpointer(model, save_dir = output_path, save_to_disk=False)
checkpointer.load(weight)
@@ -283,7 +283,7 @@ def compare(output_path_list, output_path, title, **kwargs):
@verbosity_option(cls=ResourceOption)
def gridtable(output_path, **kwargs):
""" Creates an overview table in grid rst format for all Metrics.csv in the output_path
tree structure:
├── DATABASE
├── MODEL
├── images
@@ -312,7 +312,7 @@ def visualize(dataset, output_path, **kwargs):
overlayed: test images overlaid with the predicted vessel-tree probabilities
tpfnfpviz: highlights true positives, false negatives and false positives
Required tree structure:
├── DATABASE
├── MODEL
├── images
@@ -431,7 +431,7 @@ def ssltrain(model
,seed
,**kwargs):
""" Train a model """
if not os.path.exists(output_path): os.makedirs(output_path)
torch.manual_seed(seed)
# PyTorch dataloader
@@ -445,11 +445,11 @@ def ssltrain(model
# Checkpointer
checkpointer = DetectronCheckpointer(model, optimizer, scheduler,save_dir = output_path, save_to_disk=True)
arguments = {}
arguments["epoch"] = 0
arguments["epoch"] = 0
extra_checkpoint_data = checkpointer.load(pretrained_backbone)
arguments.update(extra_checkpoint_data)
arguments["max_epoch"] = epochs
# Train
logger.info("Training for {} epochs".format(arguments["max_epoch"]))
logger.info("Continuing from epoch {}".format(arguments["epoch"]))
@@ -553,7 +553,7 @@ def predict(model
,shuffle= False
,pin_memory = torch.cuda.is_available()
)
# checkpointer, load last model in dir
checkpointer = DetectronCheckpointer(model, save_dir = output_path, save_to_disk=False)
checkpointer.load(weight)
@@ -580,6 +580,14 @@ def predict(model
required=True,
cls=ResourceOption
)
@click.option(
'--prediction-extension',
'-x',
help = 'Extension (e.g. ".png") for the prediction files',
default=".png",
required=False,
cls=ResourceOption
)
@click.option(
'--dataset',
'-d',
@@ -600,6 +608,7 @@ def predict(model
def evalpred(
output_path
,prediction_folder
,prediction_extension
,dataset
,title
,legend
@@ -613,9 +622,9 @@ def evalpred(
,shuffle= False
,pin_memory = torch.cuda.is_available()
)
# Run eval
do_eval(prediction_folder, data_loader, output_folder = output_path, title=title, legend=legend, prediction_extension=prediction_extension)
\ No newline at end of file
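In effect, the new --prediction-extension flag changes how evalpred resolves prediction files: when given, the ground-truth file name keeps its base name but gets the chosen extension. A minimal self-contained sketch of that lookup (the helper name resolve_prediction and the file names are hypothetical; it mirrors the branch added to do_eval further below):

import os

def resolve_prediction(prediction_folder, gt_name, prediction_extension=None):
    # no extension given: assume predictions use the ground-truth file name as-is
    if prediction_extension is None:
        return os.path.join(prediction_folder, gt_name)
    # extension given: swap it in, e.g. 'im0001.gif' -> 'im0001.png' for '.png'
    return os.path.join(prediction_folder,
                        os.path.splitext(gt_name)[0] + prediction_extension)

# resolve_prediction('/output/images', 'im0001.gif', '.png') -> '/output/images/im0001.png'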
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# only used to evaluate the 2nd human annotator
#
import os
import logging
import time
import datetime
@@ -30,7 +30,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
ground_truths : :py:class:`torch.Tensor`
tensor with binary ground-truth
names : list
list of file names
output_folder : str
output path
logger : :py:class:`logging.Logger`
@@ -38,7 +38,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
Returns
-------
list
list containing batch metrics: ``[name, threshold, precision, recall, specificity, accuracy, jaccard, f1_score]``
"""
step_size = 0.01
@@ -50,25 +50,25 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
file_name = "{}.csv".format(names[j])
logger.info("saving {}".format(file_name))
with open (os.path.join(output_folder,file_name), "w+") as outfile:
outfile.write("threshold, precision, recall, specificity, accuracy, jaccard, f1_score\n")
for threshold in np.arange(0.0,1.0,step_size):
# threshold
binary_pred = torch.gt(predictions[j], threshold).byte()
# equals and not-equals
equals = torch.eq(binary_pred, gts) # tensor
notequals = torch.ne(binary_pred, gts) # tensor
# true positives
tp_tensor = (gts * binary_pred ) # tensor
tp_count = torch.sum(tp_tensor).item() # scalar
# false positives
fp_tensor = torch.eq((binary_pred + tp_tensor), 1)
fp_count = torch.sum(fp_tensor).item()
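# Illustration with toy values (not from the source): for binary_pred = [1,1,0,0]
# and gts = [1,0,1,0], tp_tensor = gts * binary_pred = [1,0,0,0] (tp_count 1) and
# binary_pred + tp_tensor = [2,1,0,0], so torch.eq(..., 1) marks exactly the
# false positives, [0,1,0,0] (fp_count 1).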
# true negatives
@@ -80,14 +80,14 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
fn_count = torch.sum(fn_tensor).item()
# calc metrics
metrics = base_metrics(tp_count, fp_count, tn_count, fn_count)
# write to disk
outfile.write("{:.2f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f} \n".format(threshold, *metrics))
batch_metrics.append([names[j],threshold, *metrics ])
return batch_metrics
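base_metrics itself is not part of this diff. For reference, a minimal sketch of what it presumably computes from the four confusion counts, matching the column names written to the CSV above (the function name, zero-guards and return order below are assumptions, not the library's code):

def base_metrics_sketch(tp, fp, tn, fn):
    # standard per-threshold measures derived from the confusion counts
    precision = tp / (tp + fp) if (tp + fp) else 0.0
    recall = tp / (tp + fn) if (tp + fn) else 0.0  # a.k.a. sensitivity
    specificity = tn / (tn + fp) if (tn + fp) else 0.0
    accuracy = (tp + tn) / (tp + fp + tn + fn)
    jaccard = tp / (tp + fp + fn) if (tp + fp + fn) else 0.0
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) else 0.0
    return precision, recall, specificity, accuracy, jaccard, f1_score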
@@ -97,12 +97,13 @@ def do_eval(
data_loader,
output_folder = None,
title = '2nd human',
legend = '2nd human',
prediction_extension = None,
):
"""
Calculate metrics on saved prediction images (needs batch_size = 1 !)
Parameters
----------
model : :py:class:`torch.nn.Module`
@@ -115,55 +116,57 @@ def do_eval(
logger = logging.getLogger("bob.ip.binseg.engine.evaluate")
logger.info("Start evaluation")
logger.info("Prediction folder {}".format(prediction_folder))
results_subfolder = os.path.join(output_folder,'results')
os.makedirs(results_subfolder,exist_ok=True)
# Collect overall metrics
metrics = []
num_images = len(data_loader)
for samples in tqdm(data_loader):
names = samples[0]
images = samples[1]
ground_truths = samples[2]
if prediction_extension is None:
pred_file = os.path.join(prediction_folder,names[0])
else:
pred_file = os.path.join(prediction_folder,os.path.splitext(names[0])[0] + prediction_extension)
probabilities = Image.open(pred_file)
probabilities = probabilities.convert(mode='L')
probabilities = to_tensor(probabilities)
b_metrics = batch_metrics(probabilities, ground_truths, names,results_subfolder, logger)
metrics.extend(b_metrics)
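# at this point `metrics` holds one row per (image, threshold) pair (100 thresholds
# per image with the 0.01 step used in batch_metrics); the groupby('threshold')
# below reduces these rows to per-threshold averages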
# DataFrame
df_metrics = pd.DataFrame(metrics,columns= \
["name",
"threshold",
"precision",
"recall",
"specificity",
"accuracy",
"jaccard",
"precision",
"recall",
"specificity",
"accuracy",
"jaccard",
"f1_score"])
# Report and Averages
metrics_file = "Metrics.csv"
metrics_path = os.path.join(results_subfolder, metrics_file)
logger.info("Saving average over all input images: {}".format(metrics_file))
avg_metrics = df_metrics.groupby('threshold').mean()
std_metrics = df_metrics.groupby('threshold').std()
# Uncomment below for F1-score calculation based on average precision and recall instead of
# F1-scores of individual images. This method is in line with Maninis et al. (2016)
#avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \
# (avg_metrics["precision"]+avg_metrics["recall"])
avg_metrics["std_pr"] = std_metrics["precision"]
avg_metrics["pr_upper"] = avg_metrics['precision'] + avg_metrics["std_pr"]
avg_metrics["pr_lower"] = avg_metrics['precision'] - avg_metrics["std_pr"]
@@ -171,13 +174,13 @@ def do_eval(
avg_metrics["re_upper"] = avg_metrics['recall'] + avg_metrics["std_re"]
avg_metrics["re_lower"] = avg_metrics['recall'] - avg_metrics["std_re"]
avg_metrics["std_f1"] = std_metrics["f1_score"]
avg_metrics.to_csv(metrics_path)
maxf1 = avg_metrics['f1_score'].max()
optimal_f1_threshold = avg_metrics['f1_score'].idxmax()
logger.info("Highest F1-score of {:.5f}, achieved at threshold {}".format(maxf1, optimal_f1_threshold))
# Plotting
#print(avg_metrics)
np_avg_metrics = avg_metrics.to_numpy().T
@@ -313,14 +313,6 @@ def plot_overview(outputfolders,title):
re_ups.append(re_upper)
re_lows.append(re_lower)
modelname = folder.split('/')[-1]
datasetname = folder.split('/')[-2]
# parameters
summary_path = os.path.join(folder,'results/ModelSummary.txt')
with open (summary_path, "r") as outfile:
rows = outfile.readlines()
lastrow = rows[-1]
parameter = int(lastrow.split()[1].replace(',',''))
#name = '[P={:.2f}M] {} {}'.format(parameter/100**3, modelname, "")
name = '{} '.format(modelname)
names.append(name)
#title = folder.split('/')[-4]