Skip to content
Snippets Groups Projects
Commit f9e14859 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[data/imagefolderinference] Accept globs

parent a8bc03de
No related branches found
No related tags found
1 merge request!9Minor fixes
...@@ -7,7 +7,8 @@ from bob.ip.binseg.data.imagefolderinference import ImageFolderInference ...@@ -7,7 +7,8 @@ from bob.ip.binseg.data.imagefolderinference import ImageFolderInference
#### Config #### #### Config ####
# add your transforms below # add your transforms below
# Transforms applied to every inference sample, in order: force an RGB
# representation, crop to the network input size, then convert to a tensor.
transforms = Compose([
    ToRGB(),
    CenterCrop((544, 544)),
    ToTensor(),
])
......
...@@ -8,24 +8,44 @@ import torch ...@@ -8,24 +8,44 @@ import torch
import torchvision.transforms.functional as VF import torchvision.transforms.functional as VF
import bob.io.base import bob.io.base
def get_file_lists(data_path, glob):
    """
    Recursively retrieves file lists from a given path, matching a given glob

    This function uses :py:meth:`pathlib.Path.rglob`, together with the
    provided glob pattern, to recursively search ``data_path`` for any entry
    whose name matches the pattern.

    Parameters
    ----------
    data_path : str
        root path to search from

    glob : str
        glob pattern (e.g. ``"*.png"``) matched against entry names,
        recursively, under ``data_path``

    Returns
    -------
    numpy.ndarray
        sorted array of :py:class:`pathlib.Path` objects matching the glob
    """
    data_path = Path(data_path)
    # sorted() already materializes the iterator - no need for list()
    image_file_names = np.array(sorted(data_path.rglob(glob)))
    return image_file_names
class ImageFolderInference(Dataset):
    """
    Generic ImageFolder containing images for inference

    Notice that this implementation, contrary to its sister
    :py:class:`ImageFolder`, does not *automatically* convert the input image
    to RGB, before passing it to the transforms, so it is possible to
    accommodate a wider range of input types (e.g. 16-bit PNG images).

    Parameters
    ----------
    path : str
        full path to root of dataset

    glob : str
        glob that can be used to filter-down files to be loaded on the
        provided path

    transform : list
        List of transformations to apply to every input sample
    """
    def __init__(self, path, glob='*', transform=None):
        self.transform = transform
        self.path = path
        # recursively collect every file under ``path`` matching ``glob``
        self.img_file_list = get_file_lists(path, glob)

    def __len__(self):
        """
        Returns
        -------
        int
            size of the dataset
        """
        return len(self.img_file_list)

    def __getitem__(self, index):
        """
        Parameters
        ----------
        index : int
            position of the sample to load

        Returns
        -------
        list
            dataitem [img_name, img]
        """
        img_path = self.img_file_list[index]
        # identify the sample by its path relative to the dataset root,
        # POSIX-style, so names are stable across platforms
        img_name = img_path.relative_to(self.path).as_posix()
        # no RGB conversion here on purpose - see class docstring
        img = Image.open(img_path)
        sample = [img]
        if self.transform:
            sample = self.transform(*sample)
        sample.insert(0, img_name)
        return sample
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os import os
import logging import logging
import time import time
import datetime import datetime
...@@ -30,7 +30,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger): ...@@ -30,7 +30,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
ground_truths : :py:class:`torch.Tensor` ground_truths : :py:class:`torch.Tensor`
tensor with binary ground-truth tensor with binary ground-truth
names : list names : list
list of file names list of file names
output_folder : str output_folder : str
output path output path
logger : :py:class:`logging.Logger` logger : :py:class:`logging.Logger`
...@@ -38,7 +38,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger): ...@@ -38,7 +38,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
Returns Returns
------- -------
list list
list containing batch metrics: ``[name, threshold, precision, recall, specificity, accuracy, jaccard, f1_score]`` list containing batch metrics: ``[name, threshold, precision, recall, specificity, accuracy, jaccard, f1_score]``
""" """
step_size = 0.01 step_size = 0.01
...@@ -50,25 +50,25 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger): ...@@ -50,25 +50,25 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
file_name = "{}.csv".format(names[j]) file_name = "{}.csv".format(names[j])
logger.info("saving {}".format(file_name)) logger.info("saving {}".format(file_name))
with open (os.path.join(output_folder,file_name), "w+") as outfile: with open (os.path.join(output_folder,file_name), "w+") as outfile:
outfile.write("threshold, precision, recall, specificity, accuracy, jaccard, f1_score\n") outfile.write("threshold, precision, recall, specificity, accuracy, jaccard, f1_score\n")
for threshold in np.arange(0.0,1.0,step_size): for threshold in np.arange(0.0,1.0,step_size):
# threshold # threshold
binary_pred = torch.gt(predictions[j], threshold).byte() binary_pred = torch.gt(predictions[j], threshold).byte()
# equals and not-equals # equals and not-equals
equals = torch.eq(binary_pred, gts) # tensor equals = torch.eq(binary_pred, gts) # tensor
notequals = torch.ne(binary_pred, gts) # tensor notequals = torch.ne(binary_pred, gts) # tensor
# true positives # true positives
tp_tensor = (gts * binary_pred ) # tensor tp_tensor = (gts * binary_pred ) # tensor
tp_count = torch.sum(tp_tensor).item() # scalar tp_count = torch.sum(tp_tensor).item() # scalar
# false positives # false positives
fp_tensor = torch.eq((binary_pred + tp_tensor), 1) fp_tensor = torch.eq((binary_pred + tp_tensor), 1)
fp_count = torch.sum(fp_tensor).item() fp_count = torch.sum(fp_tensor).item()
# true negatives # true negatives
...@@ -80,14 +80,14 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger): ...@@ -80,14 +80,14 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
fn_count = torch.sum(fn_tensor).item() fn_count = torch.sum(fn_tensor).item()
# calc metrics # calc metrics
metrics = base_metrics(tp_count, fp_count, tn_count, fn_count) metrics = base_metrics(tp_count, fp_count, tn_count, fn_count)
# write to disk # write to disk
outfile.write("{:.2f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f} \n".format(threshold, *metrics)) outfile.write("{:.2f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f} \n".format(threshold, *metrics))
batch_metrics.append([names[j],threshold, *metrics ]) batch_metrics.append([names[j],threshold, *metrics ])
return batch_metrics return batch_metrics
def save_probability_images(predictions, names, output_folder, logger):
    """
    Saves probability maps as PNG images under an ``images`` subfolder

    Parameters
    ----------
    predictions : :py:class:`torch.Tensor`
        tensor with pixel-wise probabilities
    names : list
        list of file names
    output_folder : str
        output path
    logger : :py:class:`logging.Logger`
        python logger
    """
    images_subfolder = os.path.join(output_folder, 'images')
    for j in range(predictions.size()[0]):
        img = VF.to_pil_image(predictions.cpu().data[j])
        # NOTE(review): split(".")[0] truncates at the FIRST dot, so a name
        # like "a.b/c.png" would be mangled - presumably names contain at most
        # one dot; confirm against the dataset's naming
        filename = '{}.png'.format(names[j].split(".")[0])
        fullpath = os.path.join(images_subfolder, filename)
        logger.info("saving {}".format(fullpath))
        # names may carry sub-directories (relative paths); create them
        # race-free - exist_ok avoids the check-then-create TOCTOU of
        # os.path.exists followed by os.makedirs
        os.makedirs(os.path.dirname(fullpath), exist_ok=True)
        img.save(fullpath)
def save_hdf(predictions, names, output_folder, logger):
    """
    Saves probability maps as HDF5 files under an ``hdf5`` subfolder

    Parameters
    ----------
    predictions : :py:class:`torch.Tensor`
        tensor with pixel-wise probabilities
    names : list
        list of file names
    output_folder : str
        output path
    logger : :py:class:`logging.Logger`
        python logger
    """
    hdf5_subfolder = os.path.join(output_folder, 'hdf5')
    for j in range(predictions.size()[0]):
        # drop the leading channel dimension before saving
        img = predictions.cpu().data[j].squeeze(0).numpy()
        filename = '{}.hdf5'.format(names[j].split(".")[0])
        fullpath = os.path.join(hdf5_subfolder, filename)
        # log the full path, consistent with save_probability_images
        logger.info("saving {}".format(fullpath))
        # per-file makedirs also creates hdf5_subfolder itself, so no separate
        # top-level creation is needed; exist_ok makes it race-free
        os.makedirs(os.path.dirname(fullpath), exist_ok=True)
        bob.io.base.save(img, fullpath)
def do_inference( def do_inference(
model, model,
...@@ -146,7 +151,7 @@ def do_inference( ...@@ -146,7 +151,7 @@ def do_inference(
""" """
Run inference and calculate metrics Run inference and calculate metrics
Parameters Parameters
--------- ---------
model : :py:class:`torch.nn.Module` model : :py:class:`torch.nn.Module`
...@@ -159,18 +164,18 @@ def do_inference( ...@@ -159,18 +164,18 @@ def do_inference(
logger = logging.getLogger("bob.ip.binseg.engine.inference") logger = logging.getLogger("bob.ip.binseg.engine.inference")
logger.info("Start evaluation") logger.info("Start evaluation")
logger.info("Output folder: {}, Device: {}".format(output_folder, device)) logger.info("Output folder: {}, Device: {}".format(output_folder, device))
results_subfolder = os.path.join(output_folder,'results') results_subfolder = os.path.join(output_folder,'results')
os.makedirs(results_subfolder,exist_ok=True) os.makedirs(results_subfolder,exist_ok=True)
model.eval().to(device) model.eval().to(device)
# Sigmoid for probabilities # Sigmoid for probabilities
sigmoid = torch.nn.Sigmoid() sigmoid = torch.nn.Sigmoid()
# Setup timers # Setup timers
start_total_time = time.time() start_total_time = time.time()
times = [] times = []
# Collect overall metrics # Collect overall metrics
metrics = [] metrics = []
for samples in tqdm(data_loader): for samples in tqdm(data_loader):
...@@ -181,50 +186,50 @@ def do_inference( ...@@ -181,50 +186,50 @@ def do_inference(
start_time = time.perf_counter() start_time = time.perf_counter()
outputs = model(images) outputs = model(images)
# necessary check for hed architecture that uses several outputs # necessary check for hed architecture that uses several outputs
# for loss calculation instead of just the last concatfuse block # for loss calculation instead of just the last concatfuse block
if isinstance(outputs,list): if isinstance(outputs,list):
outputs = outputs[-1] outputs = outputs[-1]
probabilities = sigmoid(outputs) probabilities = sigmoid(outputs)
batch_time = time.perf_counter() - start_time batch_time = time.perf_counter() - start_time
times.append(batch_time) times.append(batch_time)
logger.info("Batch time: {:.5f} s".format(batch_time)) logger.info("Batch time: {:.5f} s".format(batch_time))
b_metrics = batch_metrics(probabilities, ground_truths, names,results_subfolder, logger) b_metrics = batch_metrics(probabilities, ground_truths, names,results_subfolder, logger)
metrics.extend(b_metrics) metrics.extend(b_metrics)
# Create probability images # Create probability images
save_probability_images(probabilities, names, output_folder, logger) save_probability_images(probabilities, names, output_folder, logger)
# save hdf5 # save hdf5
save_hdf(probabilities, names, output_folder, logger) save_hdf(probabilities, names, output_folder, logger)
# DataFrame # DataFrame
df_metrics = pd.DataFrame(metrics,columns= \ df_metrics = pd.DataFrame(metrics,columns= \
["name", ["name",
"threshold", "threshold",
"precision", "precision",
"recall", "recall",
"specificity", "specificity",
"accuracy", "accuracy",
"jaccard", "jaccard",
"f1_score"]) "f1_score"])
# Report and Averages # Report and Averages
metrics_file = "Metrics.csv".format(model.name) metrics_file = "Metrics.csv".format(model.name)
metrics_path = os.path.join(results_subfolder, metrics_file) metrics_path = os.path.join(results_subfolder, metrics_file)
logger.info("Saving average over all input images: {}".format(metrics_file)) logger.info("Saving average over all input images: {}".format(metrics_file))
avg_metrics = df_metrics.groupby('threshold').mean() avg_metrics = df_metrics.groupby('threshold').mean()
std_metrics = df_metrics.groupby('threshold').std() std_metrics = df_metrics.groupby('threshold').std()
# Uncomment below for F1-score calculation based on average precision and metrics instead of # Uncomment below for F1-score calculation based on average precision and metrics instead of
# F1-scores of individual images. This method is in line with Maninis et. al. (2016) # F1-scores of individual images. This method is in line with Maninis et. al. (2016)
#avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ #avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \
# (avg_metrics["precision"]+avg_metrics["recall"]) # (avg_metrics["precision"]+avg_metrics["recall"])
avg_metrics["std_pr"] = std_metrics["precision"] avg_metrics["std_pr"] = std_metrics["precision"]
avg_metrics["pr_upper"] = avg_metrics['precision'] + avg_metrics["std_pr"] avg_metrics["pr_upper"] = avg_metrics['precision'] + avg_metrics["std_pr"]
avg_metrics["pr_lower"] = avg_metrics['precision'] - avg_metrics["std_pr"] avg_metrics["pr_lower"] = avg_metrics['precision'] - avg_metrics["std_pr"]
...@@ -232,13 +237,13 @@ def do_inference( ...@@ -232,13 +237,13 @@ def do_inference(
avg_metrics["re_upper"] = avg_metrics['recall'] + avg_metrics["std_re"] avg_metrics["re_upper"] = avg_metrics['recall'] + avg_metrics["std_re"]
avg_metrics["re_lower"] = avg_metrics['recall'] - avg_metrics["std_re"] avg_metrics["re_lower"] = avg_metrics['recall'] - avg_metrics["std_re"]
avg_metrics["std_f1"] = std_metrics["f1_score"] avg_metrics["std_f1"] = std_metrics["f1_score"]
avg_metrics.to_csv(metrics_path) avg_metrics.to_csv(metrics_path)
maxf1 = avg_metrics['f1_score'].max() maxf1 = avg_metrics['f1_score'].max()
optimal_f1_threshold = avg_metrics['f1_score'].idxmax() optimal_f1_threshold = avg_metrics['f1_score'].idxmax()
logger.info("Highest F1-score of {:.5f}, achieved at threshold {}".format(maxf1, optimal_f1_threshold)) logger.info("Highest F1-score of {:.5f}, achieved at threshold {}".format(maxf1, optimal_f1_threshold))
# Plotting # Plotting
np_avg_metrics = avg_metrics.to_numpy().T np_avg_metrics = avg_metrics.to_numpy().T
fig_name = "precision_recall.pdf" fig_name = "precision_recall.pdf"
...@@ -246,7 +251,7 @@ def do_inference( ...@@ -246,7 +251,7 @@ def do_inference(
fig = precision_recall_f1iso_confintval([np_avg_metrics[0]],[np_avg_metrics[1]],[np_avg_metrics[7]],[np_avg_metrics[8]],[np_avg_metrics[10]],[np_avg_metrics[11]], [model.name,None], title=output_folder) fig = precision_recall_f1iso_confintval([np_avg_metrics[0]],[np_avg_metrics[1]],[np_avg_metrics[7]],[np_avg_metrics[8]],[np_avg_metrics[10]],[np_avg_metrics[11]], [model.name,None], title=output_folder)
fig_filename = os.path.join(results_subfolder, fig_name) fig_filename = os.path.join(results_subfolder, fig_name)
fig.savefig(fig_filename) fig.savefig(fig_filename)
# Report times # Report times
total_inference_time = str(datetime.timedelta(seconds=int(sum(times)))) total_inference_time = str(datetime.timedelta(seconds=int(sum(times))))
average_batch_inference_time = np.mean(times) average_batch_inference_time = np.mean(times)
...@@ -256,7 +261,7 @@ def do_inference( ...@@ -256,7 +261,7 @@ def do_inference(
times_file = "Times.txt" times_file = "Times.txt"
logger.info("saving {}".format(times_file)) logger.info("saving {}".format(times_file))
with open (os.path.join(results_subfolder,times_file), "w+") as outfile: with open (os.path.join(results_subfolder,times_file), "w+") as outfile:
date = datetime.datetime.now() date = datetime.datetime.now()
outfile.write("Date: {} \n".format(date.strftime("%Y-%m-%d %H:%M:%S"))) outfile.write("Date: {} \n".format(date.strftime("%Y-%m-%d %H:%M:%S")))
...@@ -264,7 +269,7 @@ def do_inference( ...@@ -264,7 +269,7 @@ def do_inference(
outfile.write("Average batch inference time: {} \n".format(average_batch_inference_time)) outfile.write("Average batch inference time: {} \n".format(average_batch_inference_time))
outfile.write("Total inference time: {} \n".format(total_inference_time)) outfile.write("Total inference time: {} \n".format(total_inference_time))
# Save model summary # Save model summary
summary_file = 'ModelSummary.txt' summary_file = 'ModelSummary.txt'
logger.info("saving {}".format(summary_file)) logger.info("saving {}".format(summary_file))
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment