diff --git a/bob/ip/binseg/configs/datasets/imagefolderinference.py b/bob/ip/binseg/configs/datasets/imagefolderinference.py
index ba760e876811ef5bc80259a7008e6e66e23958f4..634566b9c0f8c899474494651044f138e610f724 100644
--- a/bob/ip/binseg/configs/datasets/imagefolderinference.py
+++ b/bob/ip/binseg/configs/datasets/imagefolderinference.py
@@ -7,7 +7,8 @@ from bob.ip.binseg.data.imagefolderinference import ImageFolderInference
 #### Config ####
 
 # add your transforms below
-transforms = Compose([
+transforms = Compose([
+                        ToRGB(),
                         CenterCrop((544,544))
                         ,ToTensor()
                     ])
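With the new ToRGB() entry, inputs such as 16-bit grayscale PNGs are promoted to 3-channel images before cropping. For reference, the resulting config reads roughly as below; a minimal sketch, assuming all four names are exported by bob.ip.binseg.data.transforms (the import line sits outside this hunk, so that module path is an assumption):

from bob.ip.binseg.data.transforms import Compose, ToRGB, CenterCrop, ToTensor  # assumed module path

# add your transforms below
transforms = Compose([
    ToRGB(),                 # convert e.g. a 16-bit grayscale PNG to 3-channel RGB
    CenterCrop((544, 544)),  # crop to the expected network input size
    ToTensor(),              # PIL image -> torch.Tensor
])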
diff --git a/bob/ip/binseg/data/imagefolderinference.py b/bob/ip/binseg/data/imagefolderinference.py
index 79e57e245ffbc3bd369f29523f36f1d09cabc833..0c934cb440e3011ccb0b4f8e99091491a68a7565 100644
--- a/bob/ip/binseg/data/imagefolderinference.py
+++ b/bob/ip/binseg/data/imagefolderinference.py
@@ -8,24 +8,44 @@ import torch
 import torchvision.transforms.functional as VF
 import bob.io.base
 
-def get_file_lists(data_path):
+def get_file_lists(data_path, glob):
+    """
+    Recursively retrieves file lists from a given path, matching a given glob
+
+    This function will use :py:meth:`pathlib.Path.rglob`, together with the
+    provided glob pattern, to search for anything matching the desired
+    filename pattern.
+    """
+    data_path = Path(data_path)
-    image_file_names = np.array(sorted(list(data_path.glob('*'))))
+    image_file_names = np.array(sorted(list(data_path.rglob(glob))))
     return image_file_names
 
 class ImageFolderInference(Dataset):
     """
     Generic ImageFolder containing images for inference
-
+
+    Notice that this implementation, contrary to its sister
+    :py:class:`ImageFolder`, does not *automatically* convert the input image
+    to RGB before passing it to the transforms, so it is possible to
+    accommodate a wider range of input types (e.g. 16-bit PNG images).
+
     Parameters
     ----------
     path : str
         full path to root of dataset
-
+
+    glob : str
+        glob pattern that can be used to filter down the files to be loaded
+        from the provided path
+
+    transform : list
+        List of transformations to apply to every input sample
+
     """
-    def __init__(self, path, transform = None):
+    def __init__(self, path, glob='*', transform = None):
         self.transform = transform
-        self.img_file_list = get_file_lists(path)
+        self.path = path
+        self.img_file_list = get_file_lists(path, glob)
 
     def __len__(self):
         """
@@ -35,27 +55,27 @@ class ImageFolderInference(Dataset):
             size of the dataset
         """
         return len(self.img_file_list)
-
+
     def __getitem__(self,index):
         """
         Parameters
         ----------
         index : int
-
+
         Returns
         -------
         list
             dataitem [img_name, img]
         """
         img_path = self.img_file_list[index]
-        img_name = img_path.name
-        img = Image.open(img_path).convert(mode='RGB')
-
+        img_name = img_path.relative_to(self.path).as_posix()
+        img = Image.open(img_path)
+
         sample = [img]
-
+
         if self.transform :
             sample = self.transform(*sample)
-
+
         sample.insert(0,img_name)
-
+
         return sample
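To illustrate the two new parameters, a dataset rooted at a hypothetical /path/to/images tree could be built as below (the root path, the '*.png' pattern and the file names are made up for illustration; note that rglob already searches recursively, so no '**/' prefix is needed):

from bob.ip.binseg.data.imagefolderinference import ImageFolderInference

# Hypothetical root and glob; adjust to the actual data layout.
dataset = ImageFolderInference('/path/to/images', glob='*.png', transform=transforms)
print(len(dataset))     # number of files matched recursively under the root
name, img = dataset[0]  # name is relative to the root, e.g. 'subdir/eye01.png'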
diff --git a/bob/ip/binseg/engine/inferencer.py b/bob/ip/binseg/engine/inferencer.py
index c2dd6443152cef96fba9dfdbe5a3907f948e8ebf..c1ed5fafa2aa20b46a749fcdca3f29a46a003455 100644
--- a/bob/ip/binseg/engine/inferencer.py
+++ b/bob/ip/binseg/engine/inferencer.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import os 
+import os
 import logging
 import time
 import datetime
@@ -30,7 +30,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
     ground_truths : :py:class:`torch.Tensor`
         tensor with binary ground-truth
     names : list
-        list of file names 
+        list of file names
     output_folder : str
         output path
     logger : :py:class:`logging.Logger`
@@ -38,7 +38,7 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
 
     Returns
     -------
-    list 
+    list
         list containing batch metrics: ``[name, threshold, precision, recall, specificity, accuracy, jaccard, f1_score]``
     """
     step_size = 0.01
@@ -50,25 +50,25 @@ def batch_metrics(predictions, ground_truths, names, output_folder, logger):
 
         file_name = "{}.csv".format(names[j])
         logger.info("saving {}".format(file_name))
-        
+
         with open (os.path.join(output_folder,file_name), "w+") as outfile:
 
             outfile.write("threshold, precision, recall, specificity, accuracy, jaccard, f1_score\n")
 
-            for threshold in np.arange(0.0,1.0,step_size):        
+            for threshold in np.arange(0.0,1.0,step_size):
                 # threshold
                 binary_pred = torch.gt(predictions[j], threshold).byte()
 
                 # equals and not-equals
                 equals = torch.eq(binary_pred, gts) # tensor
                 notequals = torch.ne(binary_pred, gts) # tensor
-                
-                # true positives 
+
+                # true positives
                 tp_tensor = (gts * binary_pred ) # tensor
                 tp_count = torch.sum(tp_tensor).item() # scalar
 
-                # false positives 
-                fp_tensor = torch.eq((binary_pred + tp_tensor), 1) 
+                # false positives
+                fp_tensor = torch.eq((binary_pred + tp_tensor), 1)
                 fp_count = torch.sum(fp_tensor).item()
 
                 # true negatives
@@ -80,14 +80,14 @@
                 fn_count = torch.sum(fn_tensor).item()
 
                 # calc metrics
-                metrics = base_metrics(tp_count, fp_count, tn_count, fn_count)    
-                
-                # write to disk 
+                metrics = base_metrics(tp_count, fp_count, tn_count, fn_count)
+
+                # write to disk
                 outfile.write("{:.2f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f},{:.5f} \n".format(threshold, *metrics))
-            
+
                 batch_metrics.append([names[j],threshold, *metrics ])
-    
+
     return batch_metrics
 
@@ -100,19 +100,21 @@ def save_probability_images(predictions, names, output_folder, logger):
     predictions : :py:class:`torch.Tensor`
         tensor with pixel-wise probabilities
     names : list
-        list of file names 
+        list of file names
     output_folder : str
         output path
     logger : :py:class:`logging.Logger`
         python logger
     """
-    images_subfolder = os.path.join(output_folder,'images') 
-    if not os.path.exists(images_subfolder): os.makedirs(images_subfolder)
+    images_subfolder = os.path.join(output_folder,'images')
     for j in range(predictions.size()[0]):
         img = VF.to_pil_image(predictions.cpu().data[j])
         filename = '{}.png'.format(names[j].split(".")[0])
-        logger.info("saving {}".format(filename))
-        img.save(os.path.join(images_subfolder, filename))
+        fullpath = os.path.join(images_subfolder, filename)
+        logger.info("saving {}".format(fullpath))
+        fulldir = os.path.dirname(fullpath)
+        if not os.path.exists(fulldir): os.makedirs(fulldir)
+        img.save(fullpath)
 
 def save_hdf(predictions, names, output_folder, logger):
     """
@@ -123,19 +125,22 @@ def save_hdf(predictions, names, output_folder, logger):
     predictions : :py:class:`torch.Tensor`
         tensor with pixel-wise probabilities
     names : list
-        list of file names 
+        list of file names
     output_folder : str
         output path
     logger : :py:class:`logging.Logger`
         python logger
     """
-    hdf5_subfolder = os.path.join(output_folder,'hdf5') 
+    hdf5_subfolder = os.path.join(output_folder,'hdf5')
     if not os.path.exists(hdf5_subfolder): os.makedirs(hdf5_subfolder)
     for j in range(predictions.size()[0]):
         img = predictions.cpu().data[j].squeeze(0).numpy()
         filename = '{}.hdf5'.format(names[j].split(".")[0])
+        fullpath = os.path.join(hdf5_subfolder, filename)
         logger.info("saving {}".format(filename))
-        bob.io.base.save(img, os.path.join(hdf5_subfolder, filename))
+        fulldir = os.path.dirname(fullpath)
+        if not os.path.exists(fulldir): os.makedirs(fulldir)
+        bob.io.base.save(img, fullpath)
 
 def do_inference(
     model,
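Because img_name may now contain sub-directories (it is relative to the dataset root), both savers derive the target directory from the full output path before writing. The check-then-create pair used above could also be collapsed into the single call that do_inference() already uses for results_subfolder; a sketch of that equivalent variant, with a hypothetical target path:

import os

# Hypothetical output path mirroring a nested input name.
fullpath = os.path.join('output', 'images', 'subdir', 'eye01.png')
os.makedirs(os.path.dirname(fullpath), exist_ok=True)  # no error if it already exists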
"threshold", - "precision", - "recall", - "specificity", - "accuracy", - "jaccard", + "precision", + "recall", + "specificity", + "accuracy", + "jaccard", "f1_score"]) # Report and Averages metrics_file = "Metrics.csv".format(model.name) metrics_path = os.path.join(results_subfolder, metrics_file) logger.info("Saving average over all input images: {}".format(metrics_file)) - + avg_metrics = df_metrics.groupby('threshold').mean() std_metrics = df_metrics.groupby('threshold').std() - # Uncomment below for F1-score calculation based on average precision and metrics instead of + # Uncomment below for F1-score calculation based on average precision and metrics instead of # F1-scores of individual images. This method is in line with Maninis et. al. (2016) #avg_metrics["f1_score"] = (2* avg_metrics["precision"]*avg_metrics["recall"])/ \ # (avg_metrics["precision"]+avg_metrics["recall"]) - + avg_metrics["std_pr"] = std_metrics["precision"] avg_metrics["pr_upper"] = avg_metrics['precision'] + avg_metrics["std_pr"] avg_metrics["pr_lower"] = avg_metrics['precision'] - avg_metrics["std_pr"] @@ -232,13 +237,13 @@ def do_inference( avg_metrics["re_upper"] = avg_metrics['recall'] + avg_metrics["std_re"] avg_metrics["re_lower"] = avg_metrics['recall'] - avg_metrics["std_re"] avg_metrics["std_f1"] = std_metrics["f1_score"] - + avg_metrics.to_csv(metrics_path) maxf1 = avg_metrics['f1_score'].max() optimal_f1_threshold = avg_metrics['f1_score'].idxmax() - + logger.info("Highest F1-score of {:.5f}, achieved at threshold {}".format(maxf1, optimal_f1_threshold)) - + # Plotting np_avg_metrics = avg_metrics.to_numpy().T fig_name = "precision_recall.pdf" @@ -246,7 +251,7 @@ def do_inference( fig = precision_recall_f1iso_confintval([np_avg_metrics[0]],[np_avg_metrics[1]],[np_avg_metrics[7]],[np_avg_metrics[8]],[np_avg_metrics[10]],[np_avg_metrics[11]], [model.name,None], title=output_folder) fig_filename = os.path.join(results_subfolder, fig_name) fig.savefig(fig_filename) - + # Report times total_inference_time = str(datetime.timedelta(seconds=int(sum(times)))) average_batch_inference_time = np.mean(times) @@ -256,7 +261,7 @@ def do_inference( times_file = "Times.txt" logger.info("saving {}".format(times_file)) - + with open (os.path.join(results_subfolder,times_file), "w+") as outfile: date = datetime.datetime.now() outfile.write("Date: {} \n".format(date.strftime("%Y-%m-%d %H:%M:%S"))) @@ -264,7 +269,7 @@ def do_inference( outfile.write("Average batch inference time: {} \n".format(average_batch_inference_time)) outfile.write("Total inference time: {} \n".format(total_inference_time)) - # Save model summary + # Save model summary summary_file = 'ModelSummary.txt' logger.info("saving {}".format(summary_file))