From eedee6710f0c82aaa107d2c29ac78cd21383e159 Mon Sep 17 00:00:00 2001
From: Tim Laibacher <tim.laibacher@idiap.ch>
Date: Thu, 5 Sep 2019 16:47:33 +0200
Subject: [PATCH] Add inference only dataset config

---
 .../configs/datasets/imagefolderinference.py  | 17 ++++
 .../configs/datasets/imagefoldertest.py       | 17 ++++
 bob/ip/binseg/data/imagefolder.py             | 10 +-
 bob/ip/binseg/data/imagefolderinference.py    | 61 ++++++++++++
 bob/ip/binseg/engine/inferencer.py            | 38 +++++++-
 bob/ip/binseg/engine/predicter.py             | 93 +++++++++++++++++++
 bob/ip/binseg/script/binseg.py                | 74 +++++++++++++++
 bob/ip/binseg/utils/checkpointer.py           |  2 +-
 bob/ip/binseg/utils/plot.py                   |  5 -
 doc/configs.rst                               | 11 +++
 doc/datasets.rst                              | 20 +++-
 doc/evaluation.rst                            | 28 ++++++
 12 files changed, 364 insertions(+), 12 deletions(-)
 create mode 100644 bob/ip/binseg/configs/datasets/imagefolderinference.py
 create mode 100644 bob/ip/binseg/configs/datasets/imagefoldertest.py
 create mode 100644 bob/ip/binseg/data/imagefolderinference.py
 create mode 100644 bob/ip/binseg/engine/predicter.py

diff --git a/bob/ip/binseg/configs/datasets/imagefolderinference.py b/bob/ip/binseg/configs/datasets/imagefolderinference.py
new file mode 100644
index 00000000..ba760e87
--- /dev/null
+++ b/bob/ip/binseg/configs/datasets/imagefolderinference.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from bob.ip.binseg.data.transforms import *
+from bob.ip.binseg.data.imagefolderinference import ImageFolderInference
+
+#### Config ####
+
+# add your transforms below
+transforms = Compose([  
+                        CenterCrop((544,544))
+                        ,ToTensor()
+                    ])
+
+# PyTorch dataset
+path = '/path/to/folder/containing/images'
+dataset = ImageFolderInference(path,transform=transforms)
diff --git a/bob/ip/binseg/configs/datasets/imagefoldertest.py b/bob/ip/binseg/configs/datasets/imagefoldertest.py
new file mode 100644
index 00000000..15d9b038
--- /dev/null
+++ b/bob/ip/binseg/configs/datasets/imagefoldertest.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from bob.ip.binseg.data.transforms import *
+from bob.ip.binseg.data.imagefolder import ImageFolder
+
+#### Config ####
+
+# add your transforms below
+transforms = Compose([  
+                        CenterCrop((544,544))
+                        ,ToTensor()
+                    ])
+
+# PyTorch dataset
+path = '/path/to/testdataset'
+dataset = ImageFolder(path,transform=transforms)
diff --git a/bob/ip/binseg/data/imagefolder.py b/bob/ip/binseg/data/imagefolder.py
index da667024..7ec9dd9d 100644
--- a/bob/ip/binseg/data/imagefolder.py
+++ b/bob/ip/binseg/data/imagefolder.py
@@ -6,6 +6,7 @@ import numpy as np
 from PIL import Image
 import torch
 import torchvision.transforms.functional as VF
+import bob.io.base
 
 def get_file_lists(data_path):
     data_path = Path(data_path)
@@ -60,7 +61,14 @@ class ImageFolder(Dataset):
         img = Image.open(img_path).convert(mode='RGB')
     
         gt_path = self.gt_file_list[index]
-        gt = Image.open(gt_path).convert(mode='1', dither=None)
+        if gt_path.suffix == '.hdf5':
+            gt = bob.io.base.load(str(gt_path)).astype('float32')
+            # not elegant but since transforms require PIL images we do this hacky workaround here
+            gt = torch.from_numpy(gt)
+            gt = VF.to_pil_image(gt).convert(mode='1', dither=None)
+        else:
+            gt = Image.open(gt_path).convert(mode='1', dither=None)
+        
         sample = [img, gt]
         
         if self.transform :
diff --git a/bob/ip/binseg/data/imagefolderinference.py b/bob/ip/binseg/data/imagefolderinference.py
new file mode 100644
index 00000000..79e57e24
--- /dev/null
+++ b/bob/ip/binseg/data/imagefolderinference.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+from torch.utils.data import Dataset
+from pathlib import Path
+import numpy as np
+from PIL import Image
+import torch
+import torchvision.transforms.functional as VF
+import bob.io.base
+
+def get_file_lists(data_path):
+    data_path = Path(data_path)
+    image_file_names = np.array(sorted(list(data_path.glob('*'))))
+    return image_file_names
+
+class ImageFolderInference(Dataset):
+    """
+    Generic ImageFolder containing images for inference
+    
+    Parameters
+    ----------
+    path : str
+        full path to root of dataset
+    
+    """
+    def __init__(self, path, transform = None):
+        self.transform = transform
+        self.img_file_list = get_file_lists(path)
+
+    def __len__(self):
+        """
+        Returns
+        -------
+        int
+            size of the dataset
+        """
+        return len(self.img_file_list)
+    
+    def __getitem__(self,index):
+        """
+        Parameters
+        ----------
+        index : int
+        
+        Returns
+        -------
+        list
+            dataitem [img_name, img]
+        """
+        img_path = self.img_file_list[index]
+        img_name = img_path.name
+        img = Image.open(img_path).convert(mode='RGB')
+    
+        sample = [img]
+        
+        if self.transform :
+            sample = self.transform(*sample)
+        
+        sample.insert(0,img_name)
+        
+        return sample
diff --git a/bob/ip/binseg/engine/inferencer.py b/bob/ip/binseg/engine/inferencer.py
index 26de6cf7..c2dd6443 100644
--- a/bob/ip/binseg/engine/inferencer.py
+++ b/bob/ip/binseg/engine/inferencer.py
@@ -11,8 +11,10 @@ import pandas as pd
 import torchvision.transforms.functional as VF
 from tqdm import tqdm
 
+import bob.io.base
+
 from bob.ip.binseg.utils.metric import SmoothedValue, base_metrics
-from bob.ip.binseg.utils.plot import precision_recall_f1iso
+from bob.ip.binseg.utils.plot import precision_recall_f1iso_confintval
 from bob.ip.binseg.utils.summary import summary
 
 
@@ -108,10 +110,32 @@ def save_probability_images(predictions, names, output_folder, logger):
     if not os.path.exists(images_subfolder): os.makedirs(images_subfolder)
     for j in range(predictions.size()[0]):
         img = VF.to_pil_image(predictions.cpu().data[j])
-        filename = '{}'.format(names[j])
+        filename = '{}.png'.format(names[j].split(".")[0])
         logger.info("saving {}".format(filename))
         img.save(os.path.join(images_subfolder, filename))
 
+def save_hdf(predictions, names, output_folder, logger):
+    """
+    Saves probability maps in hdf5 file format
+
+    Parameters
+    ----------
+    predictions : :py:class:`torch.Tensor`
+        tensor with pixel-wise probabilities
+    names : list
+        list of file names 
+    output_folder : str
+        output path
+    logger : :py:class:`logging.Logger`
+        python logger
+    """
+    hdf5_subfolder = os.path.join(output_folder,'hdf5') 
+    if not os.path.exists(hdf5_subfolder): os.makedirs(hdf5_subfolder)
+    for j in range(predictions.size()[0]):
+        img = predictions.cpu().data[j].squeeze(0).numpy()
+        filename = '{}.hdf5'.format(names[j].split(".")[0])
+        logger.info("saving {}".format(filename))
+        bob.io.base.save(img, os.path.join(hdf5_subfolder, filename))
 
 def do_inference(
     model,
@@ -174,6 +198,8 @@ def do_inference(
             
             # Create probability images
             save_probability_images(probabilities, names, output_folder, logger)
+            # save hdf5
+            save_hdf(probabilities, names, output_folder, logger)
 
     # DataFrame 
     df_metrics = pd.DataFrame(metrics,columns= \
@@ -199,6 +225,12 @@ def do_inference(
     #avg_metrics["f1_score"] =  (2* avg_metrics["precision"]*avg_metrics["recall"])/ \
     #    (avg_metrics["precision"]+avg_metrics["recall"])
     
+    avg_metrics["std_pr"] = std_metrics["precision"]
+    avg_metrics["pr_upper"] = avg_metrics['precision'] + avg_metrics["std_pr"]
+    avg_metrics["pr_lower"] = avg_metrics['precision'] - avg_metrics["std_pr"]
+    avg_metrics["std_re"] = std_metrics["recall"]
+    avg_metrics["re_upper"] = avg_metrics['recall'] + avg_metrics["std_re"]
+    avg_metrics["re_lower"] = avg_metrics['recall'] - avg_metrics["std_re"]
     avg_metrics["std_f1"] = std_metrics["f1_score"]
     
     avg_metrics.to_csv(metrics_path)
@@ -211,7 +243,7 @@ def do_inference(
     np_avg_metrics = avg_metrics.to_numpy().T
     fig_name = "precision_recall.pdf"
     logger.info("saving {}".format(fig_name))
-    fig = precision_recall_f1iso([np_avg_metrics[0]],[np_avg_metrics[1]], [model.name,None], title=output_folder.split('/')[-2:])
+    fig = precision_recall_f1iso_confintval([np_avg_metrics[0]],[np_avg_metrics[1]],[np_avg_metrics[7]],[np_avg_metrics[8]],[np_avg_metrics[10]],[np_avg_metrics[11]], [model.name,None], title=output_folder)
     fig_filename = os.path.join(results_subfolder, fig_name)
     fig.savefig(fig_filename)
     
diff --git a/bob/ip/binseg/engine/predicter.py b/bob/ip/binseg/engine/predicter.py
new file mode 100644
index 00000000..b6e8ad06
--- /dev/null
+++ b/bob/ip/binseg/engine/predicter.py
@@ -0,0 +1,93 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import os 
+import logging
+import time
+import datetime
+import numpy as np
+import torch
+import torchvision.transforms.functional as VF
+from tqdm import tqdm
+
+from bob.ip.binseg.utils.summary import summary
+from bob.ip.binseg.engine.inferencer import save_probability_images
+from bob.ip.binseg.engine.inferencer import save_hdf
+
+
+def do_predict(
+    model,
+    data_loader,
+    device,
+    output_folder = None
+):
+
+    """
+    Run inference and save predictions
+    
+    Parameters
+    ----------
+    model : :py:class:`torch.nn.Module`
+        neural network model (e.g. DRIU, HED, UNet)
+    data_loader : :py:class:`torch.utils.data.DataLoader`
+    device : str
+        device to use ``'cpu'`` or ``'cuda'``
+    output_folder : str
+    """
+    logger = logging.getLogger("bob.ip.binseg.engine.inference")
+    logger.info("Start evaluation")
+    logger.info("Output folder: {}, Device: {}".format(output_folder, device))
+    results_subfolder = os.path.join(output_folder,'results') 
+    os.makedirs(results_subfolder,exist_ok=True)
+    
+    model.eval().to(device)
+    # Sigmoid for probabilities 
+    sigmoid = torch.nn.Sigmoid() 
+
+    # Setup timers
+    start_total_time = time.time()
+    times = []
+
+    for samples in tqdm(data_loader):
+        names = samples[0]
+        images = samples[1].to(device)
+        with torch.no_grad():
+            start_time = time.perf_counter()
+
+            outputs = model(images)
+            
+            # necessary check for hed architecture that uses several outputs 
+            # for loss calculation instead of just the last concatfuse block
+            if isinstance(outputs,list):
+                outputs = outputs[-1]
+            
+            probabilities = sigmoid(outputs)
+            
+            batch_time = time.perf_counter() - start_time
+            times.append(batch_time)
+            logger.info("Batch time: {:.5f} s".format(batch_time))
+            
+            # Create probability images
+            save_probability_images(probabilities, names, output_folder, logger)
+            # Save hdf5
+            save_hdf(probabilities, names, output_folder, logger)
+
+ 
+    # Report times
+    total_inference_time = str(datetime.timedelta(seconds=int(sum(times))))
+    average_batch_inference_time = np.mean(times)
+    total_evalution_time = str(datetime.timedelta(seconds=int(time.time() - start_total_time )))
+
+    logger.info("Average batch inference time: {:.5f}s".format(average_batch_inference_time))
+
+    times_file = "Times.txt"
+    logger.info("saving {}".format(times_file))
+ 
+    with open (os.path.join(results_subfolder,times_file), "w+") as outfile:
+        date = datetime.datetime.now()
+        outfile.write("Date: {} \n".format(date.strftime("%Y-%m-%d %H:%M:%S")))
+        outfile.write("Total evaluation run-time: {} \n".format(total_evalution_time))
+        outfile.write("Average batch inference time: {} \n".format(average_batch_inference_time))
+        outfile.write("Total inference time: {} \n".format(total_inference_time))
+
+
diff --git a/bob/ip/binseg/script/binseg.py b/bob/ip/binseg/script/binseg.py
index 3e4e5681..f78fe404 100644
--- a/bob/ip/binseg/script/binseg.py
+++ b/bob/ip/binseg/script/binseg.py
@@ -32,6 +32,7 @@ from bob.ip.binseg.utils.rsttable import create_overview_grid
 from bob.ip.binseg.utils.plot import metricsviz, overlay,savetransformedtest
 from bob.ip.binseg.utils.transformfolder import transformfolder as transfld
 from bob.ip.binseg.utils.evaluate import do_eval
+from bob.ip.binseg.engine.predicter import do_predict
 
 logger = logging.getLogger(__name__)
 
@@ -492,6 +493,77 @@ def transformfolder(source_path ,target_path,transforms,**kwargs):
     transfld(source_path,target_path,transforms)
 
 
+# Run inference and create predictions only (no ground truth available)
+@binseg.command(entry_point_group='bob.ip.binseg.config', cls=ConfigCommand)
+@click.option(
+    '--output-path',
+    '-o',
+    required=True,
+    default="output",
+    cls=ResourceOption
+    )
+@click.option(
+    '--model',
+    '-m',
+    required=True,
+    cls=ResourceOption
+    )
+@click.option(
+    '--dataset',
+    '-d',
+    required=True,
+    cls=ResourceOption
+    )
+@click.option(
+    '--batch-size',
+    '-b',
+    required=True,
+    default=2,
+    cls=ResourceOption)
+@click.option(
+    '--device',
+    '-d',
+    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
+    show_default=True,
+    required=True,
+    default='cpu',
+    cls=ResourceOption)
+@click.option(
+    '--weight',
+    '-w',
+    help='Path or URL to pretrained model',
+    required=False,
+    default=None,
+    cls=ResourceOption
+    )
+@verbosity_option(cls=ResourceOption)
+def predict(model
+        ,output_path
+        ,device
+        ,batch_size
+        ,dataset
+        ,weight
+        , **kwargs):
+    """ Run inference and evalaute the model performance """
+
+    # PyTorch dataloader
+    data_loader = DataLoader(
+        dataset = dataset
+        ,batch_size = batch_size
+        ,shuffle= False
+        ,pin_memory = torch.cuda.is_available()
+        )
+    
+    # checkpointer, load last model in dir
+    checkpointer = DetectronCheckpointer(model, save_dir = output_path, save_to_disk=False)
+    checkpointer.load(weight)
+    do_predict(model, data_loader, device, output_path)
+
+    # Overlayed images
+    overlay(dataset=dataset, output_path=output_path)
+
+
+
 # Evaluate only. Runs evaluation on predicted probability maps (--prediction-folder)
 @binseg.command(entry_point_group='bob.ip.binseg.config', cls=ConfigCommand)
 @click.option(
@@ -544,4 +616,6 @@ def evalpred(
     
     # Run eval
     do_eval(prediction_folder, data_loader, output_folder = output_path, title= title, legend=legend)
+
+
     
\ No newline at end of file
diff --git a/bob/ip/binseg/utils/checkpointer.py b/bob/ip/binseg/utils/checkpointer.py
index e2090caa..f3899e1d 100644
--- a/bob/ip/binseg/utils/checkpointer.py
+++ b/bob/ip/binseg/utils/checkpointer.py
@@ -62,7 +62,7 @@ class Checkpointer:
             f = self.get_checkpoint_file()
         if not f:
             # no checkpoint could be found
-            self.logger.info("No checkpoint found. Initializing model from scratch")
+            self.logger.warn("No checkpoint found. Initializing model from scratch")
             return {}
         self.logger.info("Loading checkpoint from {}".format(f))
         checkpoint = self._load_file(f)
diff --git a/bob/ip/binseg/utils/plot.py b/bob/ip/binseg/utils/plot.py
index b5943e9d..ceb268d0 100644
--- a/bob/ip/binseg/utils/plot.py
+++ b/bob/ip/binseg/utils/plot.py
@@ -416,11 +416,6 @@ def overlay(dataset, output_path):
         # get sample
         name  = sample[0]
         img = VF.to_pil_image(sample[1]) # PIL Image
-        gt = sample[2].byte() # byte tensor
-        
-        # read metrics 
-        #metrics = pd.read_csv(os.path.join(output_path,'results',name+'.csv'))
-        #f1 = metrics[' f1_score'].max()
         
         # read probability output 
         pred = Image.open(os.path.join(output_path,'images',name)).convert(mode='L')
diff --git a/doc/configs.rst b/doc/configs.rst
index a45e2986..e25e66c6 100644
--- a/doc/configs.rst
+++ b/doc/configs.rst
@@ -14,6 +14,17 @@ ImageFolder
 ----------------
 .. literalinclude:: ../bob/ip/binseg/configs/datasets/imagefolder.py
 
+.. _bob.ip.binseg.configs.datasets.imagefoldertest:
+
+ImageFolderTest
+----------------
+.. literalinclude:: ../bob/ip/binseg/configs/datasets/imagefoldertest.py
+
+.. _bob.ip.binseg.configs.datasets.imagefolderinference:
+
+ImageFolderInference
+---------------------
+.. literalinclude:: ../bob/ip/binseg/configs/datasets/imagefolderinference.py
 
 .. _bob.ip.binseg.configs.datasets.chasedb1:
 
diff --git a/doc/datasets.rst b/doc/datasets.rst
index 9e00c439..5b82d3b1 100644
--- a/doc/datasets.rst
+++ b/doc/datasets.rst
@@ -42,7 +42,23 @@ dataset folder structure for images and ground-truth (gt):
        |- images
        |- gt 
 
-In the dataset config :ref:`bob.ip.binseg.configs.datasets.imagefolder` the full path of the dataset has to be amended. Training can then for example be started with
-``bob binseg train M2UNet IMAGEFOLDER -b 4 -d cuda -o /my/output/path -vv``
+the file names should have the same stem. Currently all image formats that can be read via PIL are supported. Additionally we support hdf5 binary files.
+
+For training a new dataset config needs to be created. You can copy the template :ref:`bob.ip.binseg.configs.datasets.imagefolder` and amend accordingly, 
+e.g. the full path of the dataset and if necessary any preprocessing steps such as resizing, cropping, padding etc..
+
+Training can then be started with
+
+.. code-block:: bash
+
+    bob binseg train M2UNet /path/to/myimagefolderconfig.py -b 4 -d cuda -o /my/output/path -vv
+
+Similarly for testing, a test dataset config needs to be created. You can copy the template :ref:`bob.ip.binseg.configs.datasets.imagefoldertest` and amend accordingly.
+
+Testing can then be started with 
+
+.. code-block:: bash
+
+    bob binseg test M2UNet /path/to/myimagefoldertestconfig.py -b 2 -d cuda -o /my/output/path -vv
 
 .. include:: links.rst
diff --git a/doc/evaluation.rst b/doc/evaluation.rst
index e807c954..2fb923a0 100644
--- a/doc/evaluation.rst
+++ b/doc/evaluation.rst
@@ -26,6 +26,34 @@ E.g. run inference on model M2U-Net on the DRIVE test set:
     # The evaluation results will be stored in the same folder
     bob binseg test M2UNet DRIVETEST -o /DRIVE/M2UNet/output
 
+Outputs
+========
+The inference run generates the following output files:
+
+.. code-block:: bash
+
+    .
+    ├── images  # the predicted probabilities as grayscale images in .png format 
+    ├── hdf5    # the predicted probabilities in hdf5 format
+    ├── last_checkpoint  # text file that keeps track of the last checkpoint 
+    ├── M2UNet_trainlog.csv # training log 
+    ├── M2UNet_trainlog.pdf # training log plot
+    ├── model_*.pth # model checkpoints
+    └── results
+        ├── image*.jpg.csv # evaluation metrics for each image
+        ├── Metrics.csv # average evaluation metrics
+        ├── ModelSummary.txt # model summary and parameter count
+        ├── precision_recall.pdf # precision vs recall plot
+        └── Times.txt # inference times
+
+Inference Only Mode
+====================
+
+If you wish to run inference only on a folder containing images, use the ``predict`` function in combination with a :ref:`bob.ip.binseg.configs.datasets.imagefolderinference` config. E.g.:
+
+.. code-block:: bash
+
+    bob binseg predict M2UNet /path/to/myinferencedatasetconfig.py -b 1 -d cpu -o /my/output/path -w /path/to/pretrained/weight/model_final.pth -vv
 
 Pretrained Models
 =================
-- 
GitLab