From c3dc9915b4e991c6a03fdd8a9de5d3dd7bf0c0a9 Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Thu, 19 Mar 2020 10:39:54 +0100
Subject: [PATCH] [doc] Major doc reorganisation (autosummary ON; better
 structure)

---
 .gitignore                                    |  17 +-
 bob/ip/binseg/__init__.py                     |  17 --
 bob/ip/binseg/configs/models/driu.py          |  25 +-
 bob/ip/binseg/data/transforms.py              |   8 +
 bob/ip/binseg/engine/adabound.py              | 153 +++++++-----
 bob/ip/binseg/engine/ssltrainer.py            |  93 ++++---
 .../binseg/modeling/backbones/mobilenetv2.py  |  76 +++---
 bob/ip/binseg/modeling/backbones/resnet.py    | 126 ++++++----
 bob/ip/binseg/modeling/backbones/vgg.py       |  30 +--
 bob/ip/binseg/modeling/driu.py                |  87 ++++---
 bob/ip/binseg/modeling/driubn.py              |  42 ++--
 bob/ip/binseg/modeling/driuod.py              |  33 +--
 bob/ip/binseg/modeling/driupix.py             |  37 +--
 bob/ip/binseg/modeling/hed.py                 |  44 ++--
 bob/ip/binseg/modeling/losses.py              | 234 +++++++++++++-----
 bob/ip/binseg/modeling/m2u.py                 |  44 ++--
 bob/ip/binseg/modeling/make_layers.py         | 143 ++++++-----
 bob/ip/binseg/modeling/resunet.py             |  18 +-
 bob/ip/binseg/modeling/unet.py                |  21 +-
 bob/ip/binseg/test/test_checkpointer.py       |   4 +-
 bob/ip/binseg/utils/checkpointer.py           |  17 +-
 bob/ip/binseg/utils/click.py                  |   9 +-
 bob/ip/binseg/utils/model_zoo.py              |   8 +-
 doc/acknowledgements.rst                      |   2 +-
 doc/api.rst                                   | 214 +++++++++++++---
 doc/conf.py                                   |  19 +-
 doc/datasets.rst                              |  60 +++--
 doc/extras.inv                                |   5 +
 doc/extras.txt                                |  10 +
 doc/index.rst                                 |  19 +-
 doc/links.rst                                 |  62 +----
 doc/nitpick-exceptions.txt                    |   6 +-
 doc/references.rst                            |  77 ++++++
 doc/setup.rst                                 | 121 ++++-----
 doc/training.rst                              |  12 +-
 35 files changed, 1159 insertions(+), 734 deletions(-)
 create mode 100644 doc/extras.inv
 create mode 100644 doc/extras.txt
 create mode 100644 doc/references.rst

diff --git a/.gitignore b/.gitignore
index fcc2be3d..a16b602a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,3 @@
-### Bob defaults ###
 *~
 *.swp
 *.pyc
@@ -18,26 +17,12 @@ dist
 build
 *.egg
 src/
+doc/api
 record.txt
 core
 output_temp
 output
 *.DS_Store
-
-
-### JupyterNotebook ###
 *.ipynb
 .ipynb_checkpoints
 */.ipynb_checkpoints/*
-
-
-### VisualStudioCode ###
-.vscode/*
-.vscode/settings.json
-.vscode/tasks.json
-.vscode/launch.json
-.vscode/extensions.json
-
-### VisualStudioCode Patch ###
-# Ignore all local history of files
-.history
\ No newline at end of file
diff --git a/bob/ip/binseg/__init__.py b/bob/ip/binseg/__init__.py
index 8e48dba2..e69de29b 100644
--- a/bob/ip/binseg/__init__.py
+++ b/bob/ip/binseg/__init__.py
@@ -1,17 +0,0 @@
-# gets sphinx autodoc done right - don't remove it
-def __appropriate__(*args):
-  """Says object was actually declared here, an not on the import module.
-
-  Parameters:
-
-    *args: An iterable of objects to modify
-
-  Resolves `Sphinx referencing issues
-  <https://github.com/sphinx-doc/sphinx/issues/3048>`
-  """
-
-  for obj in args: obj.__module__ = __name__
-
-__appropriate__()
-
-__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/ip/binseg/configs/models/driu.py b/bob/ip/binseg/configs/models/driu.py
index 0e8fa132..16ec3fca 100644
--- a/bob/ip/binseg/configs/models/driu.py
+++ b/bob/ip/binseg/configs/models/driu.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# -*- coding: utf-8 -*-
+# coding=utf-8
 
 from torch.optim.lr_scheduler import MultiStepLR
 from bob.ip.binseg.modeling.driu import build_driu
@@ -22,17 +22,22 @@ amsbound = False
 scheduler_milestones = [900]
 scheduler_gamma = 0.1
 
-# model
 model = build_driu()
 
-# pretrained backbone
-pretrained_backbone = modelurls['vgg16']
+pretrained_backbone = modelurls["vgg16"]
 
-# optimizer
-optimizer = AdaBound(model.parameters(), lr=lr, betas=betas, final_lr=final_lr, gamma=gamma,
-                 eps=eps, weight_decay=weight_decay, amsbound=amsbound) 
-# criterion
+optimizer = AdaBound(
+    model.parameters(),
+    lr=lr,
+    betas=betas,
+    final_lr=final_lr,
+    gamma=gamma,
+    eps=eps,
+    weight_decay=weight_decay,
+    amsbound=amsbound,
+)
 criterion = SoftJaccardBCELogitsLoss(alpha=0.7)
 
-# scheduler
-scheduler = MultiStepLR(optimizer, milestones=scheduler_milestones, gamma=scheduler_gamma)
+scheduler = MultiStepLR(
+    optimizer, milestones=scheduler_milestones, gamma=scheduler_gamma
+)
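
A configuration module like this one exposes ``model``, ``optimizer``, ``criterion`` and ``scheduler`` objects for the training scripts to pick up. A minimal, self-contained sketch of what the ``MultiStepLR`` settings above do (assumption: a throw-away SGD optimizer and dummy parameter stand in for the real AdaBound/DRIU pair)::

    import torch
    from torch.optim.lr_scheduler import MultiStepLR

    p = torch.nn.Parameter(torch.zeros(1))
    opt = torch.optim.SGD([p], lr=0.001)
    sched = MultiStepLR(opt, milestones=[900], gamma=0.1)
    for epoch in range(1000):
        opt.step()
        sched.step()
    # the learning rate was multiplied by gamma=0.1 exactly once, at epoch 900
    assert abs(opt.param_groups[0]["lr"] - 1e-4) < 1e-12
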
diff --git a/bob/ip/binseg/data/transforms.py b/bob/ip/binseg/data/transforms.py
index b97fe795..659a090e 100644
--- a/bob/ip/binseg/data/transforms.py
+++ b/bob/ip/binseg/data/transforms.py
@@ -1,6 +1,14 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
+"""
+Image transformations for our pipelines.
+
+All transforms work with :py:class:`PIL.Image.Image` objects. We make heavy use
+of `torchvision <https://github.com/pytorch/vision>`_.
+"""
+
+
 import torchvision.transforms.functional as VF
 import random
 import PIL
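
As the new module docstring states, every transform here operates on :py:class:`PIL.Image.Image` objects. An illustrative sketch of that convention, using only the imports already present in this file (the blank test image is a stand-in for a real fundus image)::

    import PIL.Image
    import torchvision.transforms.functional as VF

    img = PIL.Image.new("RGB", (64, 64))  # stand-in for a real input image
    flipped = VF.hflip(img)               # PIL image in, PIL image out
    assert flipped.size == img.size
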
diff --git a/bob/ip/binseg/engine/adabound.py b/bob/ip/binseg/engine/adabound.py
index e220db58..9e658b32 100644
--- a/bob/ip/binseg/engine/adabound.py
+++ b/bob/ip/binseg/engine/adabound.py
@@ -1,46 +1,61 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-""" 
-https://github.com/Luolc/AdaBound/blob/master/adabound/adabound.py
-
-@inproceedings{Luo2019AdaBound,
-  author = {Luo, Liangchen and Xiong, Yuanhao and Liu, Yan and Sun, Xu},
-  title = {Adaptive Gradient Methods with Dynamic Bound of Learning Rate},
-  booktitle = {Proceedings of the 7th International Conference on Learning Representations},
-  month = {May},
-  year = {2019},
-  address = {New Orleans, Louisiana}
-}
 """
+Implementation of the `AdaBound optimizer
+<https://github.com/Luolc/AdaBound/blob/master/adabound/adabound.py>`_::
+
+    @inproceedings{Luo2019AdaBound,
+      author = {Luo, Liangchen and Xiong, Yuanhao and Liu, Yan and Sun, Xu},
+      title = {Adaptive Gradient Methods with Dynamic Bound of Learning Rate},
+      booktitle = {Proceedings of the 7th International Conference on Learning Representations},
+      month = {May},
+      year = {2019},
+      address = {New Orleans, Louisiana}
+    }
+
+"""
+
 import math
 import torch
-from torch.optim import Optimizer
+import torch.optim
 
 
-class AdaBound(Optimizer):
-    """Implements AdaBound algorithm.
-    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
-    
+class AdaBound(torch.optim.Optimizer):
+    """Implements the AdaBound algorithm.
+
     Parameters
     ----------
-    params (iterable): iterable of parameters to optimize or dicts defining
-        parameter groups
-    lr (float, optional): Adam learning rate (default: 1e-3)
-    betas (Tuple[float, float], optional): coefficients used for computing
-        running averages of gradient and its square (default: (0.9, 0.999))
-    final_lr (float, optional): final (SGD) learning rate (default: 0.1)
-    gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
-    eps (float, optional): term added to the denominator to improve
-        numerical stability (default: 1e-8)
-    weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
-    amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
-    .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
-        https://openreview.net/forum?id=Bkg3g2R9FX
+
+    params : list
+        Iterable of parameters to optimize or dicts defining parameter groups
+
+    lr : :obj:`float`, optional
+        Adam learning rate
+
+    betas : :obj:`tuple`, optional
+        Coefficients (as a 2-tuple of floats) used for computing running
+        averages of gradient and its square
+
+    final_lr : :obj:`float`, optional
+        Final (SGD) learning rate
+
+    gamma : :obj:`float`, optional
+        Convergence speed of the bound functions
+
+    eps : :obj:`float`, optional
+        Term added to the denominator to improve numerical stability
+
+    weight_decay : :obj:`float`, optional
+        Weight decay (L2 penalty)
+
+    amsbound : :obj:`bool`, optional
+        Whether to use the AMSBound variant of this algorithm
+
     """
 
-    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
-                 eps=1e-8, weight_decay=0, amsbound=False):
+    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1,
+            gamma=1e-3, eps=1e-8, weight_decay=0, amsbound=False):
         if not 0.0 <= lr:
             raise ValueError("Invalid learning rate: {}".format(lr))
         if not 0.0 <= eps:
@@ -66,10 +81,13 @@ class AdaBound(Optimizer):
 
     def step(self, closure=None):
         """Performs a single optimization step.
-        
+
         Parameters
         ----------
-        closure (callable, optional): A closure that reevaluates the model and returns the loss.
+
+        closure : :obj:`callable`, optional
+            A closure that reevaluates the model and returns the loss.
+
         """
         loss = None
         if closure is not None:
@@ -135,29 +153,43 @@ class AdaBound(Optimizer):
 
         return loss
 
-class AdaBoundW(Optimizer):
-    """Implements AdaBound algorithm with Decoupled Weight Decay (arxiv.org/abs/1711.05101)
-    It has been proposed in `Adaptive Gradient Methods with Dynamic Bound of Learning Rate`_.
-    
+class AdaBoundW(torch.optim.Optimizer):
+    """Implements AdaBound algorithm with Decoupled Weight Decay
+    (See https://arxiv.org/abs/1711.05101)
+
     Parameters
     ----------
-    params (iterable): iterable of parameters to optimize or dicts defining
-        parameter groups
-    lr (float, optional): Adam learning rate (default: 1e-3)
-    betas (Tuple[float, float], optional): coefficients used for computing
-        running averages of gradient and its square (default: (0.9, 0.999))
-    final_lr (float, optional): final (SGD) learning rate (default: 0.1)
-    gamma (float, optional): convergence speed of the bound functions (default: 1e-3)
-    eps (float, optional): term added to the denominator to improve
-        numerical stability (default: 1e-8)
-    weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
-    amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
-    .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
-        https://openreview.net/forum?id=Bkg3g2R9FX
+
+    params : list
+        Iterable of parameters to optimize or dicts defining parameter groups
+
+    lr : :obj:`float`, optional
+        Adam learning rate
+
+    betas : :obj:`tuple`, optional
+        Coefficients (as a 2-tuple of floats) used for computing running
+        averages of gradient and its square
+
+    final_lr : :obj:`float`, optional
+        Final (SGD) learning rate
+
+    gamma : :obj:`float`, optional
+        Convergence speed of the bound functions
+
+    eps : :obj:`float`, optional
+        Term added to the denominator to improve numerical stability
+
+    weight_decay : :obj:`float`, optional
+        Weight decay (L2 penalty)
+
+    amsbound : :obj:`bool`, optional
+        Whether to use the AMSBound variant of this algorithm
+
     """
 
-    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
-                 eps=1e-8, weight_decay=0, amsbound=False):
+    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1,
+            gamma=1e-3, eps=1e-8, weight_decay=0, amsbound=False):
+
         if not 0.0 <= lr:
             raise ValueError("Invalid learning rate: {}".format(lr))
         if not 0.0 <= eps:
@@ -170,8 +202,8 @@ class AdaBoundW(Optimizer):
             raise ValueError("Invalid final learning rate: {}".format(final_lr))
         if not 0.0 <= gamma < 1.0:
             raise ValueError("Invalid gamma parameter: {}".format(gamma))
-        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma, eps=eps,
-                        weight_decay=weight_decay, amsbound=amsbound)
+        defaults = dict(lr=lr, betas=betas, final_lr=final_lr, gamma=gamma,
+                eps=eps, weight_decay=weight_decay, amsbound=amsbound)
         super(AdaBoundW, self).__init__(params, defaults)
 
         self.base_lrs = list(map(lambda group: group['lr'], self.param_groups))
@@ -183,11 +215,15 @@ class AdaBoundW(Optimizer):
 
     def step(self, closure=None):
         """Performs a single optimization step.
-        
+
         Parameters
         ----------
-        closure (callable, optional): A closure that reevaluates the model and returns the loss.
+
+        closure : :obj:`callable`, optional
+            A closure that reevaluates the model and returns the loss.
+
         """
+
         loss = None
         if closure is not None:
             loss = closure()
@@ -238,7 +274,8 @@ class AdaBoundW(Optimizer):
                 step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
 
                 # Applies bounds on actual learning rate
-                # lr_scheduler cannot affect final_lr, this is a workaround to apply lr decay
+                # lr_scheduler cannot affect final_lr, this is a workaround to
+                # apply lr decay
                 final_lr = group['final_lr'] * group['lr'] / base_lr
                 lower_bound = final_lr * (1 - 1 / (group['gamma'] * state['step'] + 1))
                 upper_bound = final_lr * (1 + 1 / (group['gamma'] * state['step']))
@@ -252,4 +289,4 @@ class AdaBoundW(Optimizer):
                 else:
                     p.data.add_(-step_size)
 
-        return loss
\ No newline at end of file
+        return loss
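
A minimal usage sketch of the optimizer above (the linear model and random data are placeholders; the constructor arguments match the signature in this file)::

    import torch
    from bob.ip.binseg.engine.adabound import AdaBound

    model = torch.nn.Linear(4, 1)
    opt = AdaBound(model.parameters(), lr=1e-3, final_lr=0.1)

    x, y = torch.randn(8, 4), torch.randn(8, 1)
    loss = torch.nn.functional.mse_loss(model(x), y)
    opt.zero_grad()
    loss.backward()
    opt.step()  # Adam-style update, step sizes clipped to the dynamic bounds
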
diff --git a/bob/ip/binseg/engine/ssltrainer.py b/bob/ip/binseg/engine/ssltrainer.py
index 38243117..dfc73c86 100644
--- a/bob/ip/binseg/engine/ssltrainer.py
+++ b/bob/ip/binseg/engine/ssltrainer.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-import os 
+import os
 import logging
 import time
 import datetime
@@ -19,32 +19,39 @@ def sharpen(x, T):
 
 def mix_up(alpha, input, target, unlabeled_input, unlabled_target):
     """Applies mix up as described in [MIXMATCH_19].
-    
+
     Parameters
     ----------
     alpha : float
+
     input : :py:class:`torch.Tensor`
+
     target : :py:class:`torch.Tensor`
+
     unlabeled_input : :py:class:`torch.Tensor`
+
     unlabled_target : :py:class:`torch.Tensor`
-    
+
+
     Returns
     -------
+
     list
+
     """
-    # TODO: 
+    # TODO:
     with torch.no_grad():
         l = np.random.beta(alpha, alpha) # Eq (8)
         l = max(l, 1 - l) # Eq (9)
         # Shuffle and concat. Alg. 1 Line: 12
         w_inputs = torch.cat([input,unlabeled_input],0)
         w_targets = torch.cat([target,unlabled_target],0)
-        idx = torch.randperm(w_inputs.size(0)) # get random index 
-        
+        idx = torch.randperm(w_inputs.size(0)) # get random index
+
         # Apply MixUp to labeled data and entries from W. Alg. 1 Line: 13
-        input_mixedup = l * input + (1 - l) * w_inputs[idx[len(input):]] 
+        input_mixedup = l * input + (1 - l) * w_inputs[idx[len(input):]]
         target_mixedup = l * target + (1 - l) * w_targets[idx[len(target):]]
-        
+
         # Apply MixUp to unlabeled data and entries from W. Alg. 1 Line: 14
         unlabeled_input_mixedup = l * unlabeled_input + (1 - l) * w_inputs[idx[:len(unlabeled_input)]]
         unlabled_target_mixedup =  l * unlabled_target + (1 - l) * w_targets[idx[:len(unlabled_target)]]
@@ -53,19 +60,23 @@ def mix_up(alpha, input, target, unlabeled_input, unlabled_target):
 
 def square_rampup(current, rampup_length=16):
     """slowly ramp-up ``lambda_u``
-    
+
     Parameters
     ----------
+
     current : int
         current epoch
-    rampup_length : int, optional
+
+    rampup_length : :obj:`int`, optional
         how long to ramp up, by default 16
-    
+
     Returns
     -------
-    float
+
+    factor : float
         ramp up factor
     """
+
     if rampup_length == 0:
         return 1.0
     else:
@@ -74,18 +85,21 @@ def square_rampup(current, rampup_length=16):
 
 def linear_rampup(current, rampup_length=16):
     """slowly ramp-up ``lambda_u``
-    
+
     Parameters
     ----------
     current : int
         current epoch
-    rampup_length : int, optional
+
+    rampup_length : :obj:`int`, optional
         how long to ramp up, by default 16
-    
+
     Returns
     -------
-    float
+
+    factor : float
         ramp up factor
+
     """
     if rampup_length == 0:
         return 1.0
@@ -96,16 +110,21 @@ def linear_rampup(current, rampup_length=16):
 def guess_labels(unlabeled_images, model):
     """
     Calculate the average predictions by 2 augmentations: horizontal and vertical flips
+
     Parameters
     ----------
+
     unlabeled_images : :py:class:`torch.Tensor`
-        shape: ``[n,c,h,w]``
+        ``[n,c,h,w]``
+
     model : :py:class:`torch.nn.Module`
-    
+
     Returns
     -------
-    :py:class:`torch.Tensor`
-        shape: ``[n,c,h,w]``.
+
+    guesses : :py:class:`torch.Tensor`
+        guessed labels, with shape ``[n,c,h,w]``
+
     """
     with torch.no_grad():
         guess1 = torch.sigmoid(model(unlabeled_images)).unsqueeze(0)
@@ -133,31 +152,43 @@ def do_ssltrain(
     output_folder,
     rampup_length
 ):
-    """ 
+    """
     Train model and save to disk.
-    
+
     Parameters
     ----------
-    model : :py:class:`torch.nn.Module` 
+
+    model : :py:class:`torch.nn.Module`
         Network (e.g. DRIU, HED, UNet)
+
     data_loader : :py:class:`torch.utils.data.DataLoader`
+
     optimizer : :py:mod:`torch.optim`
+
     criterion : :py:class:`torch.nn.modules.loss._Loss`
         loss function
+
     scheduler : :py:mod:`torch.optim`
         learning rate scheduler
+
     checkpointer : :py:class:`bob.ip.binseg.utils.checkpointer.DetectronCheckpointer`
         checkpointer
+
     checkpoint_period : int
         save a checkpoint every n epochs
-    device : str  
+
+    device : str
         device to use ``'cpu'`` or ``'cuda'``
+
     arguments : dict
         start and end epochs
-    output_folder : str 
+
+    output_folder : str
         output path
-    rampup_Length : int
+
+    rampup_length : int
         rampup epochs
+
     """
     logger = logging.getLogger("bob.ip.binseg.engine.trainer")
     logger.info("Start training")
@@ -181,7 +212,7 @@ def do_ssltrain(
             unlabeled_loss = SmoothedValue(len(data_loader))
             epoch = epoch + 1
             arguments["epoch"] = epoch
-            
+
             # Epoch time
             start_epoch_time = time.time()
 
@@ -238,8 +269,8 @@ def do_ssltrain(
                     lr=optimizer.param_groups[0]["lr"],
                     memory = (torch.cuda.max_memory_allocated() / 1024.0 / 1024.0) if torch.cuda.is_available() else .0,
                     )
-                )  
-            logger.info(("eta: {eta}, " 
+                )
+            logger.info(("eta: {eta}, "
                         "epoch: {epoch}, "
                         "avg. loss: {avg_loss:.6f}, "
                         "median loss: {median_loss:.6f}, "
@@ -266,10 +297,10 @@ def do_ssltrain(
             "Total training time: {} ({:.4f} s / epoch)".format(
                 total_time_str, total_training_time / (max_epoch)
             ))
-        
+
     log_plot_file = os.path.join(output_folder,"{}_trainlog.pdf".format(model.name))
     logdf = pd.read_csv(os.path.join(output_folder,"{}_trainlog.csv".format(model.name)),header=None, names=["avg. loss", "median loss", "labeled loss", "unlabeled loss", "lr","max memory"])
     fig = loss_curve(logdf,output_folder)
     logger.info("saving {}".format(log_plot_file))
     fig.savefig(log_plot_file)
-  
\ No newline at end of file
+
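
The ``linear_rampup``/``square_rampup`` helpers above scale ``lambda_u`` from 0 to 1 over the first ``rampup_length`` epochs. A small numeric sketch of the linear variant (the clipping body is the usual MixMatch-style formulation, an assumption where the hunks above elide it)::

    import numpy as np

    def linear_rampup(current, rampup_length=16):
        if rampup_length == 0:
            return 1.0
        return float(np.clip(current / rampup_length, 0.0, 1.0))

    assert linear_rampup(0) == 0.0
    assert linear_rampup(8) == 0.5    # half-way through the ramp
    assert linear_rampup(32) == 1.0   # saturates after rampup_length epochs
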
diff --git a/bob/ip/binseg/modeling/backbones/mobilenetv2.py b/bob/ip/binseg/modeling/backbones/mobilenetv2.py
index 9f1ae8f5..5d87f496 100644
--- a/bob/ip/binseg/modeling/backbones/mobilenetv2.py
+++ b/bob/ip/binseg/modeling/backbones/mobilenetv2.py
@@ -1,30 +1,30 @@
 #!/usr/bin/env python
 # vim: set fileencoding=utf-8 :
 
-# Adopted from https://github.com/tonylins/pytorch-mobilenet-v2/ by @tonylins 
+# Adapted from https://github.com/tonylins/pytorch-mobilenet-v2/ by @tonylins
 # Ji Lin under Apache License 2.0
 
-import torch.nn as nn
+import torch.nn
 import math
 
 
 def conv_bn(inp, oup, stride):
-    return nn.Sequential(
-        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
-        nn.BatchNorm2d(oup),
-        nn.ReLU6(inplace=True)
+    return torch.nn.Sequential(
+        torch.nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
+        torch.nn.BatchNorm2d(oup),
+        torch.nn.ReLU6(inplace=True)
     )
 
 
 def conv_1x1_bn(inp, oup):
-    return nn.Sequential(
-        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
-        nn.BatchNorm2d(oup),
-        nn.ReLU6(inplace=True)
+    return torch.nn.Sequential(
+        torch.nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
+        torch.nn.BatchNorm2d(oup),
+        torch.nn.ReLU6(inplace=True)
     )
 
 
-class InvertedResidual(nn.Module):
+class InvertedResidual(torch.nn.Module):
     def __init__(self, inp, oup, stride, expand_ratio):
         super(InvertedResidual, self).__init__()
         self.stride = stride
@@ -34,28 +34,28 @@ class InvertedResidual(nn.Module):
         self.use_res_connect = self.stride == 1 and inp == oup
 
         if expand_ratio == 1:
-            self.conv = nn.Sequential(
+            self.conv = torch.nn.Sequential(
                 # dw
-                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
-                nn.BatchNorm2d(hidden_dim),
-                nn.ReLU6(inplace=True),
+                torch.nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
+                torch.nn.BatchNorm2d(hidden_dim),
+                torch.nn.ReLU6(inplace=True),
                 # pw-linear
-                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
-                nn.BatchNorm2d(oup),
+                torch.nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+                torch.nn.BatchNorm2d(oup),
             )
         else:
-            self.conv = nn.Sequential(
+            self.conv = torch.nn.Sequential(
                 # pw
-                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
-                nn.BatchNorm2d(hidden_dim),
-                nn.ReLU6(inplace=True),
+                torch.nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
+                torch.nn.BatchNorm2d(hidden_dim),
+                torch.nn.ReLU6(inplace=True),
                 # dw
-                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
-                nn.BatchNorm2d(hidden_dim),
-                nn.ReLU6(inplace=True),
+                torch.nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
+                torch.nn.BatchNorm2d(hidden_dim),
+                torch.nn.ReLU6(inplace=True),
                 # pw-linear
-                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
-                nn.BatchNorm2d(oup),
+                torch.nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
+                torch.nn.BatchNorm2d(oup),
             )
 
     def forward(self, x):
@@ -65,11 +65,11 @@ class InvertedResidual(nn.Module):
             return self.conv(x)
 
 
-class MobileNetV2(nn.Module):
+class MobileNetV2(torch.nn.Module):
     def __init__(self, n_class=1000, input_size=224, width_mult=1., return_features = None, m2u=True):
         super(MobileNetV2, self).__init__()
-        self.return_features = return_features 
-        self.m2u = m2u 
+        self.return_features = return_features
+        self.m2u = m2u
         block = InvertedResidual
         input_channel = 32
         last_channel = 1280
@@ -100,13 +100,13 @@ class MobileNetV2(nn.Module):
                 input_channel = output_channel
         # building last several layers
         #self.features.append(conv_1x1_bn(input_channel, self.last_channel))
-        # make it nn.Sequential
-        self.features = nn.Sequential(*self.features)
+        # make it torch.nn.Sequential
+        self.features = torch.nn.Sequential(*self.features)
 
         # building classifier
-        #self.classifier = nn.Sequential(
-        #    nn.Dropout(0.2),
-        #    nn.Linear(self.last_channel, n_class),
+        #self.classifier = torch.nn.Sequential(
+        #    torch.nn.Dropout(0.2),
+        #    torch.nn.Linear(self.last_channel, n_class),
         #)
 
         self._initialize_weights()
@@ -126,15 +126,15 @@ class MobileNetV2(nn.Module):
 
     def _initialize_weights(self):
         for m in self.modules():
-            if isinstance(m, nn.Conv2d):
+            if isinstance(m, torch.nn.Conv2d):
                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                 m.weight.data.normal_(0, math.sqrt(2. / n))
                 if m.bias is not None:
                     m.bias.data.zero_()
-            elif isinstance(m, nn.BatchNorm2d):
+            elif isinstance(m, torch.nn.BatchNorm2d):
                 m.weight.data.fill_(1)
                 m.bias.data.zero_()
-            elif isinstance(m, nn.Linear):
+            elif isinstance(m, torch.nn.Linear):
                 n = m.weight.size(1)
                 m.weight.data.normal_(0, 0.01)
-                m.bias.data.zero_()
\ No newline at end of file
+                m.bias.data.zero_()
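
A shape-level sanity sketch for the building blocks above (the input size is arbitrary)::

    import torch
    from bob.ip.binseg.modeling.backbones.mobilenetv2 import (
        conv_bn, InvertedResidual)

    x = torch.randn(1, 3, 64, 64)
    stem = conv_bn(3, 32, 2)          # 3x3 conv (stride 2) + BN + ReLU6
    y = stem(x)
    assert y.shape == (1, 32, 32, 32)

    block = InvertedResidual(32, 32, stride=1, expand_ratio=6)
    assert block(y).shape == y.shape  # stride 1 and inp == oup: residual path
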
diff --git a/bob/ip/binseg/modeling/backbones/resnet.py b/bob/ip/binseg/modeling/backbones/resnet.py
index 5881652e..285a5a15 100644
--- a/bob/ip/binseg/modeling/backbones/resnet.py
+++ b/bob/ip/binseg/modeling/backbones/resnet.py
@@ -1,44 +1,48 @@
-# Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py 
+# Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
 # resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN : https://github.com/rgeirhos/texture-vs-shap
 
 import torch.nn as nn
 import torch.utils.model_zoo as model_zoo
 
 
-__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
-           'resnet152']
-
-
 model_urls = {
-    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
-    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
-    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
-    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
-    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
-    'resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN': 'https://bitbucket.org/robert_geirhos/texture-vs-shape-pretrained-models/raw/60b770e128fffcbd8562a3ab3546c1a735432d03/resnet50_finetune_60_epochs_lr_decay_after_30_start_resnet50_train_45_epochs_combined_IN_SF-ca06340c.pth.tar',
+    "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
+    "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
+    "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
+    "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
+    "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
+    "resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN": "https://bitbucket.org/robert_geirhos/texture-vs-shape-pretrained-models/raw/60b770e128fffcbd8562a3ab3546c1a735432d03/resnet50_finetune_60_epochs_lr_decay_after_30_start_resnet50_train_45_epochs_combined_IN_SF-ca06340c.pth.tar",
 }
 
 
-def conv3x3(in_planes, out_planes, stride=1):
+def _conv3x3(in_planes, out_planes, stride=1):
     """3x3 convolution with padding"""
-    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
-                     padding=1, bias=False)
+    return nn.Conv2d(
+        in_planes,
+        out_planes,
+        kernel_size=3,
+        stride=stride,
+        padding=1,
+        bias=False,
+    )
 
 
-def conv1x1(in_planes, out_planes, stride=1):
+def _conv1x1(in_planes, out_planes, stride=1):
     """1x1 convolution"""
-    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+    return nn.Conv2d(
+        in_planes, out_planes, kernel_size=1, stride=stride, bias=False
+    )
 
 
-class BasicBlock(nn.Module):
+class _BasicBlock(nn.Module):
     expansion = 1
 
     def __init__(self, inplanes, planes, stride=1, downsample=None):
-        super(BasicBlock, self).__init__()
-        self.conv1 = conv3x3(inplanes, planes, stride)
+        super(_BasicBlock, self).__init__()
+        self.conv1 = _conv3x3(inplanes, planes, stride)
         self.bn1 = nn.BatchNorm2d(planes)
         self.relu = nn.ReLU(inplace=True)
-        self.conv2 = conv3x3(planes, planes)
+        self.conv2 = _conv3x3(planes, planes)
         self.bn2 = nn.BatchNorm2d(planes)
         self.downsample = downsample
         self.stride = stride
@@ -62,16 +66,16 @@ class BasicBlock(nn.Module):
         return out
 
 
-class Bottleneck(nn.Module):
+class _Bottleneck(nn.Module):
     expansion = 4
 
     def __init__(self, inplanes, planes, stride=1, downsample=None):
-        super(Bottleneck, self).__init__()
-        self.conv1 = conv1x1(inplanes, planes)
+        super(_Bottleneck, self).__init__()
+        self.conv1 = _conv1x1(inplanes, planes)
         self.bn1 = nn.BatchNorm2d(planes)
-        self.conv2 = conv3x3(planes, planes, stride)
+        self.conv2 = _conv3x3(planes, planes, stride)
         self.bn2 = nn.BatchNorm2d(planes)
-        self.conv3 = conv1x1(planes, planes * self.expansion)
+        self.conv3 = _conv1x1(planes, planes * self.expansion)
         self.bn3 = nn.BatchNorm2d(planes * self.expansion)
         self.relu = nn.ReLU(inplace=True)
         self.downsample = downsample
@@ -101,20 +105,22 @@ class Bottleneck(nn.Module):
 
 
 class ResNet(nn.Module):
-
-    def __init__(self, block, layers, return_features, zero_init_residual=False):
+    def __init__(
+        self, block, layers, return_features, zero_init_residual=False
+    ):
         """
         Generic ResNet network with layer return.
         Attributes
         ----------
         return_features: list of length 5
-            layers to return. 
+            layers to return.
         """
         super(ResNet, self).__init__()
         self.inplanes = 64
         self.return_features = return_features
-        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
-                               bias=False)
+        self.conv1 = nn.Conv2d(
+            3, 64, kernel_size=7, stride=2, padding=3, bias=False
+        )
         self.bn1 = nn.BatchNorm2d(64)
         self.relu = nn.ReLU(inplace=True)
         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
@@ -123,13 +129,22 @@ class ResNet(nn.Module):
         self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
         self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
 
-        
-        self.features = [self.conv1, self.bn1, self.relu, self.maxpool
-                                    ,self.layer1,self.layer2,self.layer3,self.layer4]
+        self.features = [
+            self.conv1,
+            self.bn1,
+            self.relu,
+            self.maxpool,
+            self.layer1,
+            self.layer2,
+            self.layer3,
+            self.layer4,
+        ]
 
         for m in self.modules():
             if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
             elif isinstance(m, nn.BatchNorm2d):
                 nn.init.constant_(m.weight, 1)
                 nn.init.constant_(m.bias, 0)
@@ -139,16 +154,16 @@ class ResNet(nn.Module):
         # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
         if zero_init_residual:
             for m in self.modules():
-                if isinstance(m, Bottleneck):
+                if isinstance(m, _Bottleneck):
                     nn.init.constant_(m.bn3.weight, 0)
-                elif isinstance(m, BasicBlock):
+                elif isinstance(m, _BasicBlock):
                     nn.init.constant_(m.bn2.weight, 0)
 
     def _make_layer(self, block, planes, blocks, stride=1):
         downsample = None
         if stride != 1 or self.inplanes != planes * block.expansion:
             downsample = nn.Sequential(
-                conv1x1(self.inplanes, planes * block.expansion, stride),
+                _conv1x1(self.inplanes, planes * block.expansion, stride),
                 nn.BatchNorm2d(planes * block.expansion),
             )
 
@@ -164,7 +179,7 @@ class ResNet(nn.Module):
         outputs = []
         # hw of input, needed for DRIU and HED
         outputs.append(x.shape[2:4])
-        for index,m in enumerate(self.features):
+        for index, m in enumerate(self.features):
             x = m(x)
             # extract layers
             if index in self.return_features:
@@ -177,9 +192,9 @@ def resnet18(pretrained=False, **kwargs):
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs)
+    model = ResNet(_BasicBlock, [2, 2, 2, 2], **kwargs)
     if pretrained:
-        model.load_state_dict(model_zoo.load_url(model_urls['resnet18']))
+        model.load_state_dict(model_zoo.load_url(model_urls["resnet18"]))
     return model
 
 
@@ -188,9 +203,9 @@ def resnet34(pretrained=False, **kwargs):
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs)
+    model = ResNet(_BasicBlock, [3, 4, 6, 3], **kwargs)
     if pretrained:
-        model.load_state_dict(model_zoo.load_url(model_urls['resnet34']))
+        model.load_state_dict(model_zoo.load_url(model_urls["resnet34"]))
     return model
 
 
@@ -199,37 +214,46 @@ def resnet50(pretrained=False, **kwargs):
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+    model = ResNet(_Bottleneck, [3, 4, 6, 3], **kwargs)
     if pretrained:
-        model.load_state_dict(model_zoo.load_url(model_urls['resnet50']))
+        model.load_state_dict(model_zoo.load_url(model_urls["resnet50"]))
     return model
 
+
 def shaperesnet50(pretrained=False, **kwargs):
     """Constructs a ResNet-50 model, pretrained on Stylized-ImageNe and ImageNet and fine-tuned on ImageNet.
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)
+    model = ResNet(_Bottleneck, [3, 4, 6, 3], **kwargs)
     if pretrained:
-        model.load_state_dict(model_zoo.load_url(model_urls['resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN']))
+        model.load_state_dict(
+            model_zoo.load_url(
+                model_urls[
+                    "resnet50_trained_on_SIN_and_IN_then_finetuned_on_IN"
+                ]
+            )
+        )
     return model
 
+
 def resnet101(pretrained=False, **kwargs):
     """Constructs a ResNet-101 model.
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs)
+    model = ResNet(_Bottleneck, [3, 4, 23, 3], **kwargs)
     if pretrained:
-        model.load_state_dict(model_zoo.load_url(model_urls['resnet101']))
+        model.load_state_dict(model_zoo.load_url(model_urls["resnet101"]))
     return model
 
+
 def resnet152(pretrained=False, **kwargs):
     """Constructs a ResNet-152 model.
     Args:
         pretrained (bool): If True, returns a model pre-trained on ImageNet
     """
-    model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs)
+    model = ResNet(_Bottleneck, [3, 8, 36, 3], **kwargs)
     if pretrained:
-        model.load_state_dict(model_zoo.load_url(model_urls['resnet152']))
-    return model
\ No newline at end of file
+        model.load_state_dict(model_zoo.load_url(model_urls["resnet152"]))
+    return model
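
The ``return_features`` indices select entries of ``self.features`` (``conv1``, ``bn1``, ``relu``, ``maxpool``, ``layer1`` through ``layer4``); the forward pass returns the input height/width first, followed by one tensor per selected index. An illustrative sketch (the particular indices are arbitrary)::

    import torch
    from bob.ip.binseg.modeling.backbones.resnet import resnet50

    model = resnet50(pretrained=False, return_features=[4, 5, 6, 7])  # layer1..layer4
    outputs = model(torch.randn(1, 3, 224, 224))
    hw = outputs[0]             # torch.Size([224, 224]), used by the DRIU/HED heads
    feature_maps = outputs[1:]  # four tensors, one per requested layer
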
diff --git a/bob/ip/binseg/modeling/backbones/vgg.py b/bob/ip/binseg/modeling/backbones/vgg.py
index 85a37580..7736a4c1 100644
--- a/bob/ip/binseg/modeling/backbones/vgg.py
+++ b/bob/ip/binseg/modeling/backbones/vgg.py
@@ -1,18 +1,12 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py 
+# Adapted from https://github.com/pytorch/vision/blob/master/torchvision/models/vgg.py
 
 import torch.nn as nn
 import torch.utils.model_zoo as model_zoo
 
 
-__all__ = [
-    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
-    'vgg19_bn', 'vgg19',
-]
-
-
 model_urls = {
     'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
     'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
@@ -59,7 +53,7 @@ class VGG(nn.Module):
                 nn.init.constant_(m.bias, 0)
 
 
-def make_layers(cfg, batch_norm=False):
+def _make_layers(cfg, batch_norm=False):
     layers = []
     in_channels = 3
     for v in cfg:
@@ -75,7 +69,7 @@ def make_layers(cfg, batch_norm=False):
     return nn.Sequential(*layers)
 
 
-cfg = {
+_cfg = {
     'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
     'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
     'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
@@ -90,7 +84,7 @@ def vgg11(pretrained=False, **kwargs):
     """
     if pretrained:
         kwargs['init_weights'] = False
-    model = VGG(make_layers(cfg['A']), **kwargs)
+    model = VGG(_make_layers(_cfg['A']), **kwargs)
     if pretrained:
         model.load_state_dict(model_zoo.load_url(model_urls['vgg11']))
     return model
@@ -103,7 +97,7 @@ def vgg11_bn(pretrained=False, **kwargs):
     """
     if pretrained:
         kwargs['init_weights'] = False
-    model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
+    model = VGG(_make_layers(_cfg['A'], batch_norm=True), **kwargs)
     if pretrained:
         model.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']))
     return model
@@ -116,7 +110,7 @@ def vgg13(pretrained=False, **kwargs):
     """
     if pretrained:
         kwargs['init_weights'] = False
-    model = VGG(make_layers(cfg['B']), **kwargs)
+    model = VGG(_make_layers(_cfg['B']), **kwargs)
     if pretrained:
         model.load_state_dict(model_zoo.load_url(model_urls['vgg13']))
     return model
@@ -129,7 +123,7 @@ def vgg13_bn(pretrained=False, **kwargs):
     """
     if pretrained:
         kwargs['init_weights'] = False
-    model = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)
+    model = VGG(_make_layers(_cfg['B'], batch_norm=True), **kwargs)
     if pretrained:
         model.load_state_dict(model_zoo.load_url(model_urls['vgg13_bn']))
     return model
@@ -142,7 +136,7 @@ def vgg16(pretrained=False, **kwargs):
     """
     if pretrained:
         kwargs['init_weights'] = False
-    model = VGG(make_layers(cfg['D']), **kwargs)
+    model = VGG(_make_layers(_cfg['D']), **kwargs)
     if pretrained:
         model.load_state_dict(model_zoo.load_url(model_urls['vgg16']),strict=False)
     return model
@@ -155,7 +149,7 @@ def vgg16_bn(pretrained=False, **kwargs):
     """
     if pretrained:
         kwargs['init_weights'] = False
-    model = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)
+    model = VGG(_make_layers(_cfg['D'], batch_norm=True), **kwargs)
     if pretrained:
         model.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn']))
     return model
@@ -168,7 +162,7 @@ def vgg19(pretrained=False, **kwargs):
     """
     if pretrained:
         kwargs['init_weights'] = False
-    model = VGG(make_layers(cfg['E']), **kwargs)
+    model = VGG(_make_layers(_cfg['E']), **kwargs)
     if pretrained:
         model.load_state_dict(model_zoo.load_url(model_urls['vgg19']))
     return model
@@ -181,7 +175,7 @@ def vgg19_bn(pretrained=False, **kwargs):
     """
     if pretrained:
         kwargs['init_weights'] = False
-    model = VGG(make_layers(cfg['E'], batch_norm=True), **kwargs)
+    model = VGG(_make_layers(_cfg['E'], batch_norm=True), **kwargs)
     if pretrained:
         model.load_state_dict(model_zoo.load_url(model_urls['vgg19_bn']))
-    return model
\ No newline at end of file
+    return model
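
The VGG backbones follow the same convention; ``[3, 8, 14, 22]`` are the feature indices that ``build_driu`` requests (see ``bob/ip/binseg/modeling/driu.py`` below)::

    import torch
    from bob.ip.binseg.modeling.backbones.vgg import vgg16

    backbone = vgg16(pretrained=False, return_features=[3, 8, 14, 22])
    outputs = backbone(torch.randn(1, 3, 544, 544))
    # outputs[0] holds the input height/width; outputs[1:] the four tapped maps
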
diff --git a/bob/ip/binseg/modeling/driu.py b/bob/ip/binseg/modeling/driu.py
index 466d4eb0..06454a17 100644
--- a/bob/ip/binseg/modeling/driu.py
+++ b/bob/ip/binseg/modeling/driu.py
@@ -2,79 +2,104 @@
 # -*- coding: utf-8 -*-
 
 import torch
-from torch import nn
+import torch.nn
 from collections import OrderedDict
 from bob.ip.binseg.modeling.backbones.vgg import vgg16
-from bob.ip.binseg.modeling.make_layers import conv_with_kaiming_uniform,convtrans_with_kaiming_uniform, UpsampleCropBlock
+from bob.ip.binseg.modeling.make_layers import (
+    conv_with_kaiming_uniform,
+    convtrans_with_kaiming_uniform,
+    UpsampleCropBlock,
+)
 
-class ConcatFuseBlock(nn.Module):
-    """ 
-    Takes in four feature maps with 16 channels each, concatenates them 
-    and applies a 1x1 convolution with 1 output channel. 
+
+class ConcatFuseBlock(torch.nn.Module):
+    """
+    Takes in four feature maps with 16 channels each, concatenates them
+    and applies a 1x1 convolution with 1 output channel.
     """
+
     def __init__(self):
         super().__init__()
-        self.conv = conv_with_kaiming_uniform(4*16,1,1,1,0)
-    
-    def forward(self,x1,x2,x3,x4):
-        x_cat = torch.cat([x1,x2,x3,x4],dim=1)
+        self.conv = conv_with_kaiming_uniform(4 * 16, 1, 1, 1, 0)
+
+    def forward(self, x1, x2, x3, x4):
+
+        x_cat = torch.cat([x1, x2, x3, x4], dim=1)
         x = self.conv(x_cat)
-        return x 
-            
-class DRIU(nn.Module):
+        return x
+
+
+class DRIU(torch.nn.Module):
     """
     DRIU head module
-    Based on paper by `Maninis et al. (2016)`_ 
+
+    Based on the paper by [MANINIS-2016]_.
+
     Parameters
     ----------
     in_channels_list : list
         number of channels for each feature map that is returned from backbone
     """
+
     def __init__(self, in_channels_list=None):
         super(DRIU, self).__init__()
-        in_conv_1_2_16, in_upsample2, in_upsample_4, in_upsample_8 = in_channels_list
+        (
+            in_conv_1_2_16,
+            in_upsample2,
+            in_upsample_4,
+            in_upsample_8,
+        ) = in_channels_list
 
-        self.conv1_2_16 = nn.Conv2d(in_conv_1_2_16, 16, 3, 1, 1)
+        self.conv1_2_16 = torch.nn.Conv2d(in_conv_1_2_16, 16, 3, 1, 1)
         # Upsample layers
         self.upsample2 = UpsampleCropBlock(in_upsample2, 16, 4, 2, 0)
         self.upsample4 = UpsampleCropBlock(in_upsample_4, 16, 8, 4, 0)
         self.upsample8 = UpsampleCropBlock(in_upsample_8, 16, 16, 8, 0)
-        
+
         # Concat and Fuse
         self.concatfuse = ConcatFuseBlock()
 
-    def forward(self,x):
+    def forward(self, x):
         """
+
         Parameters
         ----------
+
         x : list
-            list of tensors as returned from the backbone network.
-            First element: height and width of input image. 
-            Remaining elements: feature maps for each feature level.
+            list of tensors as returned from the backbone network.  First
+            element: height and width of input image.  Remaining elements:
+            feature maps for each feature level.
 
         Returns
         -------
-        :py:class:`torch.Tensor`
+
+        tensor : :py:class:`torch.Tensor`
+
         """
         hw = x[0]
-        conv1_2_16 = self.conv1_2_16(x[1])  # conv1_2_16   
-        upsample2 = self.upsample2(x[2], hw) # side-multi2-up
-        upsample4 = self.upsample4(x[3], hw) # side-multi3-up
-        upsample8 = self.upsample8(x[4], hw) # side-multi4-up
+        conv1_2_16 = self.conv1_2_16(x[1])  # conv1_2_16
+        upsample2 = self.upsample2(x[2], hw)  # side-multi2-up
+        upsample4 = self.upsample4(x[3], hw)  # side-multi3-up
+        upsample8 = self.upsample8(x[4], hw)  # side-multi4-up
         out = self.concatfuse(conv1_2_16, upsample2, upsample4, upsample8)
         return out
 
+
 def build_driu():
-    """ 
+    """
     Adds backbone and head together
 
     Returns
     -------
-    :py:class:torch.nn.Module
+
+    module : :py:class:`torch.nn.Module`
+
     """
-    backbone = vgg16(pretrained=False, return_features = [3, 8, 14, 22])
+    backbone = vgg16(pretrained=False, return_features=[3, 8, 14, 22])
     driu_head = DRIU([64, 128, 256, 512])
 
-    model = nn.Sequential(OrderedDict([("backbone", backbone), ("head", driu_head)]))
+    model = torch.nn.Sequential(
+        OrderedDict([("backbone", backbone), ("head", driu_head)])
+    )
     model.name = "DRIU"
-    return model
\ No newline at end of file
+    return model
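
End-to-end, the assembled model maps a batch of RGB images to single-channel vessel logits of the same spatial size. A minimal sketch (the 544x544 input size is just an example)::

    import torch
    from bob.ip.binseg.modeling.driu import build_driu

    model = build_driu()
    with torch.no_grad():
        logits = model(torch.randn(2, 3, 544, 544))
    assert logits.shape == (2, 1, 544, 544)
    probabilities = torch.sigmoid(logits)  # vessel probability maps
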
diff --git a/bob/ip/binseg/modeling/driubn.py b/bob/ip/binseg/modeling/driubn.py
index 6043fcd6..f9145011 100644
--- a/bob/ip/binseg/modeling/driubn.py
+++ b/bob/ip/binseg/modeling/driubn.py
@@ -2,31 +2,33 @@
 # -*- coding: utf-8 -*-
 
 import torch
-from torch import nn
+import torch.nn
 from collections import OrderedDict
 from bob.ip.binseg.modeling.backbones.vgg import vgg16_bn
 from bob.ip.binseg.modeling.make_layers import conv_with_kaiming_uniform,convtrans_with_kaiming_uniform, UpsampleCropBlock
 
-class ConcatFuseBlock(nn.Module):
-    """ 
-    Takes in four feature maps with 16 channels each, concatenates them 
-    and applies a 1x1 convolution with 1 output channel. 
+class ConcatFuseBlock(torch.nn.Module):
+    """
+    Takes in four feature maps with 16 channels each, concatenates them
+    and applies a 1x1 convolution with 1 output channel.
     """
     def __init__(self):
         super().__init__()
-        self.conv = nn.Sequential(
+        self.conv = torch.nn.Sequential(
             conv_with_kaiming_uniform(4*16,1,1,1,0)
-            ,nn.BatchNorm2d(1)
+            ,torch.nn.BatchNorm2d(1)
         )
     def forward(self,x1,x2,x3,x4):
         x_cat = torch.cat([x1,x2,x3,x4],dim=1)
         x = self.conv(x_cat)
-        return x 
-            
-class DRIU(nn.Module):
+        return x
+
+class DRIU(torch.nn.Module):
     """
     DRIU head module
-    Based on paper by `Maninis et al. (2016)`_ 
+
+    Based on the paper by [MANINIS-2016]_.
+
     Parameters
     ----------
     in_channels_list : list
@@ -36,12 +38,12 @@ class DRIU(nn.Module):
         super(DRIU, self).__init__()
         in_conv_1_2_16, in_upsample2, in_upsample_4, in_upsample_8 = in_channels_list
 
-        self.conv1_2_16 = nn.Conv2d(in_conv_1_2_16, 16, 3, 1, 1)
+        self.conv1_2_16 = torch.nn.Conv2d(in_conv_1_2_16, 16, 3, 1, 1)
         # Upsample layers
         self.upsample2 = UpsampleCropBlock(in_upsample2, 16, 4, 2, 0)
         self.upsample4 = UpsampleCropBlock(in_upsample_4, 16, 8, 4, 0)
         self.upsample8 = UpsampleCropBlock(in_upsample_8, 16, 16, 8, 0)
-        
+
         # Concat and Fuse
         self.concatfuse = ConcatFuseBlock()
 
@@ -51,7 +53,7 @@ class DRIU(nn.Module):
         ----------
         x : list
             list of tensors as returned from the backbone network.
-            First element: height and width of input image. 
+            First element: height and width of input image.
             Remaining elements: feature maps for each feature level.
 
         Returns
@@ -59,7 +61,7 @@ class DRIU(nn.Module):
         :py:class:`torch.Tensor`
         """
         hw = x[0]
-        conv1_2_16 = self.conv1_2_16(x[1])  # conv1_2_16   
+        conv1_2_16 = self.conv1_2_16(x[1])  # conv1_2_16
         upsample2 = self.upsample2(x[2], hw) # side-multi2-up
         upsample4 = self.upsample4(x[3], hw) # side-multi3-up
         upsample8 = self.upsample8(x[4], hw) # side-multi4-up
@@ -67,16 +69,18 @@ class DRIU(nn.Module):
         return out
 
 def build_driu():
-    """ 
+    """
     Adds backbone and head together
 
     Returns
     -------
-    :py:class:torch.nn.Module
+
+    module : :py:class:`torch.nn.Module`
+
     """
     backbone = vgg16_bn(pretrained=False, return_features = [5, 12, 19, 29])
     driu_head = DRIU([64, 128, 256, 512])
 
-    model = nn.Sequential(OrderedDict([("backbone", backbone), ("head", driu_head)]))
+    model = torch.nn.Sequential(OrderedDict([("backbone", backbone), ("head", driu_head)]))
     model.name = "DRIUBN"
-    return model
\ No newline at end of file
+    return model
diff --git a/bob/ip/binseg/modeling/driuod.py b/bob/ip/binseg/modeling/driuod.py
index cfa11973..ab543e9e 100644
--- a/bob/ip/binseg/modeling/driuod.py
+++ b/bob/ip/binseg/modeling/driuod.py
@@ -2,29 +2,29 @@
 # -*- coding: utf-8 -*-
 
 import torch
-from torch import nn
+import torch.nn
 from collections import OrderedDict
 from bob.ip.binseg.modeling.backbones.vgg import vgg16
 from bob.ip.binseg.modeling.make_layers import conv_with_kaiming_uniform,convtrans_with_kaiming_uniform, UpsampleCropBlock
 
-class ConcatFuseBlock(nn.Module):
-    """ 
-    Takes in four feature maps with 16 channels each, concatenates them 
-    and applies a 1x1 convolution with 1 output channel. 
+class ConcatFuseBlock(torch.nn.Module):
+    """
+    Takes in four feature maps with 16 channels each, concatenates them
+    and applies a 1x1 convolution with 1 output channel.
     """
     def __init__(self):
         super().__init__()
         self.conv = conv_with_kaiming_uniform(4*16,1,1,1,0)
-    
+
     def forward(self,x1,x2,x3,x4):
         x_cat = torch.cat([x1,x2,x3,x4],dim=1)
         x = self.conv(x_cat)
-        return x 
-            
-class DRIUOD(nn.Module):
+        return x
+
+class DRIUOD(torch.nn.Module):
     """
     DRIU head module
-    
+
     Parameters
     ----------
     in_channels_list : list
@@ -40,7 +40,7 @@ class DRIUOD(nn.Module):
         self.upsample8 = UpsampleCropBlock(in_upsample_8, 16, 16, 8, 0)
         self.upsample16 = UpsampleCropBlock(in_upsample_16, 16, 32, 16, 0)
 
-        
+
         # Concat and Fuse
         self.concatfuse = ConcatFuseBlock()
 
@@ -50,7 +50,7 @@ class DRIUOD(nn.Module):
         ----------
         x : list
             list of tensors as returned from the backbone network.
-            First element: height and width of input image. 
+            First element: height and width of input image.
             Remaining elements: feature maps for each feature level.
 
         Returns
@@ -66,16 +66,17 @@ class DRIUOD(nn.Module):
         return out
 
 def build_driuod():
-    """ 
+    """
     Adds backbone and head together
 
     Returns
     -------
-    :py:class:torch.nn.Module
+    module : :py:class:`torch.nn.Module`
+
     """
     backbone = vgg16(pretrained=False, return_features = [8, 14, 22,29])
     driu_head = DRIUOD([128, 256, 512,512])
 
-    model = nn.Sequential(OrderedDict([("backbone", backbone), ("head", driu_head)]))
+    model = torch.nn.Sequential(OrderedDict([("backbone", backbone), ("head", driu_head)]))
     model.name = "DRIUOD"
-    return model
\ No newline at end of file
+    return model
diff --git a/bob/ip/binseg/modeling/driupix.py b/bob/ip/binseg/modeling/driupix.py
index 00e40932..e38768ea 100644
--- a/bob/ip/binseg/modeling/driupix.py
+++ b/bob/ip/binseg/modeling/driupix.py
@@ -2,29 +2,29 @@
 # -*- coding: utf-8 -*-
 
 import torch
-from torch import nn
+import torch.nn
 from collections import OrderedDict
 from bob.ip.binseg.modeling.backbones.vgg import vgg16
 from bob.ip.binseg.modeling.make_layers import conv_with_kaiming_uniform,convtrans_with_kaiming_uniform, UpsampleCropBlock
 
-class ConcatFuseBlock(nn.Module):
-    """ 
-    Takes in four feature maps with 16 channels each, concatenates them 
-    and applies a 1x1 convolution with 1 output channel. 
+class ConcatFuseBlock(torch.nn.Module):
+    """
+    Takes in four feature maps with 16 channels each, concatenates them
+    and applies a 1x1 convolution with 1 output channel.
     """
     def __init__(self):
         super().__init__()
         self.conv = conv_with_kaiming_uniform(4*16,1,1,1,0)
-    
+
     def forward(self,x1,x2,x3,x4):
         x_cat = torch.cat([x1,x2,x3,x4],dim=1)
         x = self.conv(x_cat)
-        return x 
-            
-class DRIUPIX(nn.Module):
+        return x
+
+class DRIUPIX(torch.nn.Module):
     """
     DRIUPIX head module. DRIU with pixelshuffle instead of ConvTrans2D
-    
+
     Parameters
     ----------
     in_channels_list : list
@@ -34,12 +34,12 @@ class DRIUPIX(nn.Module):
         super(DRIUPIX, self).__init__()
         in_conv_1_2_16, in_upsample2, in_upsample_4, in_upsample_8 = in_channels_list
 
-        self.conv1_2_16 = nn.Conv2d(in_conv_1_2_16, 16, 3, 1, 1)
+        self.conv1_2_16 = torch.nn.Conv2d(in_conv_1_2_16, 16, 3, 1, 1)
         # Upsample layers
         self.upsample2 = UpsampleCropBlock(in_upsample2, 16, 4, 2, 0, pixelshuffle=True)
         self.upsample4 = UpsampleCropBlock(in_upsample_4, 16, 8, 4, 0, pixelshuffle=True)
         self.upsample8 = UpsampleCropBlock(in_upsample_8, 16, 16, 8, 0, pixelshuffle=True)
-        
+
         # Concat and Fuse
         self.concatfuse = ConcatFuseBlock()
 
@@ -49,7 +49,7 @@ class DRIUPIX(nn.Module):
         ----------
         x : list
             list of tensors as returned from the backbone network.
-            First element: height and width of input image. 
+            First element: height and width of input image.
             Remaining elements: feature maps for each feature level.
 
         Returns
@@ -57,7 +57,7 @@ class DRIUPIX(nn.Module):
         :py:class:`torch.Tensor`
         """
         hw = x[0]
-        conv1_2_16 = self.conv1_2_16(x[1])  # conv1_2_16   
+        conv1_2_16 = self.conv1_2_16(x[1])  # conv1_2_16
         upsample2 = self.upsample2(x[2], hw) # side-multi2-up
         upsample4 = self.upsample4(x[3], hw) # side-multi3-up
         upsample8 = self.upsample8(x[4], hw) # side-multi4-up
@@ -65,16 +65,17 @@ class DRIUPIX(nn.Module):
         return out
 
 def build_driupix():
-    """ 
+    """
     Adds backbone and head together
 
     Returns
     -------
-    :py:class:torch.nn.Module
+    module : :py:class:`torch.nn.Module`
+
     """
     backbone = vgg16(pretrained=False, return_features = [3, 8, 14, 22])
     driu_head = DRIUPIX([64, 128, 256, 512])
 
-    model = nn.Sequential(OrderedDict([("backbone", backbone), ("head", driu_head)]))
+    model = torch.nn.Sequential(OrderedDict([("backbone", backbone), ("head", driu_head)]))
     model.name = "DRIUPIX"
-    return model
\ No newline at end of file
+    return model
diff --git a/bob/ip/binseg/modeling/hed.py b/bob/ip/binseg/modeling/hed.py
index fa44366e..9be7fc86 100644
--- a/bob/ip/binseg/modeling/hed.py
+++ b/bob/ip/binseg/modeling/hed.py
@@ -2,29 +2,29 @@
 # -*- coding: utf-8 -*-
 
 import torch
-from torch import nn
+import torch.nn
 from collections import OrderedDict
 from bob.ip.binseg.modeling.backbones.vgg import vgg16
 from bob.ip.binseg.modeling.make_layers import conv_with_kaiming_uniform, convtrans_with_kaiming_uniform, UpsampleCropBlock
 
-class ConcatFuseBlock(nn.Module):
-    """ 
-    Takes in five feature maps with one channel each, concatenates thems 
-    and applies a 1x1 convolution with 1 output channel. 
+class ConcatFuseBlock(torch.nn.Module):
+    """
+    Takes in five feature maps with one channel each, concatenates them
+    and applies a 1x1 convolution with 1 output channel.
     """
     def __init__(self):
         super().__init__()
         self.conv = conv_with_kaiming_uniform(5,1,1,1,0)
-    
+
     def forward(self,x1,x2,x3,x4,x5):
         x_cat = torch.cat([x1,x2,x3,x4,x5],dim=1)
         x = self.conv(x_cat)
-        return x 
-            
-class HED(nn.Module):
+        return x
+
+class HED(torch.nn.Module):
     """
     HED head module
-    
+
     Parameters
     ----------
     in_channels_list : list
@@ -33,8 +33,8 @@ class HED(nn.Module):
     def __init__(self, in_channels_list=None):
         super(HED, self).__init__()
         in_conv_1_2_16, in_upsample2, in_upsample_4, in_upsample_8, in_upsample_16 = in_channels_list
-        
-        self.conv1_2_16 = nn.Conv2d(in_conv_1_2_16,1,3,1,1)
+
+        self.conv1_2_16 = torch.nn.Conv2d(in_conv_1_2_16,1,3,1,1)
         # Upsample
         self.upsample2 = UpsampleCropBlock(in_upsample2,1,4,2,0)
         self.upsample4 = UpsampleCropBlock(in_upsample_4,1,8,4,0)
@@ -49,35 +49,35 @@ class HED(nn.Module):
         ----------
         x : list
             list of tensors as returned from the backbone network.
-            First element: height and width of input image. 
+            First element: height and width of input image.
             Remaining elements: feature maps for each feature level.
-        
+
         Returns
         -------
-        :py:class:`torch.Tensor`
+        tensor : :py:class:`torch.Tensor`
         """
         hw = x[0]
-        conv1_2_16 = self.conv1_2_16(x[1])  
+        conv1_2_16 = self.conv1_2_16(x[1])
         upsample2 = self.upsample2(x[2],hw)
         upsample4 = self.upsample4(x[3],hw)
         upsample8 = self.upsample8(x[4],hw)
-        upsample16 = self.upsample16(x[5],hw) 
+        upsample16 = self.upsample16(x[5],hw)
         concatfuse = self.concatfuse(conv1_2_16,upsample2,upsample4,upsample8,upsample16)
-        
+
         out = [upsample2,upsample4,upsample8,upsample16,concatfuse]
         return out
 
 def build_hed():
-    """ 
+    """
     Adds backbone and head together
 
     Returns
     -------
-    :py:class:torch.nn.Module
+    module : :py:class:`torch.nn.Module`
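+
+    Example (a minimal sketch; the input resolution is illustrative):
+
+    .. code-block:: python
+
+       import torch
+       model = build_hed()
+       sides = model(torch.randn(1, 3, 544, 544))  # list of 5 prediction maps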
     """
     backbone = vgg16(pretrained=False, return_features = [3, 8, 14, 22, 29])
     hed_head = HED([64, 128, 256, 512, 512])
 
-    model = nn.Sequential(OrderedDict([("backbone", backbone), ("head", hed_head)]))
+    model = torch.nn.Sequential(OrderedDict([("backbone", backbone), ("head", hed_head)]))
     model.name = "HED"
-    return model
\ No newline at end of file
+    return model
diff --git a/bob/ip/binseg/modeling/losses.py b/bob/ip/binseg/modeling/losses.py
index de85a581..93235d03 100644
--- a/bob/ip/binseg/modeling/losses.py
+++ b/bob/ip/binseg/modeling/losses.py
@@ -1,19 +1,42 @@
+"""Loss implementations"""
+
 import torch
 from torch.nn.modules.loss import _Loss
-from torch._jit_internal import weak_script_method
 
+# Conditionally decorates a method if a decorator exists in PyTorch
+# This overcomes an import error with versions of PyTorch >= 1.2, where the
+# decorator ``weak_script_method`` is no longer available.  See:
+# https://github.com/pytorch/pytorch/commit/10c4b98ade8349d841518d22f19a653a939e260c#diff-ee07db084d958260fd24b4b02d4f078d
+# from July 4th, 2019.
+try:
+    from torch._jit_internal import weak_script_method
+except ImportError:
 
+    def weak_script_method(x):
+        return x
 
 
 class WeightedBCELogitsLoss(_Loss):
-    """ 
-    Implements Equation 1 in `Maninis et al. (2016)`_. Based on ``torch.nn.modules.loss.BCEWithLogitsLoss``. 
+    """
+    Implements Equation 1 in [MANINIS-2016]_. Based on
+    :py:class:`torch.nn.BCEWithLogitsLoss`.
+
     Calculate sum of weighted cross entropy loss.
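+
+    Example (a minimal sketch; tensor shapes are illustrative):
+
+    .. code-block:: python
+
+       import torch
+       criterion = WeightedBCELogitsLoss()
+       logits = torch.randn(2, 1, 8, 8)                    # raw model outputs
+       target = torch.randint(0, 2, (2, 1, 8, 8)).float()  # binary ground-truth
+       loss = criterion(logits, target)                    # scalar tensor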
     """
-    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None):
-        super(WeightedBCELogitsLoss, self).__init__(size_average, reduce, reduction)
-        self.register_buffer('weight', weight)
-        self.register_buffer('pos_weight', pos_weight)
+
+    def __init__(
+        self,
+        weight=None,
+        size_average=None,
+        reduce=None,
+        reduction="mean",
+        pos_weight=None,
+    ):
+        super(WeightedBCELogitsLoss, self).__init__(
+            size_average, reduce, reduction
+        )
+        self.register_buffer("weight", weight)
+        self.register_buffer("pos_weight", pos_weight)
 
     @weak_script_method
     def forward(self, input, target, masks=None):
@@ -23,37 +46,61 @@ class WeightedBCELogitsLoss(_Loss):
         input : :py:class:`torch.Tensor`
         target : :py:class:`torch.Tensor`
         masks : :py:class:`torch.Tensor`, optional
-        
+
         Returns
         -------
         :py:class:`torch.Tensor`
         """
         n, c, h, w = target.shape
-        num_pos = torch.sum(target, dim=[1, 2, 3]).float().reshape(n,1) # torch.Size([n, 1])
-        if hasattr(masks,'dtype'):
-            num_mask_neg = c * h * w - torch.sum(masks, dim=[1, 2, 3]).float().reshape(n,1) # torch.Size([n, 1])
-            num_neg =  c * h * w - num_pos - num_mask_neg
+        num_pos = (
+            torch.sum(target, dim=[1, 2, 3]).float().reshape(n, 1)
+        )  # torch.Size([n, 1])
+        if hasattr(masks, "dtype"):
+            num_mask_neg = c * h * w - torch.sum(
+                masks, dim=[1, 2, 3]
+            ).float().reshape(
+                n, 1
+            )  # torch.Size([n, 1])
+            num_neg = c * h * w - num_pos - num_mask_neg
         else:
-            num_neg = c * h * w - num_pos 
-        numposnumtotal = torch.ones_like(target) * (num_pos / (num_pos + num_neg)).unsqueeze(1).unsqueeze(2)
-        numnegnumtotal = torch.ones_like(target) * (num_neg / (num_pos + num_neg)).unsqueeze(1).unsqueeze(2)
-        weight = torch.where((target <= 0.5) , numposnumtotal, numnegnumtotal)
+            num_neg = c * h * w - num_pos
+        numposnumtotal = torch.ones_like(target) * (
+            num_pos / (num_pos + num_neg)
+        ).unsqueeze(1).unsqueeze(2)
+        numnegnumtotal = torch.ones_like(target) * (
+            num_neg / (num_pos + num_neg)
+        ).unsqueeze(1).unsqueeze(2)
+        weight = torch.where((target <= 0.5), numposnumtotal, numnegnumtotal)
+
+        loss = torch.nn.functional.binary_cross_entropy_with_logits(
+            input, target, weight=weight, reduction=self.reduction
+        )
+        return loss
 
-        loss = torch.nn.functional.binary_cross_entropy_with_logits(input, target, weight=weight, reduction=self.reduction)
-        return loss 
 
 class SoftJaccardBCELogitsLoss(_Loss):
-    """ 
-    Implements Equation 3 in `Iglovikov  et al. (2018)`_. Based on ``torch.nn.modules.loss.BCEWithLogitsLoss``. 
+    """
+    Implements Equation 3 in [IGLOVIKOV-2018]_.  Based on
+    :py:class:`torch.nn.BCEWithLogitsLoss`.
 
     Attributes
     ----------
     alpha : float
         determines the weighting of SoftJaccard and BCE. Default: ``0.7``
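+
+    Example (a minimal sketch; shapes are illustrative):
+
+    .. code-block:: python
+
+       import torch
+       criterion = SoftJaccardBCELogitsLoss(alpha=0.7)
+       logits = torch.randn(2, 1, 8, 8)
+       target = torch.randint(0, 2, (2, 1, 8, 8)).float()
+       loss = criterion(logits, target)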
     """
-    def __init__(self, alpha=0.7, size_average=None, reduce=None, reduction='mean', pos_weight=None):
-        super(SoftJaccardBCELogitsLoss, self).__init__(size_average, reduce, reduction) 
-        self.alpha = alpha   
+
+    def __init__(
+        self,
+        alpha=0.7,
+        size_average=None,
+        reduce=None,
+        reduction="mean",
+        pos_weight=None,
+    ):
+        super(SoftJaccardBCELogitsLoss, self).__init__(
+            size_average, reduce, reduction
+        )
+        self.alpha = alpha
 
     @weak_script_method
     def forward(self, input, target, masks=None):
@@ -63,7 +110,7 @@ class SoftJaccardBCELogitsLoss(_Loss):
         input : :py:class:`torch.Tensor`
         target : :py:class:`torch.Tensor`
         masks : :py:class:`torch.Tensor`, optional
-        
+
         Returns
         -------
         :py:class:`torch.Tensor`
@@ -72,23 +119,37 @@ class SoftJaccardBCELogitsLoss(_Loss):
         probabilities = torch.sigmoid(input)
         intersection = (probabilities * target).sum()
         sums = probabilities.sum() + target.sum()
-        
-        softjaccard = intersection/(sums - intersection + eps)
 
-        bceloss = torch.nn.functional.binary_cross_entropy_with_logits(input, target, weight=None, reduction=self.reduction)
-        loss = self.alpha * bceloss + (1 - self.alpha) * (1-softjaccard)
+        softjaccard = intersection / (sums - intersection + eps)
+
+        bceloss = torch.nn.functional.binary_cross_entropy_with_logits(
+            input, target, weight=None, reduction=self.reduction
+        )
+        loss = self.alpha * bceloss + (1 - self.alpha) * (1 - softjaccard)
         return loss
 
 
 class HEDWeightedBCELogitsLoss(_Loss):
-    """ 
-    Implements Equation 2 in `He et al. (2015)`_. Based on ``torch.nn.modules.loss.BCEWithLogitsLoss``. 
+    """
+    Implements Equation 2 in [HE-2015]_. Based on
+    :py:class:`torch.nn.BCEWithLogitsLoss`.
+
     Calculate sum of weighted cross entropy loss.
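+
+    Example (a minimal sketch; the list mimics the five HED side outputs):
+
+    .. code-block:: python
+
+       import torch
+       criterion = HEDWeightedBCELogitsLoss()
+       side_outputs = [torch.randn(2, 1, 8, 8) for _ in range(5)]
+       target = torch.randint(0, 2, (2, 1, 8, 8)).float()
+       loss = criterion(side_outputs, target)  # mean over all side outputs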
     """
-    def __init__(self, weight=None, size_average=None, reduce=None, reduction='mean', pos_weight=None):
-        super(HEDWeightedBCELogitsLoss, self).__init__(size_average, reduce, reduction)
-        self.register_buffer('weight', weight)
-        self.register_buffer('pos_weight', pos_weight)
+
+    def __init__(
+        self,
+        weight=None,
+        size_average=None,
+        reduce=None,
+        reduction="mean",
+        pos_weight=None,
+    ):
+        super(HEDWeightedBCELogitsLoss, self).__init__(
+            size_average, reduce, reduction
+        )
+        self.register_buffer("weight", weight)
+        self.register_buffer("pos_weight", pos_weight)
 
     @weak_script_method
     def forward(self, inputlist, target, masks=None):
@@ -106,33 +167,59 @@ class HEDWeightedBCELogitsLoss(_Loss):
         loss_over_all_inputs = []
         for input in inputlist:
             n, c, h, w = target.shape
-            num_pos = torch.sum(target, dim=[1, 2, 3]).float().reshape(n,1) # torch.Size([n, 1])
-            if hasattr(masks,'dtype'):
-                num_mask_neg = c * h * w - torch.sum(masks, dim=[1, 2, 3]).float().reshape(n,1) # torch.Size([n, 1])
-                num_neg =  c * h * w - num_pos - num_mask_neg
-            else: 
+            num_pos = (
+                torch.sum(target, dim=[1, 2, 3]).float().reshape(n, 1)
+            )  # torch.Size([n, 1])
+            if hasattr(masks, "dtype"):
+                num_mask_neg = c * h * w - torch.sum(
+                    masks, dim=[1, 2, 3]
+                ).float().reshape(
+                    n, 1
+                )  # torch.Size([n, 1])
+                num_neg = c * h * w - num_pos - num_mask_neg
+            else:
                 num_neg = c * h * w - num_pos  # torch.Size([n, 1])
-            numposnumtotal = torch.ones_like(target) * (num_pos / (num_pos + num_neg)).unsqueeze(1).unsqueeze(2)
-            numnegnumtotal = torch.ones_like(target) * (num_neg / (num_pos + num_neg)).unsqueeze(1).unsqueeze(2)
-            weight = torch.where((target <= 0.5) , numposnumtotal, numnegnumtotal)
-            loss = torch.nn.functional.binary_cross_entropy_with_logits(input, target, weight=weight, reduction=self.reduction)
+            numposnumtotal = torch.ones_like(target) * (
+                num_pos / (num_pos + num_neg)
+            ).unsqueeze(1).unsqueeze(2)
+            numnegnumtotal = torch.ones_like(target) * (
+                num_neg / (num_pos + num_neg)
+            ).unsqueeze(1).unsqueeze(2)
+            weight = torch.where(
+                (target <= 0.5), numposnumtotal, numnegnumtotal
+            )
+            loss = torch.nn.functional.binary_cross_entropy_with_logits(
+                input, target, weight=weight, reduction=self.reduction
+            )
             loss_over_all_inputs.append(loss.unsqueeze(0))
         final_loss = torch.cat(loss_over_all_inputs).mean()
-        return final_loss 
+        return final_loss
 
 
 class HEDSoftJaccardBCELogitsLoss(_Loss):
-    """ 
-    Implements  Equation 3 in `Iglovikov  et al. (2018)`_ for the hed network. Based on ``torch.nn.modules.loss.BCEWithLogitsLoss``. 
+    """
+    Implements Equation 3 in [IGLOVIKOV-2018]_ for the HED network. Based on
+    :py:class:`torch.nn.BCEWithLogitsLoss`.
 
     Attributes
     ----------
     alpha : float
         determines the weighting of SoftJaccard and BCE. Default: ``0.3``
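+
+    Example (a minimal sketch, analogous to
+    :py:class:`HEDWeightedBCELogitsLoss`):
+
+    .. code-block:: python
+
+       import torch
+       criterion = HEDSoftJaccardBCELogitsLoss(alpha=0.3)
+       side_outputs = [torch.randn(2, 1, 8, 8) for _ in range(5)]
+       target = torch.randint(0, 2, (2, 1, 8, 8)).float()
+       loss = criterion(side_outputs, target)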
     """
-    def __init__(self, alpha=0.3, size_average=None, reduce=None, reduction='mean', pos_weight=None):
-        super(HEDSoftJaccardBCELogitsLoss, self).__init__(size_average, reduce, reduction) 
-        self.alpha = alpha   
+
+    def __init__(
+        self,
+        alpha=0.3,
+        size_average=None,
+        reduce=None,
+        reduction="mean",
+        pos_weight=None,
+    ):
+        super(HEDSoftJaccardBCELogitsLoss, self).__init__(
+            size_average, reduce, reduction
+        )
+        self.alpha = alpha
 
     @weak_script_method
     def forward(self, inputlist, target, masks=None):
@@ -142,7 +229,7 @@ class HEDSoftJaccardBCELogitsLoss(_Loss):
         input : :py:class:`torch.Tensor`
         target : :py:class:`torch.Tensor`
         masks : :py:class:`torch.Tensor`, optional
-        
+
         Returns
         -------
         :py:class:`torch.Tensor`
@@ -153,48 +240,65 @@ class HEDSoftJaccardBCELogitsLoss(_Loss):
             probabilities = torch.sigmoid(input)
             intersection = (probabilities * target).sum()
             sums = probabilities.sum() + target.sum()
-            
-            softjaccard = intersection/(sums - intersection + eps)
-    
-            bceloss = torch.nn.functional.binary_cross_entropy_with_logits(input, target, weight=None, reduction=self.reduction)
-            loss = self.alpha * bceloss + (1 - self.alpha) * (1-softjaccard)
+
+            softjaccard = intersection / (sums - intersection + eps)
+
+            bceloss = torch.nn.functional.binary_cross_entropy_with_logits(
+                input, target, weight=None, reduction=self.reduction
+            )
+            loss = self.alpha * bceloss + (1 - self.alpha) * (1 - softjaccard)
             loss_over_all_inputs.append(loss.unsqueeze(0))
         final_loss = torch.cat(loss_over_all_inputs).mean()
-        return loss
+        return final_loss
 
 
-
 class MixJacLoss(_Loss):
-    """ 
-    Attributes
+    """
+
+    Parameters
     ----------
+
+    lambda_u : int
+        determines the weighting of the unlabeled-data loss term.
+
+    jacalpha : float
+        determines the weighting of SoftJaccard and BCE in the labeled loss.
+
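+    Example (a minimal sketch; all tensors are illustrative):
+
+    .. code-block:: python
+
+       import torch
+       criterion = MixJacLoss(lambda_u=100, jacalpha=0.7)
+       labeled = torch.randn(2, 1, 8, 8)
+       gt = torch.randint(0, 2, (2, 1, 8, 8)).float()
+       unlabeled = torch.randn(2, 1, 8, 8)
+       pseudo = torch.sigmoid(torch.randn(2, 1, 8, 8))  # pseudo-labels
+       loss, ll, ul = criterion(labeled, gt, unlabeled, pseudo, 0.5)
+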
     """
-    def __init__(self, lambda_u=100, jacalpha=0.7, size_average=None, reduce=None, reduction='mean', pos_weight=None):
+
+    def __init__(
+        self,
+        lambda_u=100,
+        jacalpha=0.7,
+        size_average=None,
+        reduce=None,
+        reduction="mean",
+        pos_weight=None,
+    ):
         super(MixJacLoss, self).__init__(size_average, reduce, reduction)
         self.lambda_u = lambda_u
         self.labeled_loss = SoftJaccardBCELogitsLoss(alpha=jacalpha)
         self.unlabeled_loss = torch.nn.BCEWithLogitsLoss()
 
-
     @weak_script_method
-    def forward(self, input, target, unlabeled_input, unlabeled_traget, ramp_up_factor):
+    def forward(
+        self, input, target, unlabeled_input, unlabeled_target, ramp_up_factor
+    ):
         """
         Parameters
         ----------
+
         input : :py:class:`torch.Tensor`
         target : :py:class:`torch.Tensor`
         unlabeled_input : :py:class:`torch.Tensor`
-        unlabeled_traget : :py:class:`torch.Tensor`
+        unlabeled_target : :py:class:`torch.Tensor`
         ramp_up_factor : float
-        
+
         Returns
         -------
+
         list
+
         """
-        ll = self.labeled_loss(input,target)
+        ll = self.labeled_loss(input, target)
-        ul = self.unlabeled_loss(unlabeled_input, unlabeled_traget)
+        ul = self.unlabeled_loss(unlabeled_input, unlabeled_target)
-        
+
         loss = ll + self.lambda_u * ramp_up_factor * ul
-        return loss, ll, ul
\ No newline at end of file
+        return loss, ll, ul
diff --git a/bob/ip/binseg/modeling/m2u.py b/bob/ip/binseg/modeling/m2u.py
index 7db86168..fa34c579 100644
--- a/bob/ip/binseg/modeling/m2u.py
+++ b/bob/ip/binseg/modeling/m2u.py
@@ -5,16 +5,16 @@
 
 from collections import OrderedDict
 import torch
-from torch import nn
+import torch.nn
 from bob.ip.binseg.modeling.backbones.mobilenetv2 import MobileNetV2, InvertedResidual
 
-class DecoderBlock(nn.Module):
+class DecoderBlock(torch.nn.Module):
     """
-    Decoder block: upsample and concatenate with features maps from the encoder part
+    Decoder block: upsample and concatenate with feature maps from the encoder part
     """
     def __init__(self,up_in_c,x_in_c,upsamplemode='bilinear',expand_ratio=0.15):
         super().__init__()
-        self.upsample = nn.Upsample(scale_factor=2,mode=upsamplemode,align_corners=False) # H, W -> 2H, 2W
+        self.upsample = torch.nn.Upsample(scale_factor=2,mode=upsamplemode,align_corners=False) # H, W -> 2H, 2W
         self.ir1 = InvertedResidual(up_in_c+x_in_c,(x_in_c + up_in_c) // 2,stride=1,expand_ratio=expand_ratio)
 
     def forward(self,up_in,x_in):
@@ -22,11 +22,11 @@ class DecoderBlock(nn.Module):
         cat_x = torch.cat([up_out, x_in] , dim=1)
         x = self.ir1(cat_x)
         return x
-    
-class LastDecoderBlock(nn.Module):
+
+class LastDecoderBlock(torch.nn.Module):
     def __init__(self,x_in_c,upsamplemode='bilinear',expand_ratio=0.15):
         super().__init__()
-        self.upsample = nn.Upsample(scale_factor=2,mode=upsamplemode,align_corners=False) # H, W -> 2H, 2W
+        self.upsample = torch.nn.Upsample(scale_factor=2,mode=upsamplemode,align_corners=False) # H, W -> 2H, 2W
         self.ir1 = InvertedResidual(x_in_c,1,stride=1,expand_ratio=expand_ratio)
 
     def forward(self,up_in,x_in):
@@ -37,10 +37,10 @@ class LastDecoderBlock(nn.Module):
 
 
 
-class M2U(nn.Module):
+class M2U(torch.nn.Module):
     """
     M2U-Net head module
-    
+
     Parameters
     ----------
     in_channels_list : list
@@ -54,50 +54,50 @@ class M2U(nn.Module):
         self.decode3 = DecoderBlock(64,24,upsamplemode,expand_ratio)
         self.decode2 = DecoderBlock(44,16,upsamplemode,expand_ratio)
         self.decode1 = LastDecoderBlock(33,upsamplemode,expand_ratio)
-        
-        # initilaize weights 
+
+        # initialize weights
         self._initialize_weights()
 
     def _initialize_weights(self):
         for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_uniform_(m.weight, a=1)
+            if isinstance(m, torch.nn.Conv2d):
+                torch.nn.init.kaiming_uniform_(m.weight, a=1)
                 if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.BatchNorm2d):
+                    torch.nn.init.constant_(m.bias, 0)
+            elif isinstance(m, torch.nn.BatchNorm2d):
                 m.weight.data.fill_(1)
                 m.bias.data.zero_()
-    
+
     def forward(self,x):
         """
         Parameters
         ----------
         x : list
             list of tensors as returned from the backbone network.
-            First element: height and width of input image. 
+            First element: height and width of input image.
             Remaining elements: feature maps for each feature level.
         Returns
         -------
-        :py:class:`torch.Tensor`
+        tensor : :py:class:`torch.Tensor`
         """
         decode4 = self.decode4(x[5],x[4])    # 96, 32
         decode3 = self.decode3(decode4,x[3]) # 64, 24
         decode2 = self.decode2(decode3,x[2]) # 44, 16
         decode1 = self.decode1(decode2,x[1]) # 30, 3
-        
+
         return decode1
 
 def build_m2unet():
-    """ 
+    """
     Adds backbone and head together
 
     Returns
     -------
-    :py:class:torch.nn.Module
+    module : :py:class:`torch.nn.Module`
     """
     backbone = MobileNetV2(return_features = [1, 3, 6, 13], m2u=True)
     m2u_head = M2U(in_channels_list=[16, 24, 32, 96])
 
-    model = nn.Sequential(OrderedDict([("backbone", backbone), ("head", m2u_head)]))
+    model = torch.nn.Sequential(OrderedDict([("backbone", backbone), ("head", m2u_head)]))
     model.name = "M2UNet"
-    return model
\ No newline at end of file
+    return model
diff --git a/bob/ip/binseg/modeling/make_layers.py b/bob/ip/binseg/modeling/make_layers.py
index 7e398443..88103048 100644
--- a/bob/ip/binseg/modeling/make_layers.py
+++ b/bob/ip/binseg/modeling/make_layers.py
@@ -2,76 +2,94 @@
 # -*- coding: utf-8 -*-
 
 import torch
-import torch.nn as nn
+import torch.nn
 from torch.nn import Conv2d
 from torch.nn import ConvTranspose2d
 
 def conv_with_kaiming_uniform(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1):
     conv = Conv2d(
-        in_channels, 
-        out_channels, 
-        kernel_size=kernel_size, 
-        stride=stride, 
-        padding=padding, 
-        dilation=dilation, 
+        in_channels,
+        out_channels,
+        kernel_size=kernel_size,
+        stride=stride,
+        padding=padding,
+        dilation=dilation,
         bias= True
         )
         # Caffe2 implementation uses XavierFill, which in fact
         # corresponds to kaiming_uniform_ in PyTorch
-    nn.init.kaiming_uniform_(conv.weight, a=1)
-    nn.init.constant_(conv.bias, 0)
+    torch.nn.init.kaiming_uniform_(conv.weight, a=1)
+    torch.nn.init.constant_(conv.bias, 0)
     return conv
 
 
 def convtrans_with_kaiming_uniform(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1):
     conv = ConvTranspose2d(
-        in_channels, 
-        out_channels, 
-        kernel_size=kernel_size, 
-        stride=stride, 
-        padding=padding, 
-        dilation=dilation, 
+        in_channels,
+        out_channels,
+        kernel_size=kernel_size,
+        stride=stride,
+        padding=padding,
+        dilation=dilation,
         bias= True
         )
         # Caffe2 implementation uses XavierFill, which in fact
         # corresponds to kaiming_uniform_ in PyTorch
-    nn.init.kaiming_uniform_(conv.weight, a=1)
-    nn.init.constant_(conv.bias, 0)
+    torch.nn.init.kaiming_uniform_(conv.weight, a=1)
+    torch.nn.init.constant_(conv.bias, 0)
     return conv
 
 
-class UpsampleCropBlock(nn.Module):
+class UpsampleCropBlock(torch.nn.Module):
+    """
+    Combines Conv2d, ConvTranspose2d and cropping. Simulates the Caffe2 crop
+    layer in the forward function.
+
+    Used for DRIU and HED.
+
+    Parameters
+    ----------
+
+    in_channels : int
+        number of channels of intermediate layer
+    out_channels : int
+        number of output channels
+    up_kernel_size : int
+        kernel size for transposed convolution
+    up_stride : int
+        stride for transposed convolution
+    up_padding : int
+        padding for transposed convolution
+    pixelshuffle : bool
+        if ``True``, uses a :py:class:`PixelShuffle_ICNR` block for upsampling
+        instead of a transposed convolution
+
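+    Example (an illustrative sketch; the channel counts are made up):
+
+    .. code-block:: python
+
+       import torch
+       block = UpsampleCropBlock(64, 16, 4, 2, 0)
+       feats = torch.randn(1, 64, 50, 50)
+       out = block(feats, (100, 100))  # upsampled, then cropped to 100 x 100
+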
+    """
+
     def __init__(self, in_channels, out_channels, up_kernel_size, up_stride, up_padding, pixelshuffle=False):
-        """
-        Combines Conv2d, ConvTransposed2d and Cropping. Simulates the caffe2 crop layer in the forward function.
-        Used for DRIU and HED. 
-        
-        Attributes
-        ----------
-            in_channels : number of channels of intermediate layer
-            out_channels : number of output channels
-            up_kernel_size : kernel size for transposed convolution
-            up_stride : stride for transposed convolution
-            up_padding : padding for transposed convolution
-        """
         super().__init__()
-        # NOTE: Kaiming init, replace with nn.Conv2d and nn.ConvTranspose2d to get original DRIU impl.
+        # NOTE: Kaiming init, replace with torch.nn.Conv2d and torch.nn.ConvTranspose2d to get original DRIU impl.
         self.conv = conv_with_kaiming_uniform(in_channels, out_channels, 3, 1, 1)
         if pixelshuffle:
             self.upconv = PixelShuffle_ICNR( out_channels, out_channels, scale = up_stride)
         else:
-            self.upconv = convtrans_with_kaiming_uniform(out_channels, out_channels, up_kernel_size, up_stride, up_padding)        
-        
-        
+            self.upconv = convtrans_with_kaiming_uniform(out_channels, out_channels, up_kernel_size, up_stride, up_padding)
+
+
     def forward(self, x, input_res):
-        """
-        Forward pass of UpsampleBlock. Upsampled feature maps are cropped to the resolution of the input image.
-        Attributes
+        """Forward pass of UpsampleBlock.
+
+        Upsampled feature maps are cropped to the resolution of the input
+        image.
+
+        Parameters
         ----------
-        x : input channels
-        input_res : tuple (h,w)    
-            Resolution of the input image
+
+        x : :py:class:`torch.Tensor`
+            input feature map
+
+        input_res : tuple
+            Resolution of the input image format ``(height, width)``
+
         """
+
         img_h = input_res[0]
         img_w = input_res[1]
         x = self.conv(x)
@@ -87,23 +105,24 @@ class UpsampleCropBlock(nn.Module):
         w_crop = up_w-img_w
         w_s = w_crop//2
         w_e = up_w - (w_crop - w_s)
-        # perform crop 
-        # needs explicit ranges for onnx export 
-        x = x[:,:,h_s:h_e,w_s:w_e] # crop to input size 
-        
+        # perform crop
+        # needs explicit ranges for onnx export
+        x = x[:,:,h_s:h_e,w_s:w_e] # crop to input size
+
         return x
 
 
 
 def ifnone(a, b):
-    "`a` if `a` is not None, otherwise `b`."
+    "``a`` if ``a`` is not None, otherwise ``b``."
     return b if a is None else a
 
-def icnr(x, scale=2, init=nn.init.kaiming_normal_):
-    """
-    https://docs.fast.ai/layers.html#PixelShuffle_ICNR
-    ICNR init of `x`, with `scale` and `init` function.
+def icnr(x, scale=2, init=torch.nn.init.kaiming_normal_):
+    """https://docs.fast.ai/layers.html#PixelShuffle_ICNR
+
+    ICNR init of ``x``, with ``scale`` and ``init`` function.
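+
+    Example (a minimal sketch; the first dimension must be divisible by
+    ``scale ** 2``):
+
+    .. code-block:: python
+
+       import torch
+       w = torch.zeros(16, 4, 3, 3)
+       icnr(w, scale=2)  # fills ``w`` in-place with ICNR-initialized values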
     """
+
     ni,nf,h,w = x.shape
     ni2 = int(ni/(scale**2))
     k = init(torch.zeros([ni2,nf,h,w])).transpose(0, 1)
@@ -112,30 +131,32 @@ def icnr(x, scale=2, init=nn.init.kaiming_normal_):
     k = k.contiguous().view([nf,ni,h,w]).transpose(0, 1)
     x.data.copy_(k)
 
-class PixelShuffle_ICNR(nn.Module):
-    """
-    https://docs.fast.ai/layers.html#PixelShuffle_ICNR 
-    Upsample by `scale` from `ni` filters to `nf` (default `ni`), using `nn.PixelShuffle`, `icnr` init, and `weight_norm`.
+class PixelShuffle_ICNR(torch.nn.Module):
+    """https://docs.fast.ai/layers.html#PixelShuffle_ICNR
+
+    Upsample by ``scale`` from ``ni`` filters to ``nf`` (default ``ni``), using
+    ``torch.nn.PixelShuffle`` and ``icnr`` init.
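+
+    Example (a minimal sketch; sizes are illustrative):
+
+    .. code-block:: python
+
+       import torch
+       shuf = PixelShuffle_ICNR(32, scale=2)
+       out = shuf(torch.randn(1, 32, 25, 25))  # -> shape (1, 32, 50, 50)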
     """
+
     def __init__(self, ni:int, nf:int=None, scale:int=2):
         super().__init__()
         nf = ifnone(nf, ni)
         self.conv = conv_with_kaiming_uniform(ni, nf*(scale**2), 1)
         icnr(self.conv.weight)
-        self.shuf = nn.PixelShuffle(scale)
+        self.shuf = torch.nn.PixelShuffle(scale)
         # Blurring over (h*w) kernel
         # "Super-Resolution using Convolutional Neural Networks without Any Checkerboard Artifacts"
         # - https://arxiv.org/abs/1806.02658
-        self.pad = nn.ReplicationPad2d((1,0,1,0))
-        self.blur = nn.AvgPool2d(2, stride=1)
-        self.relu = nn.ReLU(inplace=True)
+        self.pad = torch.nn.ReplicationPad2d((1,0,1,0))
+        self.blur = torch.nn.AvgPool2d(2, stride=1)
+        self.relu = torch.nn.ReLU(inplace=True)
 
     def forward(self,x):
         x = self.shuf(self.relu(self.conv(x)))
         x = self.blur(self.pad(x))
         return x
 
-class UnetBlock(nn.Module):
+class UnetBlock(torch.nn.Module):
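+    """U-Net decoder block: upsamples the input, concatenates it with the
+    encoder feature map, and applies two transposed convolutions with ReLU."""
+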
     def __init__(self, up_in_c, x_in_c, pixel_shuffle=False, middle_block=False):
         super().__init__()
 
@@ -146,18 +167,18 @@ class UnetBlock(nn.Module):
             up_out_c =  up_in_c // 2
         cat_channels = x_in_c + up_out_c
         inner_channels = cat_channels // 2
-        
+
         if pixel_shuffle:
             self.upsample = PixelShuffle_ICNR( up_in_c, up_out_c )
         else:
             self.upsample = convtrans_with_kaiming_uniform( up_in_c, up_out_c, 2, 2)
         self.convtrans1 = convtrans_with_kaiming_uniform( cat_channels, inner_channels, 3, 1, 1)
         self.convtrans2 = convtrans_with_kaiming_uniform( inner_channels, inner_channels, 3, 1, 1)
-        self.relu = nn.ReLU(inplace=True)
+        self.relu = torch.nn.ReLU(inplace=True)
 
     def forward(self, up_in, x_in):
         up_out = self.upsample(up_in)
         cat_x = torch.cat([up_out, x_in] , dim=1)
         x = self.relu(self.convtrans1(cat_x))
         x = self.relu(self.convtrans2(x))
-        return x
\ No newline at end of file
+        return x
diff --git a/bob/ip/binseg/modeling/resunet.py b/bob/ip/binseg/modeling/resunet.py
index 38f66cdd..5256bd72 100644
--- a/bob/ip/binseg/modeling/resunet.py
+++ b/bob/ip/binseg/modeling/resunet.py
@@ -12,7 +12,7 @@ from bob.ip.binseg.modeling.backbones.resnet import resnet50
 class ResUNet(nn.Module):
     """
     UNet head module for ResNet backbones
-    
+
     Parameters
     ----------
     in_channels_list : list
@@ -42,28 +42,28 @@ class ResUNet(nn.Module):
         ----------
         x : list
                 list of tensors as returned from the backbone network.
-                First element: height and width of input image. 
+                First element: height and width of input image.
                 Remaining elements: feature maps for each feature level.
         """
         # NOTE: x[0]: height and width of input image not needed in U-Net architecture
-        decode4 = self.decode4(x[5], x[4])  
-        decode3 = self.decode3(decode4, x[3]) 
-        decode2 = self.decode2(decode3, x[2]) 
-        decode1 = self.decode1(decode2, x[1]) 
+        decode4 = self.decode4(x[5], x[4])
+        decode3 = self.decode3(decode4, x[3])
+        decode2 = self.decode2(decode3, x[2])
+        decode1 = self.decode1(decode2, x[1])
         decode0 = self.decode0(decode1)
         out = self.final(decode0)
         return out
 
 def build_res50unet():
-    """ 
+    """
     Adds backbone and head together
 
     Returns
     -------
-    model : :py:class:torch.nn.Module
+    model : :py:class:`torch.nn.Module`
     """
     backbone = resnet50(pretrained=False, return_features = [2, 4, 5, 6, 7])
     unet_head  = ResUNet([64, 256, 512, 1024, 2048],pixel_shuffle=False)
     model = nn.Sequential(OrderedDict([("backbone", backbone), ("head", unet_head)]))
     model.name = "ResUNet"
-    return model
\ No newline at end of file
+    return model
diff --git a/bob/ip/binseg/modeling/unet.py b/bob/ip/binseg/modeling/unet.py
index d1102592..b89ec5f1 100644
--- a/bob/ip/binseg/modeling/unet.py
+++ b/bob/ip/binseg/modeling/unet.py
@@ -12,7 +12,7 @@ from bob.ip.binseg.modeling.backbones.vgg import vgg16
 class UNet(nn.Module):
     """
     UNet head module
-    
+
     Parameters
     ----------
     in_channels_list : list
@@ -22,7 +22,7 @@ class UNet(nn.Module):
         super(UNet, self).__init__()
         # number of channels
         c_decode1, c_decode2, c_decode3, c_decode4, c_decode5 = in_channels_list
-        
+
         # build layers
         self.decode4 = UnetBlock(c_decode5, c_decode4, pixel_shuffle, middle_block=True)
         self.decode3 = UnetBlock(c_decode4, c_decode3, pixel_shuffle)
@@ -36,28 +36,29 @@ class UNet(nn.Module):
         ----------
         x : list
             list of tensors as returned from the backbone network.
-            First element: height and width of input image. 
+            First element: height and width of input image.
             Remaining elements: feature maps for each feature level.
         """
         # NOTE: x[0]: height and width of input image not needed in U-Net architecture
-        decode4 = self.decode4(x[5], x[4])  
-        decode3 = self.decode3(decode4, x[3]) 
-        decode2 = self.decode2(decode3, x[2]) 
-        decode1 = self.decode1(decode2, x[1]) 
+        decode4 = self.decode4(x[5], x[4])
+        decode3 = self.decode3(decode4, x[3])
+        decode2 = self.decode2(decode3, x[2])
+        decode1 = self.decode1(decode2, x[1])
         out = self.final(decode1)
         return out
 
 def build_unet():
-    """ 
+    """
     Adds backbone and head together
 
     Returns
     -------
-    model : :py:class:torch.nn.Module
+    module : :py:class:`torch.nn.Module`
     """
+
     backbone = vgg16(pretrained=False, return_features = [3, 8, 14, 22, 29])
     unet_head = UNet([64, 128, 256, 512, 512], pixel_shuffle=False)
 
     model = nn.Sequential(OrderedDict([("backbone", backbone), ("head", unet_head)]))
     model.name = "UNet"
-    return model
\ No newline at end of file
+    return model
diff --git a/bob/ip/binseg/test/test_checkpointer.py b/bob/ip/binseg/test/test_checkpointer.py
index 45a181b3..976e9d94 100644
--- a/bob/ip/binseg/test/test_checkpointer.py
+++ b/bob/ip/binseg/test/test_checkpointer.py
@@ -1,8 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/engine/trainer.py 
-# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 from collections import OrderedDict
 from tempfile import TemporaryDirectory
 import unittest
@@ -90,4 +88,4 @@ class TestCheckpointer(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    unittest.main()
\ No newline at end of file
+    unittest.main()
diff --git a/bob/ip/binseg/utils/checkpointer.py b/bob/ip/binseg/utils/checkpointer.py
index f3899e1d..1a79d908 100644
--- a/bob/ip/binseg/utils/checkpointer.py
+++ b/bob/ip/binseg/utils/checkpointer.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-# Adapted from https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/engine/trainer.py 
 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 
 import logging
@@ -10,14 +9,12 @@ import os
 from bob.ip.binseg.utils.model_serialization import load_state_dict
 from bob.ip.binseg.utils.model_zoo import cache_url
 
+
 class Checkpointer:
-    """Adapted from `maskrcnn-benchmark`_ under MIT license
-    
-    Returns
-    -------
-    [type]
-        [description]
+    """Adapted from `maskrcnn-benchmark
+    <https://github.com/facebookresearch/maskrcnn-benchmark>`_ under MIT license
     """
+
     def __init__(
         self,
         model,
@@ -62,7 +59,9 @@ class Checkpointer:
             f = self.get_checkpoint_file()
         if not f:
             # no checkpoint could be found
-            self.logger.warn("No checkpoint found. Initializing model from scratch")
+            self.logger.warning(
+                "No checkpoint found. Initializing model from scratch"
+            )
             return {}
         self.logger.info("Loading checkpoint from {}".format(f))
         checkpoint = self._load_file(f)
@@ -130,4 +129,4 @@ class DetectronCheckpointer(Checkpointer):
         loaded = super(DetectronCheckpointer, self)._load_file(f)
         if "model" not in loaded:
             loaded = dict(model=loaded)
-        return loaded
\ No newline at end of file
+        return loaded
diff --git a/bob/ip/binseg/utils/click.py b/bob/ip/binseg/utils/click.py
index 8b8294d9..03fd5d30 100644
--- a/bob/ip/binseg/utils/click.py
+++ b/bob/ip/binseg/utils/click.py
@@ -1,14 +1,13 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
-
-
 import click
 
 class OptionEatAll(click.Option):
     """
-    Allows for *args and **kwargs to be passed to click 
-    https://stackoverflow.com/questions/48391777/nargs-equivalent-for-options-in-click 
+    Allows for ``*args`` and ``**kwargs`` to be passed to click
+
+    https://stackoverflow.com/questions/48391777/nargs-equivalent-for-options-in-click
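+
+    Example (an illustrative sketch, following the pattern from the link
+    above; the option name is made up):
+
+    .. code-block:: python
+
+       import click
+
+       @click.command()
+       @click.option("--datasets", cls=OptionEatAll)
+       def train(datasets):
+           print(datasets)  # all trailing tokens, gathered into a tuple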
     """
 
     def __init__(self, *args, **kwargs):
@@ -50,4 +49,4 @@ class OptionEatAll(click.Option):
                 self._previous_parser_process = our_parser.process
                 our_parser.process = parser_process
                 break
-        return retval
\ No newline at end of file
+        return retval
diff --git a/bob/ip/binseg/utils/model_zoo.py b/bob/ip/binseg/utils/model_zoo.py
index 8bc7c931..00c7f7c5 100644
--- a/bob/ip/binseg/utils/model_zoo.py
+++ b/bob/ip/binseg/utils/model_zoo.py
@@ -17,7 +17,7 @@ import warnings
 import zipfile
 from urllib.request import urlopen
 from urllib.parse import urlparse
-from tqdm import tqdm 
+from tqdm import tqdm
 
 modelurls = {
     "vgg11": "https://download.pytorch.org/models/vgg11-bbd30ac9.pth",
@@ -99,7 +99,7 @@ def cache_url(url, model_dir=None, progress=True):
         os.makedirs(model_dir)
     parts = urlparse(url)
     filename = os.path.basename(parts.path)
-    
+
     cached_file = os.path.join(model_dir, filename)
     if not os.path.exists(cached_file):
         sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
@@ -107,5 +107,5 @@ def cache_url(url, model_dir=None, progress=True):
         if hash_prefix is not None:
             hash_prefix = hash_prefix.group(1)
         _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
-    
-    return cached_file
\ No newline at end of file
+
+    return cached_file
diff --git a/doc/acknowledgements.rst b/doc/acknowledgements.rst
index 7dde8b5d..d273b8bf 100644
--- a/doc/acknowledgements.rst
+++ b/doc/acknowledgements.rst
@@ -26,7 +26,7 @@ This packages utilizes code from the following packages:
      month = {May},
      year = {2019},
      address = {New Orleans, Louisiana}
-    }   
+    }
 
 * The MobileNetV2 backbone is based on an implementation by::
 
diff --git a/doc/api.rst b/doc/api.rst
index 2f732d29..5142bd59 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -1,67 +1,211 @@
 .. -*- coding: utf-8 -*-
-.. _bob.ip.binseg.api:
 
-============
- Python API
-============
+=====
+ API
+=====
 
-This section lists all the functionality available in this library allowing to
-run binary-segmentation benchmarks.
+.. To update these lists, run the following command on the root of the package:
+.. find bob -name '*.py' | sed -e 's#/#.#g;s#.py$##g;s#.__init__##g' | sort
+.. You may apply further filtering to update only one of the subsections below
 
+.. autosummary::
+   :toctree: api/base
 
-PyTorch bob.db Dataset
-======================
+   bob.ip.binseg
 
-.. automodule:: bob.ip.binseg.data.binsegdataset
 
+Data Manipulation
+-----------------
 
-PyTorch ImageFolder Dataset
-===========================
+.. autosummary::
+   :toctree: api/data
 
-.. automodule:: bob.ip.binseg.data.imagefolder
+   bob.ip.binseg.data
+   bob.ip.binseg.data.binsegdataset
+   bob.ip.binseg.data.imagefolder
+   bob.ip.binseg.data.imagefolderinference
+   bob.ip.binseg.data.transforms
 
-.. automodule:: bob.ip.binseg.data.imagefolderinference
 
+Engines
+-------
 
-Transforms
-==========
+.. autosummary::
+   :toctree: api/engine
 
-.. note::
+   bob.ip.binseg.engine
+   bob.ip.binseg.engine.adabound
+   bob.ip.binseg.engine.inferencer
+   bob.ip.binseg.engine.predicter
+   bob.ip.binseg.engine.ssltrainer
+   bob.ip.binseg.engine.trainer
 
-   All transforms work with :py:class:`PIL.Image.Image` objects. We make heavy
-   use of the `torchvision package`_.
 
-.. automodule:: bob.ip.binseg.data.transforms
+Neural Network Models
+---------------------
 
+.. autosummary::
+   :toctree: api/modeling
 
-Losses
-======
+   bob.ip.binseg.modeling
+   bob.ip.binseg.modeling.backbones
+   bob.ip.binseg.modeling.backbones.mobilenetv2
+   bob.ip.binseg.modeling.backbones.resnet
+   bob.ip.binseg.modeling.backbones.vgg
+   bob.ip.binseg.modeling.driu
+   bob.ip.binseg.modeling.driubn
+   bob.ip.binseg.modeling.driuod
+   bob.ip.binseg.modeling.driupix
+   bob.ip.binseg.modeling.hed
+   bob.ip.binseg.modeling.losses
+   bob.ip.binseg.modeling.m2u
+   bob.ip.binseg.modeling.make_layers
+   bob.ip.binseg.modeling.resunet
+   bob.ip.binseg.modeling.unet
 
-.. automodule:: bob.ip.binseg.modeling.losses
 
+Toolbox
+-------
 
-Training
-========
+.. autosummary::
+   :toctree: api/utils
 
-.. automodule:: bob.ip.binseg.engine.trainer
+   bob.ip.binseg.utils
+   bob.ip.binseg.utils.checkpointer
+   bob.ip.binseg.utils.click
+   bob.ip.binseg.utils.evaluate
+   bob.ip.binseg.utils.metric
+   bob.ip.binseg.utils.model_serialization
+   bob.ip.binseg.utils.model_zoo
+   bob.ip.binseg.utils.plot
+   bob.ip.binseg.utils.rsttable
+   bob.ip.binseg.utils.summary
+   bob.ip.binseg.utils.transformfolder
 
 
-Checkpointer
-============
+Scripts
+-------
 
-.. automodule:: bob.ip.binseg.utils.checkpointer
+.. autosummary::
+   :toctree: api/scripts
 
+   bob.ip.binseg.script
+   bob.ip.binseg.script.binseg
 
-Inference and Evaluation
-========================
 
-.. automodule:: bob.ip.binseg.engine.inferencer
+Preset Configurations
+---------------------
 
+.. autosummary::
+   :toctree: api/configs
 
-Plotting
-========
+   bob.ip.binseg.configs
 
-.. automodule:: bob.ip.binseg.utils.plot
 
+Models
+======
+
+.. autosummary::
+   :toctree: api/configs/models
+
+   bob.ip.binseg.configs.models
+   bob.ip.binseg.configs.models.driu
+   bob.ip.binseg.configs.models.driubn
+   bob.ip.binseg.configs.models.driubnssl
+   bob.ip.binseg.configs.models.driuod
+   bob.ip.binseg.configs.models.driussl
+   bob.ip.binseg.configs.models.hed
+   bob.ip.binseg.configs.models.m2unet
+   bob.ip.binseg.configs.models.m2unetssl
+   bob.ip.binseg.configs.models.resunet
+   bob.ip.binseg.configs.models.unet
+
+
+Datasets
+========
 
-.. include:: links.rst
+.. autosummary::
+   :toctree: api/configs/datasets
+
+   bob.ip.binseg.configs.datasets
+   bob.ip.binseg.configs.datasets.amdrive
+   bob.ip.binseg.configs.datasets.amdrivetest
+   bob.ip.binseg.configs.datasets.chasedb1
+   bob.ip.binseg.configs.datasets.chasedb11024
+   bob.ip.binseg.configs.datasets.chasedb11168
+   bob.ip.binseg.configs.datasets.chasedb1544
+   bob.ip.binseg.configs.datasets.chasedb1608
+   bob.ip.binseg.configs.datasets.chasedb1test
+   bob.ip.binseg.configs.datasets.drionsdb
+   bob.ip.binseg.configs.datasets.drionsdbtest
+   bob.ip.binseg.configs.datasets.dristhigs1cup
+   bob.ip.binseg.configs.datasets.dristhigs1cuptest
+   bob.ip.binseg.configs.datasets.dristhigs1od
+   bob.ip.binseg.configs.datasets.dristhigs1odtest
+   bob.ip.binseg.configs.datasets.drive
+   bob.ip.binseg.configs.datasets.drive1024
+   bob.ip.binseg.configs.datasets.drive1024test
+   bob.ip.binseg.configs.datasets.drive1168
+   bob.ip.binseg.configs.datasets.drive608
+   bob.ip.binseg.configs.datasets.drive960
+   bob.ip.binseg.configs.datasets.drivechasedb1iostarhrf608
+   bob.ip.binseg.configs.datasets.drivechasedb1iostarhrf608sslstare
+   bob.ip.binseg.configs.datasets.drivestarechasedb11168
+   bob.ip.binseg.configs.datasets.drivestarechasedb1hrf1024
+   bob.ip.binseg.configs.datasets.drivestarechasedb1hrf1024ssliostar
+   bob.ip.binseg.configs.datasets.drivestarechasedb1iostar1168
+   bob.ip.binseg.configs.datasets.drivestarechasedb1iostar1168sslhrf
+   bob.ip.binseg.configs.datasets.drivestareiostarhrf960
+   bob.ip.binseg.configs.datasets.drivestareiostarhrf960sslchase
+   bob.ip.binseg.configs.datasets.drivetest
+   bob.ip.binseg.configs.datasets.hrf
+   bob.ip.binseg.configs.datasets.hrf1024
+   bob.ip.binseg.configs.datasets.hrf1168
+   bob.ip.binseg.configs.datasets.hrf1168test
+   bob.ip.binseg.configs.datasets.hrf544
+   bob.ip.binseg.configs.datasets.hrf544test
+   bob.ip.binseg.configs.datasets.hrf608
+   bob.ip.binseg.configs.datasets.hrf960
+   bob.ip.binseg.configs.datasets.hrftest
+   bob.ip.binseg.configs.datasets.imagefolder
+   bob.ip.binseg.configs.datasets.imagefolderinference
+   bob.ip.binseg.configs.datasets.imagefoldertest
+   bob.ip.binseg.configs.datasets.iostarod
+   bob.ip.binseg.configs.datasets.iostarodtest
+   bob.ip.binseg.configs.datasets.iostarvessel
+   bob.ip.binseg.configs.datasets.iostarvessel1168
+   bob.ip.binseg.configs.datasets.iostarvessel544
+   bob.ip.binseg.configs.datasets.iostarvessel544test
+   bob.ip.binseg.configs.datasets.iostarvessel608
+   bob.ip.binseg.configs.datasets.iostarvessel960
+   bob.ip.binseg.configs.datasets.iostarvesseltest
+   bob.ip.binseg.configs.datasets.refugecup
+   bob.ip.binseg.configs.datasets.refugecuptest
+   bob.ip.binseg.configs.datasets.refugeod
+   bob.ip.binseg.configs.datasets.refugeodtest
+   bob.ip.binseg.configs.datasets.rimoner3cup
+   bob.ip.binseg.configs.datasets.rimoner3cuptest
+   bob.ip.binseg.configs.datasets.rimoner3od
+   bob.ip.binseg.configs.datasets.rimoner3odtest
+   bob.ip.binseg.configs.datasets.stare
+   bob.ip.binseg.configs.datasets.stare1024
+   bob.ip.binseg.configs.datasets.stare1168
+   bob.ip.binseg.configs.datasets.stare544
+   bob.ip.binseg.configs.datasets.stare960
+   bob.ip.binseg.configs.datasets.starechasedb1iostarhrf544
+   bob.ip.binseg.configs.datasets.starechasedb1iostarhrf544ssldrive
+   bob.ip.binseg.configs.datasets.staretest
+
+
+Test Units
+----------
+
+.. autosummary::
+   :toctree: api/tests
+
+   bob.ip.binseg.test
+   bob.ip.binseg.test.test_basemetrics
+   bob.ip.binseg.test.test_batchmetrics
+   bob.ip.binseg.test.test_checkpointer
+   bob.ip.binseg.test.test_summary
+   bob.ip.binseg.test.test_transforms
diff --git a/doc/conf.py b/doc/conf.py
index d64d7794..8abecdd1 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -96,7 +96,12 @@ release = distribution.version
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['links.rst']
+exclude_patterns = [
+        'links.rst',
+        'api/modules.rst',
+        'api/bob.rst',
+        'api/bob.ip.rst',
+        ]
 
 # The reST default role (used for this markup: `text`) to use for all documents.
 #default_role = None
@@ -236,6 +241,16 @@ else:
 intersphinx_mapping['torch'] = ('https://pytorch.org/docs/stable/', None)
 intersphinx_mapping['PIL'] = ('http://pillow.readthedocs.io/en/stable', None)
 intersphinx_mapping['pandas'] = ('https://pandas.pydata.org/pandas-docs/stable/',None)
+
+# Figures out the major click version we use
+import pkg_resources
+click_version = pkg_resources.require('click')[0].version.split('.')[0]
+click_version += '.x'
+intersphinx_mapping['click'] = ('https://click.palletsprojects.com/en/%s/' % (click_version,),None)
+
+# Add our private index (for extras and fixes)
+intersphinx_mapping['extras'] = ('', 'extras.inv')
+
 # We want to remove all private (i.e. _. or __.__) members
 # that are not in the list of accepted functions
 accepted_private_functions = ['__array__']
@@ -257,4 +272,4 @@ def member_function_test(app, what, name, obj, skip, options):
 
 def setup(app):
     app.connect('autodoc-skip-member', member_function_test)
-    
\ No newline at end of file
+
diff --git a/doc/datasets.rst b/doc/datasets.rst
index dd0f1cd3..dece6057 100644
--- a/doc/datasets.rst
+++ b/doc/datasets.rst
@@ -6,33 +6,39 @@
  Supported Datasets
 ====================
 
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-|  #  |     Name      |    H x W    | # imgs | Train | Test | Mask | Vessel | OD  | Cup | Train-Test split reference |
-+=====+===============+=============+========+=======+======+======+========+=====+=====+============================+
-| 1   | Drive_        | 584 x 565   | 40     | 20    | 20   | x    | x      |     |     | `Staal et al. (2004)`_     |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 2   | STARE_        | 605 x 700   | 20     | 10    | 10   |      | x      |     |     | `Maninis et al. (2016)`_   |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 3   | CHASEDB1_     | 960 x 999   | 28     | 8     | 20   |      | x      |     |     | `Fraz et al. (2012)`_      |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 4   | HRF_          | 2336 x 3504 | 45     | 15    | 30   | x    | x      |     |     | `Orlando et al. (2016)`_   |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 5   | IOSTAR_       | 1024 x 1024 | 30     | 20    | 10   | x    | x      | x   |     | `Meyer et al. (2017)`_     |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 6   | DRIONS-DB_    | 400 x 600   | 110    | 60    | 50   |      |        | x   |     | `Maninis et al. (2016)`_   |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 7   | RIM-ONEr3_    | 1424 x 1072 | 159    | 99    | 60   |      |        | x   | x   | `Maninis et al. (2016)`_   |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 8   | Drishti-GS1_  | varying     | 101    | 50    | 51   |      |        | x   | x   | `Sivaswamy et al. (2014)`_ |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 9   | REFUGE_ train | 2056 x 2124 | 400    | 400   |      |      |        | x   | x   | REFUGE_                    |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-| 9   | REFUGE_ val   | 1634 x 1634 | 400    |       | 400  |      |        | x   | x   | REFUGE_                    |
-+-----+---------------+-------------+--------+-------+------+------+--------+-----+-----+----------------------------+
-
-
-Add-on: Folder-based Dataset
-============================
+Here is a list of currently supported datasets in this package, alongside
+notable properties.  Each dataset name is linked to the current location where
+raw data can be downloaded.  We include references to the data split protocols
+used to generate iterators for training and testing.
+
+
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+|   Dataset       |   Reference        | ``bob.db`` package    |    H x W    | Samples | Mask | Vessel | OD  | Cup | Split Reference    | Train | Test |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| DRIVE_          | [DRIVE-2004]_      | ``bob.db.drive``      | 584 x 565   | 40      | x    | x      |     |     | [DRIVE-2004]_      | 20    | 20   |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| STARE_          | [STARE-2000]_      | ``bob.db.stare``      | 605 x 700   | 20      |      | x      |     |     | [MANINIS-2016]_    | 10    | 10   |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| CHASE-DB1_      | [CHASEDB1-2012]_   | ``bob.db.chasedb``    | 960 x 999   | 28      |      | x      |     |     | [CHASEDB1-2012]_   | 8     | 20   |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| HRF_            | [HRF-2013]_        | ``bob.db.hrf``        | 2336 x 3504 | 45      | x    | x      |     |     | [ORLANDO-2017]_    | 15    | 30   |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| IOSTAR_         | [IOSTAR-2016]_     | ``bob.db.iostar``     | 1024 x 1024 | 30      | x    | x      | x   |     | [MEYER-2017]_      | 20    | 10   |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| DRIONS-DB_      | [DRIONSDB-2008]_   | ``bob.db.drionsdb``   | 400 x 600   | 110     |      |        | x   |     | [MANINIS-2016]_    | 60    | 50   |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| `RIM-ONE r3`_   | [RIMONER3-2015]_   | ``bob.db.rimoner3``   | 1424 x 1072 | 159     |      |        | x   | x   | [MANINIS-2016]_    | 99    | 60   |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| Drishti-GS1_    | [DRISHTIGS1-2014]_ | ``bob.db.drishtigs1`` | varying     | 101     |      |        | x   | x   | [DRISHTIGS1-2014]_ | 50    | 51   |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| REFUGE_ (train) | [REFUGE-2018]_     | ``bob.db.refuge``     | 2056 x 2124 | 400     |      |        | x   | x   | [REFUGE-2018]_     | 400   |      |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+| REFUGE_ (val)   | [REFUGE-2018]_     | ``bob.db.refuge``     | 1634 x 1634 | 400     |      |        | x   | x   | [REFUGE-2018]_     |       | 400  |
++-----------------+--------------------+-----------------------+-------------+---------+------+--------+-----+-----+--------------------+-------+------+
+
+
+Folder-based Dataset
+--------------------
 
 For quick experimentation, we also provide a PyTorch_ class that works with the
 following dataset folder structure for images and ground-truth (gt):
diff --git a/doc/extras.inv b/doc/extras.inv
new file mode 100644
index 00000000..11e31176
--- /dev/null
+++ b/doc/extras.inv
@@ -0,0 +1,5 @@
+# Sphinx inventory version 2
+# Project: extras
+# Version: stable
+# The remainder of this file is compressed using zlib.
+xÚ½¿‚0‡wž‚„™6ƍÄ8èj\|RÚ¤´Mïðéí@‚..w÷ËåîûÐz.‰u¨†ÜÕSxr_R馆kPJ‰è ¡ÔM˜Ÿ|O;ˁ²Vš!]á†úfÕEÞCn¶µø…ž"ú,U/u(<
‰Qí™K¾€yþC:¢Ò@:†,5H®yþ¢Mÿ;ñŒ«><¿.F£ áaA!]Bø&Ž2ìÒš$?R­ZÏüDWÆNw[­+¿.^ÏÖÔ
\ No newline at end of file
diff --git a/doc/extras.txt b/doc/extras.txt
new file mode 100644
index 00000000..1c1776cd
--- /dev/null
+++ b/doc/extras.txt
@@ -0,0 +1,10 @@
+# Sphinx inventory version 2
+# Project: extras
+# Version: stable
+# The remainder of this file is compressed using zlib.
+torch.optim.optimizer.Optimizer py:class 1 https://pytorch.org/docs/stable/optim.html#torch.optim.Optimizer -
+torch.nn.Module py:class 1 https://pytorch.org/docs/stable/nn.html?highlight=module#torch.nn.Module -
+torch.nn.modules.module.Module py:class 1 https://pytorch.org/docs/stable/nn.html?highlight=module#torch.nn.Module -
+torch.utils.data.dataset.Dataset py:class 1 https://pytorch.org/docs/stable/data.html?highlight=dataset#torch.utils.data.Dataset -
+unittest.case.TestCase py:class 1 https://docs.python.org/3/library/unittest.html?highlight=testcase#unittest.TestCase -
+click.core.Option py:class 1 https://click.palletsprojects.com/en/7.x/api/#click.Option -
diff --git a/doc/index.rst b/doc/index.rst
index 1c7b3d74..2dfa532d 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -26,14 +26,17 @@ Please use the BibTeX reference below to cite this work:
 
 
 Additional Material
-===================
+-------------------
 
 The additional material referred to in the paper can be found under
 :ref:`bob.ip.binseg.covdresults` and :download:`here </additionalresults.pdf>`
 
 
+.. todolist::
+
+
 Users Guide
-===========
+-----------
 
 .. toctree::
    :maxdepth: 2
@@ -47,9 +50,17 @@ Users Guide
    configs
    plotting
    visualization
-   api
    acknowledgements
+   references
+   api
+
+
+Indices and tables
+------------------
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
 
-.. todolist::
 
 .. include:: links.rst
diff --git a/doc/links.rst b/doc/links.rst
index c11cdfe2..5b9a2390 100644
--- a/doc/links.rst
+++ b/doc/links.rst
@@ -4,62 +4,22 @@
 
 .. _idiap: http://www.idiap.ch
 .. _bob: http://www.idiap.ch/software/bob
-.. _installation: https://www.idiap.ch/software/bob/docs/bob/docs/stable/bob/bob/doc/install.html
+.. _installation: https://www.idiap.ch/software/bob/install
 .. _mailing list: https://www.idiap.ch/software/bob/discuss
 .. _pytorch: https://pytorch.org
-.. _torchvision package: https://github.com/pytorch/vision
 
-.. DRIVE
-
-.. _drive: https://doi.org/10.1109/TMI.2004.825627
-.. _staal et al. (2004): https://doi.org/10.1109/TMI.2004.825627
-
-.. STARE
-
-.. _stare: https://doi.org/10.1109/42.845178
-.. _maninis et al. (2016): https://doi.org/10.1007/978-3-319-46723-8_17
-
-.. HRF
-
-.. _hrf: http://dx.doi.org/10.1155/2013/154860
-.. _orlando et al. (2016): https://doi.org/10.1109/TBME.2016.2535311
-
-.. IOSTAR
-
-.. _iostar: https://doi.org/10.1109/TMI.2016.2587062
-.. _meyer et al. (2017): https://doi.org/10.1007/978-3-319-59876-5_56
-
-.. CHASEDB1
-
-.. _chasedb1: https://doi.org/10.1109/TBME.2012.2205687
-.. _fraz et al. (2012): https://doi.org/10.1109/TBME.2012.2205687
-
-.. DRIONSDB
-
-.. _drions-db: http://dx.doi.org/10.1016/j.artmed.2008.04.005
-.. _maninis et al. (2016): https://doi.org/10.1007/978-3-319-46723-8_17
-
-.. RIM-ONE r3
-
-.. _rim-oner3: https://dspace5.zcu.cz/bitstream/11025/29670/1/Fumero.pdf
-.. _maninis et al. (2016): https://doi.org/10.1007/978-3-319-46723-8_17
-
-.. Drishti-GS1
-
-.. _drishti-gs1: https://doi.org/10.1109/ISBI.2014.6867807
-.. _sivaswamy et al. (2014): https://doi.org/10.1109/ISBI.2014.6867807
-
-.. REFUGE
-
-.. _refuge: http://ai.baidu.com/broad/download?dataset=gon
-
-.. OtherPapers
-
-.. _Iglovikov  et al. (2018): http://openaccess.thecvf.com/content_cvpr_2018_workshops/w4/html/Iglovikov_TernausNetV2_Fully_Convolutional_CVPR_2018_paper.html
-.. _He et al. (2015): https://doi.org/10.1109/ICCV.2015.164
+.. Raw data websites
+.. _drive: https://www.isi.uu.nl/Research/Databases/DRIVE/
+.. _stare: http://cecas.clemson.edu/~ahoover/stare/
+.. _hrf: https://www5.cs.fau.de/research/data/fundus-images/
+.. _iostar: http://www.retinacheck.org/datasets
+.. _chase-db1: https://blogs.kingston.ac.uk/retinal/chasedb1/
+.. _drions-db: http://www.ia.uned.es/~ejcarmona/DRIONS-DB.html
+.. _rim-one r3: http://medimrg.webs.ull.es/research/downloads/
+.. _drishti-gs1: http://cvit.iiit.ac.in/projects/mip/drishti-gs/mip-dataset2/Home.php
+.. _refuge: https://refuge.grand-challenge.org/Details/
 
 .. Software Tools
-
 .. _maskrcnn-benchmark: https://github.com/facebookresearch/maskrcnn-benchmark
 
 
diff --git a/doc/nitpick-exceptions.txt b/doc/nitpick-exceptions.txt
index bd53da1a..e508f84b 100644
--- a/doc/nitpick-exceptions.txt
+++ b/doc/nitpick-exceptions.txt
@@ -1,6 +1,4 @@
-py:class torch.nn.modules.module.Module
+py:mod bob.db.base
 py:class torch.nn.modules.loss._Loss
-py:class torch.utils.data.dataset.Dataset
 py:class Module
-py:mod bob.db.base
-py:obj list
+py:class click.core.Option
diff --git a/doc/references.rst b/doc/references.rst
new file mode 100644
index 00000000..d7b4f8d5
--- /dev/null
+++ b/doc/references.rst
@@ -0,0 +1,77 @@
+.. -*- coding: utf-8 -*-
+
+============
+ References
+============
+
+.. [STARE-2000] *A. D. Hoover, V. Kouznetsova and M. Goldbaum*, **Locating blood
+   vessels in retinal images by piecewise threshold probing of a matched filter
+   response**, in IEEE Transactions on Medical Imaging, vol. 19, no. 3, pp.
+   203-210, March 2000. https://doi.org/10.1109/42.845178
+
+.. [DRIVE-2004] *J. Staal, M. D. Abramoff, M. Niemeijer, M. A. Viergever and B.
+   van Ginneken*, **Ridge-based vessel segmentation in color images of the
+   retina**, in IEEE Transactions on Medical Imaging, vol. 23, no. 4, pp.
+   501-509, April 2004. https://doi.org/10.1109/TMI.2004.825627
+
+.. [CHASEDB1-2012] *M. M. Fraz et al.*, **An Ensemble Classification-Based
+   Approach Applied to Retinal Blood Vessel Segmentation**, in IEEE
+   Transactions on Biomedical Engineering, vol. 59, no. 9, pp. 2538-2548, Sept.
+   2012. https://doi.org/10.1109/TBME.2012.2205687
+
+.. [HRF-2013] *A. Budai, R. Bock, A. Maier, J. Hornegger, and G. Michelson*,
+   **Robust Vessel Segmentation in Fundus Images**, in International Journal of
+   Biomedical Imaging, vol. 2013, p. 11, 2013.
+   http://dx.doi.org/10.1155/2013/154860
+
+.. [IOSTAR-2016] *J. Zhang, B. Dashtbozorg, E. Bekkers, J. P. W. Pluim, R. Duits
+   and B. M. ter Haar Romeny*, **Robust Retinal Vessel Segmentation via Locally
+   Adaptive Derivative Frames in Orientation Scores**, in IEEE Transactions on
+   Medical Imaging, vol. 35, no. 12, pp. 2631-2644, Dec. 2016.
+   https://doi.org/10.1109/TMI.2016.2587062
+
+.. [DRIONSDB-2008] *Enrique J. Carmona, Mariano Rincón, Julián García-Feijoó, José
+   M. Martínez-de-la-Casa*, **Identification of the optic nerve head with
+   genetic algorithms**, in Artificial Intelligence in Medicine, Volume 43,
+   Issue 3, pp. 243-259, 2008. http://dx.doi.org/10.1016/j.artmed.2008.04.005
+
+.. [RIMONER3-2015] *F. Fumero, J. Sigut, S. Alayón, M. González-Hernández, M.
+   González de la Rosa*, **Interactive Tool and Database for Optic Disc and Cup
+   Segmentation of Stereo and Monocular Retinal Fundus Images**, Conference on
+   Computer Graphics, Visualization and Computer Vision, 2015.
+   https://dspace5.zcu.cz/bitstream/11025/29670/1/Fumero.pdf
+
+.. [DRISHTIGS1-2014] *J. Sivaswamy, S. R. Krishnadas, G. Datt Joshi, M. Jain and
+   A. U. Syed Tabish*, **Drishti-GS: Retinal image dataset for optic nerve
+   head (ONH) segmentation**, 2014 IEEE 11th International Symposium on
+   Biomedical Imaging (ISBI), Beijing, 2014, pp. 53-56.
+   https://doi.org/10.1109/ISBI.2014.6867807
+
+.. [REFUGE-2018] https://refuge.grand-challenge.org/Details/
+
+.. [MANINIS-2016] *K.-K. Maninis, J. Pont-Tuset, P. Arbeláez, and L. Van Gool*,
+   **Deep Retinal Image Understanding**, in Medical Image Computing and
+   Computer-Assisted Intervention – MICCAI 2016, Cham, 2016, pp. 140–148.
+   https://doi.org/10.1007/978-3-319-46723-8_17
+
+.. [ORLANDO-2017] *J. I. Orlando, E. Prokofyeva and M. B. Blaschko*, **A
+   Discriminatively Trained Fully Connected Conditional Random Field Model for
+   Blood Vessel Segmentation in Fundus Images**, in IEEE Transactions on
+   Biomedical Engineering, vol. 64, no. 1, pp. 16-27, Jan. 2017.
+   https://doi.org/10.1109/TBME.2016.2535311
+
+.. [MEYER-2017] *M. I. Meyer, P. Costa, A. Galdran, A. M. Mendonça, and A.
+   Campilho*, **A Deep Neural Network for Vessel Segmentation of Scanning Laser
+   Ophthalmoscopy Images**, in Image Analysis and Recognition, vol. 10317, F.
+   Karray, A. Campilho, and F. Cheriet, Eds. Cham: Springer International
+   Publishing, 2017, pp. 507–515. https://doi.org/10.1007/978-3-319-59876-5_56
+
+.. [IGLOVIKOV-2018] *V. Iglovikov, S. Seferbekov, A. Buslaev and A. Shvets*,
+   **TernausNetV2: Fully Convolutional Network for Instance Segmentation**,
+   2018 IEEE/CVF Conference on Computer Vision and Pattern Recognition
+   Workshops (CVPRW), Salt Lake City, UT, 2018, pp. 228-2284.
+   https://doi.org/10.1109/CVPRW.2018.00042
+
+.. [HE-2015] *S. Xie and Z. Tu*, **Holistically-Nested Edge Detection**, 2015
+   IEEE International Conference on Computer Vision (ICCV), Santiago, 2015, pp.
+   1395-1403. https://doi.org/10.1109/ICCV.2015.164
diff --git a/doc/setup.rst b/doc/setup.rst
index 0f1a68d2..7a1d3c7b 100644
--- a/doc/setup.rst
+++ b/doc/setup.rst
@@ -28,89 +28,68 @@ must procure.
 
 To setup a dataset, do the following:
 
-1. Download the dataset from the authors website (see below for all download
-   links)
-2. Install the corresponding bob.db package via ``conda install
-   bob.db.<database>``.  E.g. to install the DRIVE API run ``conda install
-   bob.db.drive``
-3. :ref:`datasetpathsetup`
-4. :ref:`dsconsistency`
-
-+------------+----------------------------------------------------------------------+---------------------+
-| Dataset    | Website                                                              | `bob.db` package    |
-+------------+----------------------------------------------------------------------+---------------------+
-| STARE      | http://cecas.clemson.edu/~ahoover/stare/                             | `bob.db.stare`      |
-+------------+----------------------------------------------------------------------+---------------------+
-| DRIVE      | https://www.isi.uu.nl/Research/Databases/DRIVE/                      | `bob.db.drive`      |
-+------------+----------------------------------------------------------------------+---------------------+
-| DRIONS     | http://www.ia.uned.es/~ejcarmona/DRIONS-DB.html                      | `bob.db.drionsdb`   |
-+------------+----------------------------------------------------------------------+---------------------+
-| RIM-ONE    | http://medimrg.webs.ull.es/research/downloads/                       | `bob.db.rimoner3`   |
-+------------+----------------------------------------------------------------------+---------------------+
-| CHASE-DB1  | https://blogs.kingston.ac.uk/retinal/chasedb1/                       | `bob.db.chasedb`    |
-+------------+----------------------------------------------------------------------+---------------------+
-| HRF        | https://www5.cs.fau.de/research/data/fundus-images/                  | `bob.db.hrf`        |
-+------------+----------------------------------------------------------------------+---------------------+
-| Drishti-GS | http://cvit.iiit.ac.in/projects/mip/drishti-gs/mip-dataset2/Home.php | `bob.db.drishtigs1` |
-+------------+----------------------------------------------------------------------+---------------------+
-| IOSTAR     | http://www.retinacheck.org/datasets                                  | `bob.db.iostar`     |
-+------------+----------------------------------------------------------------------+---------------------+
-| REFUGE     | https://refuge.grand-challenge.org/Details/                          | `bob.db.refuge`     |
-+------------+----------------------------------------------------------------------+---------------------+
-
-
-.. _datasetpathsetup:
-
-Set up dataset paths
-====================
-
-.. warning::
-
-   Our dataset connectors expect you provide "root" paths of raw datasets as
-   you unpack them in their **pristine** state.  Changing the location of files
-   within a dataset distribution will likely cause execution errors.
-
-For each dataset that you are planning to use, set the ``datadir`` to the root
-path where it is stored.  E.g.:
+1. Download the dataset from the authors' website (see
+   :ref:`bob.ip.binseg.datasets` for download links and details), unpack it,
+   and note the root path leading to the uncompressed directory structure.
 
-.. code-block:: sh
+   .. warning::
 
-   (<myenv>) $ bob config set bob.db.drive.datadir "/path/to/drivedataset/"
+      Our dataset connectors expect you to provide "root" paths of raw
+      datasets as you unpack them in their **pristine** state.  Changing the
+      location of
+      files within a dataset distribution will likely cause execution errors.
 
-To check your current setup, do the following:
+2. Install the corresponding ``bob.db`` package (package names are listed in
+   :ref:`bob.ip.binseg.datasets`) with the following command:
 
-.. code-block:: sh
+   .. code-block:: sh
 
-   (<myenv>) $ bob config show
-   {
-       "bob.db.chasedb1.datadir": "/idiap/resource/database/CHASE-DB11/",
-       "bob.db.drionsdb.datadir": "/idiap/resource/database/DRIONS",
-       "bob.db.drishtigs1.datadir": "/idiap/resource/database/Drishti-GS1/",
-       "bob.db.drive.datadir": "/idiap/resource/database/DRIVE",
-       "bob.db.hrf.datadir": "/idiap/resource/database/HRF",
-       "bob.db.iostar.datadir": "/idiap/resource/database/IOSTAR/IOSTAR Vessel Segmentation Dataset/",
-       "bob.db.refuge.datadir": "/idiap/resource/database/REFUGE",
-       "bob.db.rimoner3.datadir": "/idiap/resource/database/RIM-ONE/RIM-ONE r3",
-       "bob.db.stare.datadir": "/idiap/resource/database/STARE"
-   }
+      # replace "<package>" by the corresponding package name
+      (<myenv>) $ conda install <package>
+      # example:
+      (<myenv>) $ conda install bob.db.drive  # to install DRIVE iterators
 
+3.  For each dataset that you are planning to use, set the ``datadir`` to the
+    root path where it is stored.  E.g.:
 
-.. _dsconsistency:
+    .. code-block:: sh
 
-Test dataset consitency
-=======================
+       (<myenv>) $ bob config set bob.db.drive.datadir "/path/to/drivedataset/"
 
-To check whether the downloaded version is consistent with the structure that
-is expected by our ``bob.db`` packages, run ``bob_dbmanage.py datasettocheck
-checkfiles`` E.g.:
+    To check your current setup, do the following:
 
-.. code-block:: sh
+    .. code-block:: sh
+
+       (<myenv>) $ bob config show
+       {
+           "bob.db.chasedb1.datadir": "/path/to/chasedb1/",
+           "bob.db.drionsdb.datadir": "/path/to/drionsdb",
+           "bob.db.drive.datadir": "/path/to/drive",
+           "bob.db.hrf.datadir": "/path/to/hrf",
+       }
+
+    This command shows the configured location of each dataset.  These paths
+    are automatically used by the dataset iterators provided by the ``bob.db``
+    packages to find the raw data files.
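+
+    To query a single configured path, you may use ``bob config get`` (a
+    small convenience; this assumes the subcommand is available in your
+    installed version of ``bob.extension``):
+
+    .. code-block:: sh
+
+       (<myenv>) $ bob config get bob.db.drive.datadir
+       /path/to/drivedataset/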
+
+4. To check whether the downloaded version is consistent with the structure
+   that is expected by our ``bob.db`` packages, run ``bob_dbmanage.py
+   <dataset> checkfiles``, where ``<dataset>`` should be replaced by the
+   dataset programmatic name. E.g., to check DRIVE files, use:
+
+   .. code-block:: sh
+
+      (<myenv>) $ bob_dbmanage.py drive checkfiles
+      > checkfiles completed successfully
+
+   If there are problems with the current file organisation, this procedure
+   should detect and highlight which files are missing.
 
-   (<myenv>) $ bob_dbmanage.py drive checkfiles
-   > checkfiles completed sucessfully
+   .. tip::
 
-If there are problems on the current file organisation, this procedure should
-detect and highlight which files are missing.
+      The programmatic names of datasets follow the ``bob.db.<dataset>``
+      nomenclature.  For example, the programmatic name of CHASE-DB1 is
+      ``chasedb1``, because the package name implementing iterators to its
+      files is ``bob.db.chasedb1``.
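+
+Once the paths above are configured, the dataset iterators can locate raw
+files automatically.  The sketch below illustrates the general idea; it
+assumes the conventional ``bob.db`` query interface, and the exact method
+names and arguments may differ between packages:
+
+.. code-block:: python
+
+   # A minimal sketch, assuming the conventional ``bob.db`` query API.
+   # The ``objects()`` arguments and the ``make_path()`` call are
+   # illustrative and may vary between packages; check each package's
+   # documentation for the exact interface.
+   import bob.db.drive
+
+   db = bob.db.drive.Database()
+   for sample in db.objects(groups="train"):
+       print(sample.make_path())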
 
 
 .. include:: links.rst
diff --git a/doc/training.rst b/doc/training.rst
index 4c8a1da1..e23d1bfc 100644
--- a/doc/training.rst
+++ b/doc/training.rst
@@ -2,13 +2,13 @@
 .. _bob.ip.binseg.training:
 
 
-========
-Training
-========
+==========
+ Training
+==========
 
-To replicate our results use our main application ``bob binseg train`` followed
-by the model configuration and dataset configuration files.  Use ``bob binseg
-train --help`` for more information.
+To replicate our results, use our main application ``bob binseg train``
+followed by the model and dataset configuration files.  Use ``bob binseg
+train --help`` for more information.
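+
+For example, to train a DRIU model on the DRIVE dataset (the configuration
+names below are illustrative; any registered model and dataset configuration
+pair works the same way):
+
+.. code-block:: sh
+
+   # illustrative: a model configuration (DRIU) followed by a dataset
+   # configuration (DRIVE); see ``bob binseg train --help`` for options
+   (<myenv>) $ bob binseg train DRIU DRIVE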
 
 .. note::
 
-- 
GitLab