diff --git a/bob/learn/tensorflow/models/alexnet.py b/bob/learn/tensorflow/models/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..202348034fdbbd299b1fb2f64c09e439e822d682 --- /dev/null +++ b/bob/learn/tensorflow/models/alexnet.py @@ -0,0 +1,43 @@ +import tensorflow as tf + + +def AlexNet_simplified(name="AlexNet", **kwargs): + """A simplified implementation of AlexNet presented in: + A. Krizhevsky, I. Sutskever, and G. E. Hinton, “ImageNet classification with deep + convolutional neural networks,” in Advances in Neural Information Processing Systems (NIPS), 2012. + """ + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=(227, 227, 3)), + tf.keras.layers.Conv2D(filters=96, kernel_size=11, strides=4, name="C1", activation="relu"), + tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name="P1"), + tf.keras.layers.Conv2D(filters=256, kernel_size=5, strides=1, name="C2", activation="relu", padding="same"), + tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name="P2"), + tf.keras.layers.Conv2D(filters=384, kernel_size=3, strides=1, name="C3", activation="relu", padding="same"), + tf.keras.layers.Conv2D(filters=384, kernel_size=3, strides=1, name="C4", activation="relu", padding="same"), + tf.keras.layers.Conv2D(filters=256, kernel_size=3, strides=1, name="C5", activation="relu", padding="same"), + tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name="P5"), + tf.keras.layers.Flatten(name="FLATTEN"), + tf.keras.layers.Dropout(rate=0.5, name="D6"), + tf.keras.layers.Dense(units=4096, activation="relu", name="F6"), + tf.keras.layers.Dropout(rate=0.5, name="D7"), + tf.keras.layers.Dense(units=4096, activation="relu", name="F7"), + tf.keras.layers.Dense(units=1000, activation="softmax", name="OUTPUT"), + ], + name=name, + **kwargs + ) + return model + + +if __name__ == "__main__": + import pkg_resources + from bob.learn.tensorflow.utils import model_summary + + model = AlexNet_simplified() + model.summary() + rows = model_summary(model, do_print=True) + del rows[-2] + from tabulate import tabulate + + print(tabulate(rows, headers="firstrow", tablefmt="latex"))
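A minimal smoke test for ``AlexNet_simplified`` (a sketch, not part of the diff; it assumes TensorFlow 2.x and that ``bob.learn.tensorflow`` is importable)::

    import numpy as np
    from bob.learn.tensorflow.models.alexnet import AlexNet_simplified

    model = AlexNet_simplified()
    # one random 227x227 RGB image, matching the tf.keras.Input layer above
    x = np.random.rand(1, 227, 227, 3).astype("float32")
    probs = model(x)  # (1, 1000) softmax probabilities over the 1000 classes
    assert probs.shape == (1, 1000)

Note that the input layer is 227x227, the size used by common AlexNet implementations, rather than the 224x224 quoted in the paper.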
diff --git a/bob/learn/tensorflow/models/autoencoder_face.py b/bob/learn/tensorflow/models/autoencoder_face.py index 318dd65c695e85e527f79d29df9322c579ab8495..9abf7f6fab81ad6afc65726f1fe14ade52c8352f 100644 --- a/bob/learn/tensorflow/models/autoencoder_face.py +++ b/bob/learn/tensorflow/models/autoencoder_face.py @@ -1,5 +1,5 @@ import tensorflow as tf -from .densenet import densenet161 +from bob.learn.tensorflow.models.densenet import densenet161 def _get_l2_kw(weight_decay): @@ -9,61 +9,118 @@ def _get_l2_kw(weight_decay): return l2_kw -class ConvDecoder(tf.keras.Sequential): +# class ConvDecoder(tf.keras.Sequential): +# """The decoder similar to the one in +# https://github.com/google/compare_gan/blob/master/compare_gan/architectures/sndcgan.py +# """ + +# def __init__( +# self, +# z_dim, +# decoder_layers=( +# (512, 7, 7, 0), +# (256, 4, 2, 1), +# (128, 4, 2, 1), +# (64, 4, 2, 1), +# (32, 4, 2, 1), +# (16, 4, 2, 1), +# (3, 1, 1, 0), +# ), +# weight_decay=1e-5, +# name="Decoder", +# **kwargs, +# ): +# self.z_dim = z_dim +# self.data_format = data_format = "channels_last" +# l2_kw = _get_l2_kw(weight_decay) +# layers = [ +# tf.keras.layers.Reshape((1, 1, z_dim), input_shape=(z_dim,), name="reshape") +# ] +# for i, (filters, kernel_size, strides, cropping) in enumerate(decoder_layers): +# dconv = tf.keras.layers.Conv2DTranspose( +# filters, +# kernel_size, +# strides=strides, +# use_bias=i == len(decoder_layers) - 1, +# data_format=data_format, +# name=f"dconv_{i}", +# **l2_kw, +# ) +# crop = tf.keras.layers.Cropping2D( +# cropping=cropping, data_format=data_format, name=f"crop_{i}" +# ) + +# if i == len(decoder_layers) - 1: +# act = tf.keras.layers.Activation("tanh", name=f"tanh_{i}") +# bn = None +# else: +# act = tf.keras.layers.Activation("relu", name=f"relu_{i}") +# bn = tf.keras.layers.BatchNormalization( +# scale=False, fused=False, name=f"bn_{i}" +# ) +# if bn is not None: +# layers.extend([dconv, crop, bn, act]) +# else: +# layers.extend([dconv, crop, act]) +# with tf.name_scope(name): +# super().__init__(layers=layers, name=name, **kwargs) + + +def ConvDecoder( + z_dim, + decoder_layers=( + (512, 7, 7, 0), + (256, 4, 2, 1), + (128, 4, 2, 1), + (64, 4, 2, 1), + (32, 4, 2, 1), + (16, 4, 2, 1), + (3, 1, 1, 0), + ), + weight_decay=1e-5, + last_act="tanh", + name="Decoder", + **kwargs, +): """The decoder similar to the one in https://github.com/google/compare_gan/blob/master/compare_gan/architectures/sndcgan.py """ + data_format = "channels_last" + l2_kw = _get_l2_kw(weight_decay) + layers = [ + tf.keras.layers.Reshape( + (1, 1, z_dim), input_shape=(z_dim,), name=f"{name}/reshape" + ) + ] + for i, (filters, kernel_size, strides, cropping) in enumerate(decoder_layers): + dconv = tf.keras.layers.Conv2DTranspose( + filters, + kernel_size, + strides=strides, + use_bias=i == len(decoder_layers) - 1, + data_format=data_format, + name=f"{name}/dconv_{i}", + **l2_kw, + ) + crop = tf.keras.layers.Cropping2D( + cropping=cropping, data_format=data_format, name=f"{name}/crop_{i}" + ) - def __init__( - self, - z_dim, - decoder_layers=( - (512, 7, 7, 0), - (256, 4, 2, 1), - (128, 4, 2, 1), - (64, 4, 2, 1), - (32, 4, 2, 1), - (16, 4, 2, 1), - (3, 1, 1, 0), - ), - weight_decay=1e-5, - name="Decoder", - **kwargs, - ): - self.z_dim = z_dim - self.data_format = data_format = "channels_last" - l2_kw = _get_l2_kw(weight_decay) - layers = [ - tf.keras.layers.Reshape((1, 1, z_dim), input_shape=(z_dim,), name="reshape") - ] - for i, (filters, kernel_size, strides, cropping) in enumerate(decoder_layers): - dconv = tf.keras.layers.Conv2DTranspose( - filters, - kernel_size, - strides=strides, - use_bias=i == len(decoder_layers) - 1, - data_format=data_format, - name=f"dconv_{i}", - **l2_kw, + if i == len(decoder_layers) - 1: + act = tf.keras.layers.Activation( + last_act, name=f"{name}/{last_act}_{i}" ) - crop = tf.keras.layers.Cropping2D( - cropping=cropping, data_format=data_format, name=f"crop_{i}" + bn = None + else: + act = tf.keras.layers.Activation("relu", name=f"{name}/relu_{i}") + bn = tf.keras.layers.BatchNormalization( + scale=False, fused=False, name=f"{name}/bn_{i}" ) - - if i == len(decoder_layers) - 1: - act = tf.keras.layers.Activation("tanh", name=f"tanh_{i}") - bn = None - else: - act = tf.keras.layers.Activation("relu", name=f"relu_{i}") - bn = tf.keras.layers.BatchNormalization( - scale=False, fused=False, name=f"bn_{i}" - ) - if bn is not None: - layers.extend([dconv, crop, bn, act]) - else: - layers.extend([dconv, crop, act]) - with tf.name_scope(name): - super().__init__(layers=layers, name=name, **kwargs) + if bn is not None: + layers.extend([dconv, crop, bn, act]) + else: + layers.extend([dconv, crop, act]) + return tf.keras.Sequential(layers, name=name, **kwargs) class Autoencoder(tf.keras.Model): @@ -90,10 +147,28 @@ class Autoencoder(tf.keras.Model): x_hat = self.decoder(z, training=training) return z, x_hat -def autoencoder_face(z_dim=256, weight_decay=1e-9): + +def 
autoencoder_face(z_dim=256, weight_decay=1e-9, decoder_last_act="tanh"): encoder = densenet161( output_classes=z_dim, weight_decay=weight_decay, weights=None, name="DenseNet" ) - decoder = ConvDecoder(z_dim=z_dim, weight_decay=weight_decay, name="Decoder") + decoder = ConvDecoder( + z_dim=z_dim, + weight_decay=weight_decay, + last_act=decoder_last_act, + name="Decoder", + ) autoencoder = Autoencoder(encoder, decoder, name="Autoencoder") return autoencoder + + +if __name__ == "__main__": + import pkg_resources + from tabulate import tabulate + from bob.learn.tensorflow.utils import model_summary + + model = ConvDecoder(z_dim=256, weight_decay=1e-9, last_act="tanh", name="Decoder") + model.summary() + rows = model_summary(model, do_print=True) + del rows[-2] + print(tabulate(rows, headers="firstrow", tablefmt="latex")) diff --git a/bob/learn/tensorflow/models/densenet.py b/bob/learn/tensorflow/models/densenet.py index e1f70d074263313ee12d0f013a070950ec489b24..5042ba89b79e42b084eb5182132c6b8a4d8b577f 100644 --- a/bob/learn/tensorflow/models/densenet.py +++ b/bob/learn/tensorflow/models/densenet.py @@ -36,6 +36,10 @@ class ConvBlock(tf.keras.Model): axis = -1 if data_format == "channels_last" else 1 inter_filter = num_filters * 4 + self.num_filters = num_filters + self.bottleneck = bottleneck + self.dropout_rate = dropout_rate + self.norm1 = tf.keras.layers.BatchNormalization(axis=axis, name="norm1") if self.bottleneck: self.relu1 = tf.keras.layers.Activation("relu", name="relu1") @@ -52,10 +56,10 @@ class ConvBlock(tf.keras.Model): self.norm2 = tf.keras.layers.BatchNormalization(axis=axis, name="norm2") self.relu2 = tf.keras.layers.Activation("relu", name="relu2") - # don't forget to set use_bias=False when using batchnorm self.conv2_pad = tf.keras.layers.ZeroPadding2D( padding=1, data_format=data_format, name="conv2_pad" ) + # don't forget to set use_bias=False when using batchnorm self.conv2 = tf.keras.layers.Conv2D( num_filters, (3, 3), @@ -109,6 +113,9 @@ class DenseBlock(tf.keras.Model): ): super().__init__(**kwargs) self.num_layers = num_layers + self.growth_rate = growth_rate + self.bottleneck = bottleneck + self.dropout_rate = dropout_rate self.axis = -1 if data_format == "channels_last" else 1 self.blocks = [] @@ -127,7 +134,9 @@ class DenseBlock(tf.keras.Model): def call(self, x, training=None): for i in range(int(self.num_layers)): output = self.blocks[i](x, training=training) - x = tf.concat([x, output], axis=self.axis) + x = tf.keras.layers.Concatenate(axis=self.axis, name=f"concat_{i+1}")( + [x, output] + ) return x @@ -139,14 +148,12 @@ class TransitionBlock(tf.keras.Model): num_filters: number of filters passed to a convolutional layer. data_format: "channels_first" or "channels_last" weight_decay: weight decay - dropout_rate: dropout rate. 
""" - def __init__( - self, num_filters, data_format, weight_decay=1e-4, dropout_rate=0, **kwargs - ): + def __init__(self, num_filters, data_format, weight_decay=1e-4, **kwargs): super().__init__(**kwargs) axis = -1 if data_format == "channels_last" else 1 + self.num_filters = num_filters self.norm = tf.keras.layers.BatchNormalization(axis=axis, name="norm") self.relu = tf.keras.layers.Activation("relu", name="relu") @@ -309,11 +316,11 @@ class DenseNet(tf.keras.Model): self.dense_blocks.append( DenseBlock( self.num_layers_in_each_block[i], - self.growth_rate, - self.data_format, - self.bottleneck, - self.weight_decay, - self.dropout_rate, + growth_rate=self.growth_rate, + data_format=self.data_format, + bottleneck=self.bottleneck, + weight_decay=self.weight_decay, + dropout_rate=self.dropout_rate, name=f"dense_block_{i+1}", ) ) @@ -321,9 +328,8 @@ class DenseNet(tf.keras.Model): self.transition_blocks.append( TransitionBlock( num_filters_after_each_block[i + 1], - self.data_format, - self.weight_decay, - self.dropout_rate, + data_format=self.data_format, + weight_decay=self.weight_decay, name=f"transition_block_{i+1}", ) ) @@ -440,3 +446,37 @@ class DeepPixBiS(tf.keras.Model): except TypeError: x = l(x) return x + + +if __name__ == "__main__": + import pkg_resources + from tabulate import tabulate + from bob.learn.tensorflow.utils import model_summary + + def print_model(inputs, outputs): + model = tf.keras.Model(inputs, outputs) + rows = model_summary(model, do_print=True) + del rows[-2] + print(tabulate(rows, headers="firstrow", tablefmt="latex")) + + # inputs = tf.keras.Input((224, 224, 3), name="input") + # model = densenet161(weights=None) + # outputs = model.call(inputs) + # print_model(inputs, outputs) + + # inputs = tf.keras.Input((56, 56, 96)) + # outputs = model.dense_blocks[0].call(inputs) + # print_model(inputs, outputs) + + # inputs = tf.keras.Input((56, 56, 96)) + # outputs = model.dense_blocks[0].blocks[0].call(inputs) + # print_model(inputs, outputs) + + # inputs = tf.keras.Input((56, 56, 384)) + # outputs = model.transition_blocks[0].call(inputs) + # print_model(inputs, outputs) + + inputs = tf.keras.Input((224, 224, 3), name="input") + model = DeepPixBiS() + outputs = model.call(inputs) + print_model(inputs, outputs) diff --git a/bob/learn/tensorflow/models/inception.py b/bob/learn/tensorflow/models/inception.py new file mode 100644 index 0000000000000000000000000000000000000000..3e25a59fa4ae9c6bb9d823ea7d6c20c311f37ca4 --- /dev/null +++ b/bob/learn/tensorflow/models/inception.py @@ -0,0 +1,159 @@ +import tensorflow as tf + + +class LRN(tf.keras.layers.Lambda): + """local response normalization with default parameters for GoogLeNet + """ + + def __init__(self, alpha=0.0001, beta=0.75, depth_radius=5, **kwargs): + self.alpha = alpha + self.beta = beta + self.depth_radius = depth_radius + + def lrn(inputs): + return tf.nn.local_response_normalization( + inputs, alpha=self.alpha, beta=self.beta, depth_radius=self.depth_radius + ) + + return super().__init__(lrn, **kwargs) + + +class InceptionModule(tf.keras.Model): + """The inception module as it was introduced in: + + C. Szegedy et al., “Going deeper with convolutions,” in Proceedings of the IEEE + Conference on Computer Vision and Pattern Recognition, 2015, pp. 1–9. 
+ """ + + def __init__( + self, + filter_1x1, + filter_3x3_reduce, + filter_3x3, + filter_5x5_reduce, + filter_5x5, + pool_proj, + name="InceptionModule", + **kwargs + ): + super().__init__(name=name, **kwargs) + self.filter_1x1 = filter_1x1 + self.filter_3x3_reduce = filter_3x3_reduce + self.filter_3x3 = filter_3x3 + self.filter_5x5_reduce = filter_5x5_reduce + self.filter_5x5 = filter_5x5 + self.pool_proj = pool_proj + + self.branch1_conv1 = tf.keras.layers.Conv2D( + filter_1x1, 1, padding="same", activation="relu", name="branch1_conv1" + ) + + self.branch2_conv1 = tf.keras.layers.Conv2D( + filter_3x3_reduce, + 1, + padding="same", + activation="relu", + name="branch2_conv1", + ) + self.branch2_conv2 = tf.keras.layers.Conv2D( + filter_3x3, 3, padding="same", activation="relu", name="branch2_conv2" + ) + + self.branch3_conv1 = tf.keras.layers.Conv2D( + filter_5x5_reduce, + 1, + padding="same", + activation="relu", + name="branch3_conv1", + ) + self.branch3_conv2 = tf.keras.layers.Conv2D( + filter_5x5, 5, padding="same", activation="relu", name="branch3_conv2" + ) + + self.branch4_pool1 = tf.keras.layers.MaxPool2D( + 3, 1, padding="same", name="branch4_pool1" + ) + self.branch4_conv1 = tf.keras.layers.Conv2D( + pool_proj, 1, padding="same", activation="relu", name="branch4_conv1" + ) + + self.concat = tf.keras.layers.Concatenate( + axis=-1 if tf.keras.backend.image_data_format() == "channels_last" else -3, + name="concat", + ) + + def call(self, inputs): + b1 = self.branch1_conv1(inputs) + + b2 = self.branch2_conv1(inputs) + b2 = self.branch2_conv2(b2) + + b3 = self.branch3_conv1(inputs) + b3 = self.branch3_conv2(b3) + + b4 = self.branch4_pool1(inputs) + b4 = self.branch4_conv1(b4) + + return self.concat([b1, b2, b3, b4]) + + +def GoogLeNet(*, num_classes=1000, name="GoogLeNet", **kwargs): + """GoogLeNet as depicted in Figure 3 of + C. Szegedy et al., “Going deeper with convolutions,” in Proceedings of the IEEE + Conference on Computer Vision and Pattern Recognition, 2015, pp. 1–9. 
+ and implemented in Caffe: + https://github.com/BVLC/caffe/tree/master/models/bvlc_googlenet + """ + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=(224, 224, 3)), + tf.keras.layers.Conv2D( + 64, 7, strides=2, padding="same", activation="relu", name="conv1/7x7_s2" + ), + tf.keras.layers.MaxPool2D(3, 2, padding="same", name="pool1/3x3_s2"), + LRN(name="pool1/norm1"), + tf.keras.layers.Conv2D(64, 1, padding="same", activation="relu", name="conv2/3x3_reduce"), + tf.keras.layers.Conv2D( + 192, 3, padding="same", activation="relu", name="conv2/3x3" + ), + LRN(name="conv2/norm2"), + tf.keras.layers.MaxPool2D(3, 2, padding="same", name="pool2/3x3_s2"), + InceptionModule(64, 96, 128, 16, 32, 32, name="inception_3a"), + InceptionModule(128, 128, 192, 32, 96, 64, name="inception_3b"), + tf.keras.layers.MaxPool2D(3, 2, padding="same", name="pool3/3x3_s2"), + InceptionModule(192, 96, 208, 16, 48, 64, name="inception_4a"), + InceptionModule(160, 112, 224, 24, 64, 64, name="inception_4b"), + InceptionModule(128, 128, 256, 24, 64, 64, name="inception_4c"), + InceptionModule(112, 144, 288, 32, 64, 64, name="inception_4d"), + InceptionModule(256, 160, 320, 32, 128, 128, name="inception_4e"), + tf.keras.layers.MaxPool2D(3, 2, padding="same", name="pool4/3x3_s2"), + InceptionModule(256, 160, 320, 32, 128, 128, name="inception_5a"), + InceptionModule(384, 192, 384, 48, 128, 128, name="inception_5b"), + tf.keras.layers.GlobalAvgPool2D(name="pool5"), + tf.keras.layers.Dropout(rate=0.4, name="dropout"), + tf.keras.layers.Dense(num_classes, name="output", activation="softmax"), + ], + name=name, + **kwargs + ) + + return model + + +if __name__ == "__main__": + import pkg_resources + from tabulate import tabulate + from bob.learn.tensorflow.utils import model_summary + + inputs = tf.keras.Input((28, 28, 192), name="input") + model = InceptionModule(64, 96, 128, 16, 32, 32) + outputs = model.call(inputs) + model = tf.keras.Model(inputs, outputs) + rows = model_summary(model, do_print=True) + del rows[-2] + print(tabulate(rows, headers="firstrow", tablefmt="latex")) + + model = GoogLeNet() + rows = model_summary(model, do_print=True) + del rows[-2] + print(tabulate(rows, headers="firstrow", tablefmt="latex"))
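The concatenation arithmetic of ``InceptionModule`` can be sanity-checked on a dummy tensor (a sketch, not part of the diff; it assumes TensorFlow 2.x and the module path added above)::

    import tensorflow as tf
    from bob.learn.tensorflow.models.inception import InceptionModule

    # inception_3a: 64 (1x1) + 128 (3x3) + 32 (5x5) + 32 (pool proj) = 256 channels
    block = InceptionModule(64, 96, 128, 16, 32, 32, name="inception_3a")
    y = block(tf.random.uniform((1, 28, 28, 192)))
    assert y.shape == (1, 28, 28, 256)

All four branches use ``padding="same"`` with stride 1, so only the channel axis grows.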
diff --git a/bob/learn/tensorflow/models/inception_resnet_v2.py b/bob/learn/tensorflow/models/inception_resnet_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..79b1a66d24f59a3788742fe843e08466e3a67bbd --- /dev/null +++ b/bob/learn/tensorflow/models/inception_resnet_v2.py @@ -0,0 +1,724 @@ +# -*- coding: utf-8 -*- +"""Inception-ResNet-V2 and MultiScale-Inception-ResNet-V2 models for Keras. """ +from tensorflow.keras.models import Model +from tensorflow.keras.layers import ( + Activation, + BatchNormalization, + Concatenate, + Conv2D, + Dense, + Dropout, + Input, + Lambda, + MaxPool2D, + AvgPool2D, + GlobalAvgPool2D, + GlobalMaxPool2D, +) +from tensorflow.keras import backend as K +import tensorflow as tf +import logging + +logger = logging.getLogger(__name__) + + +class Conv2D_BN(tf.keras.Sequential): + """Utility class to apply conv + BN. + + Attributes + ---------- + activation + activation in `Conv2D`. + filters + filters in `Conv2D`. + kernel_size + kernel size as in `Conv2D`. + padding + padding mode in `Conv2D`. + strides + strides in `Conv2D`. + use_bias + whether to use a bias in `Conv2D`. + name + name of the ops; will become `name + '/Act'` for the activation + and `name + '/BatchNorm'` for the batch norm layer. + """ + + def __init__( + self, + filters, + kernel_size, + strides=1, + padding="same", + activation="relu", + use_bias=False, + name=None, + **kwargs, + ): + + self.filters = filters + self.kernel_size = kernel_size + self.strides = strides + self.padding = padding + self.activation = activation + self.use_bias = use_bias + + layers = [ + Conv2D( + filters, + kernel_size, + strides=strides, + padding=padding, + use_bias=use_bias, + name=name, + ) + ] + + if not use_bias: + bn_axis = 1 if K.image_data_format() == "channels_first" else 3 + bn_name = None if name is None else name + "/BatchNorm" + layers += [BatchNormalization(axis=bn_axis, scale=False, name=bn_name)] + + if activation is not None: + ac_name = None if name is None else name + "/Act" + layers += [Activation(activation, name=ac_name)] + + super().__init__(layers, name=name, **kwargs) + + +class ScaledResidual(tf.keras.Model): + """A scaled residual connection layer""" + def __init__(self, scale, name="scaled_residual", **kwargs): + super().__init__(name=name, **kwargs) + self.scale = scale + + def call(self, inputs, training=None): + return inputs[0] + inputs[1] * self.scale + + +class InceptionResnetBlock(tf.keras.Model): + """An Inception-ResNet block. + + This class builds 3 types of Inception-ResNet blocks mentioned + in the paper, controlled by the `block_type` argument (which is the + block name used in the official TF-slim implementation): + - Inception-ResNet-A: `block_type='block35'` + - Inception-ResNet-B: `block_type='block17'` + - Inception-ResNet-C: `block_type='block8'` + + # Attributes + scale: scaling factor to scale the residuals (i.e., the output of + passing `x` through an inception module) before adding them + to the shortcut branch. + Let `r` be the output from the residual branch, + the output of this block will be `x + scale * r`. + block_type: `'block35'`, `'block17'` or `'block8'`, determines + the network structure in the residual branch. + block_idx: an `int` used for generating layer names. + The Inception-ResNet blocks + are repeated many times in this network. + We use `block_idx` to identify + each of the repetitions. For example, + the first Inception-ResNet-A block + will have `block_type='block35', block_idx=0`, + and the layer names will have + a common prefix `'block35_0'`. + activation: activation function to use at the end of the block + (see [activations](../activations.md)). + When `activation=None`, no activation is applied + (i.e., "linear" activation: `a(x) = x`). + + # Raises + ValueError: if `block_type` is not one of `'block35'`, + `'block17'` or `'block8'`. 
+ """ + + def __init__( + self, + n_channels, + scale, + block_type, + block_idx, + activation="relu", + n=1, + name=None, + **kwargs, + ): + name = name or block_type + super().__init__(name=name, **kwargs) + self.n_channels = n_channels + self.scale = scale + self.block_type = block_type + self.block_idx = block_idx + self.activation = activation + self.n = n + + if block_type == "block35": + branch_0 = [Conv2D_BN(32 // n, 1, name="branch0_conv1")] + branch_1 = [Conv2D_BN(32 // n, 1, name="branch1_conv1")] + branch_1 += [Conv2D_BN(32 // n, 3, name="branch1_conv2")] + branch_2 = [Conv2D_BN(32 // n, 1, name="branch2_conv1")] + branch_2 += [Conv2D_BN(48 // n, 3, name="branch2_conv2")] + branch_2 += [Conv2D_BN(64 // n, 3, name="branch2_conv3")] + branches = [branch_0, branch_1, branch_2] + elif block_type == "block17": + branch_0 = [Conv2D_BN(192 // n, 1, name="branch0_conv1")] + branch_1 = [Conv2D_BN(128 // n, 1, name="branch1_conv1")] + branch_1 += [ + Conv2D_BN(160 // n, (1, 7), name="branch1_conv2") + ] + branch_1 += [ + Conv2D_BN(192 // n, (7, 1), name="branch1_conv3") + ] + branches = [branch_0, branch_1] + elif block_type == "block8": + branch_0 = [Conv2D_BN(192 // n, 1, name="branch0_conv1")] + branch_1 = [Conv2D_BN(192 // n, 1, name="branch1_conv1")] + branch_1 += [ + Conv2D_BN(224 // n, (1, 3), name="branch1_conv2") + ] + branch_1 += [ + Conv2D_BN(256 // n, (3, 1), name="branch1_conv3") + ] + branches = [branch_0, branch_1] + else: + raise ValueError( + "Unknown Inception-ResNet block type. " + 'Expects "block35", "block17" or "block8", ' + "but got: " + str(block_type) + ) + + self.branches = branches + + channel_axis = 1 if K.image_data_format() == "channels_first" else 3 + self.concat = Concatenate(axis=channel_axis, name="concatenate") + self.up_conv = Conv2D_BN( + n_channels, 1, activation=None, use_bias=True, name="up_conv" + ) + + # output_shape = (None, None, n_channels) + # if K.image_data_format() == "channels_first": + # output_shape = (n_channels, None, None) + # self.residual = Lambda( + # lambda inputs, scale: inputs[0] + inputs[1] * scale, + # output_shape=output_shape, + # arguments={"scale": scale}, + # name="residual_scale", + # ) + self.residual = ScaledResidual(scale) + self.act = lambda x: x + if activation is not None: + self.act = Activation(activation, name="act") + + def call(self, inputs, training=None): + branch_outputs = [] + for branch in self.branches: + x = inputs + for layer in branch: + x = layer(x, training=training) + branch_outputs.append(x) + + mixed = self.concat(branch_outputs) + up = self.up_conv(mixed, training=training) + + x = self.residual([inputs, up]) + x = self.act(x) + + return x + + +class ReductionA(tf.keras.Model): + """A Reduction A block for InceptionResnetV2""" + + def __init__( + self, + padding, + k=256, + kl=256, + km=384, + n=384, + use_atrous=False, + name="reduction_a", + **kwargs, + ): + super().__init__(name=name, **kwargs) + self.padding = padding + self.k = k + self.kl = kl + self.km = km + self.n = n + self.use_atrous = use_atrous + + branch_1 = [ + Conv2D_BN( + n, + 3, + strides=1 if use_atrous else 2, + padding=padding, + name="branch1_conv1", + ) + ] + + branch_2 = [ + Conv2D_BN(k, 1, name="branch2_conv1"), + Conv2D_BN(kl, 3, name="branch2_conv2"), + Conv2D_BN( + km, + 3, + strides=1 if use_atrous else 2, + padding=padding, + name="branch2_conv3", + ), + ] + + branch_pool = [ + MaxPool2D( + 3, + strides=1 if use_atrous else 2, + padding=padding, + name="branch3_pool1", + ) + ] + self.branches = [branch_1, branch_2, 
branch_pool] + channel_axis = 1 if K.image_data_format() == "channels_first" else 3 + self.concat = Concatenate(axis=channel_axis, name=f"{name}/mixed") + + def call(self, inputs, training=None): + branch_outputs = [] + for branch in self.branches: + x = inputs + for layer in branch: + try: + x = layer(x, training=training) + except TypeError: + x = layer(x) + branch_outputs.append(x) + + return self.concat(branch_outputs) + + +class ReductionB(tf.keras.Model): + """A Reduction B block for InceptionResnetV2""" + + def __init__( + self, + padding, + k=256, + kl=288, + km=320, + n=256, + no=384, + p=256, + pq=288, + name="reduction_b", + **kwargs, + ): + super().__init__(name=name, **kwargs) + self.padding = padding + self.k = k + self.kl = kl + self.km = km + self.n = n + self.no = no + self.p = p + self.pq = pq + + branch_1 = [ + Conv2D_BN(n, 1, name="branch1_conv1"), + Conv2D_BN( + no, 3, strides=2, padding=padding, name="branch1_conv2" + ), + ] + + branch_2 = [ + Conv2D_BN(p, 1, name="branch2_conv1"), + Conv2D_BN( + pq, 3, strides=2, padding=padding, name="branch2_conv2" + ), + ] + + branch_3 = [ + Conv2D_BN(k, 1, name="branch3_conv1"), + Conv2D_BN(kl, 3, name="branch3_conv2"), + Conv2D_BN( + km, 3, strides=2, padding=padding, name="branch3_conv3" + ), + ] + + branch_pool = [ + MaxPool2D( + 3, strides=2, padding=padding, name=f"branch4_pool1" + ) + ] + self.branches = [branch_1, branch_2, branch_3, branch_pool] + channel_axis = 1 if K.image_data_format() == "channels_first" else 3 + self.concat = Concatenate(axis=channel_axis, name=f"{name}/mixed") + + def call(self, inputs, training=None): + branch_outputs = [] + for branch in self.branches: + x = inputs + for layer in branch: + try: + x = layer(x, training=training) + except TypeError: + x = layer(x) + branch_outputs.append(x) + + return self.concat(branch_outputs) + + +class InceptionA(tf.keras.Model): + def __init__(self, pool_filters, name="inception_a", **kwargs): + super().__init__(name=name, **kwargs) + self.pool_filters = pool_filters + + self.branch1x1 = Conv2D_BN(96, kernel_size=1, padding="same", name="branch1_conv1") + + self.branch3x3dbl_1 = Conv2D_BN(64, kernel_size=1, padding="same", name="branch2_conv1") + self.branch3x3dbl_2 = Conv2D_BN(96, kernel_size=3, padding="same", name="branch2_conv2") + self.branch3x3dbl_3 = Conv2D_BN(96, kernel_size=3, padding="same", name="branch2_conv3") + + self.branch5x5_1 = Conv2D_BN(48, kernel_size=1, padding="same", name="branch3_conv1") + self.branch5x5_2 = Conv2D_BN(64, kernel_size=5, padding="same", name="branch3_conv2") + + self.branch_pool_1 = AvgPool2D(pool_size=3, strides=1, padding="same", name="branch4_pool1") + self.branch_pool_2 = Conv2D_BN(pool_filters, kernel_size=1, padding="same", name="branch4_conv1") + + channel_axis = 1 if K.image_data_format() == "channels_first" else 3 + self.concat = Concatenate(axis=channel_axis) + + def call(self, inputs, training=None): + branch1x1 = self.branch1x1(inputs) + + branch3x3dbl = self.branch3x3dbl_1(inputs) + branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl) + branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl) + + branch5x5 = self.branch5x5_1(inputs) + branch5x5 = self.branch5x5_2(branch5x5) + + branch_pool = self.branch_pool_1(inputs) + branch_pool = self.branch_pool_2(branch_pool) + + outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool] + return self.concat(outputs) + + +def InceptionResNetV2( + include_top=True, + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000, + **kwargs, +): + """Instantiates the 
Inception-ResNet v2 architecture. + Note that the data format convention used by the model is + the one specified in your Keras config at `~/.keras/keras.json`. + # Arguments + include_top: whether to include the fully-connected + layer at the top of the network. + input_tensor: optional Keras tensor (i.e. output of `tf.keras.Input()`) + to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is `False` (otherwise the input shape + has to be `(299, 299, 3)` (with `'channels_last'` data format) + or `(3, 299, 299)` (with `'channels_first'` data format). + It should have exactly 3 input channels, + and width and height should be no smaller than 75. + E.g. `(150, 150, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the last convolutional block. + - `'avg'` means that global average pooling + will be applied to the output of the + last convolutional block, and thus + the output of the model will be a 2D tensor. + - `'max'` means that global max pooling will be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is `True`. + # Returns + A Keras `Model` instance. + # Raises + ValueError: in case of invalid input shape. + """ + + if input_tensor is None: + img_input = tf.keras.Input(shape=input_shape) + else: + if not K.is_keras_tensor(input_tensor): + img_input = tf.keras.Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + # Stem block: 35 x 35 x 192 + x = Conv2D_BN(32, 3, strides=2, padding="valid")(img_input) + x = Conv2D_BN(32, 3, padding="valid")(x) + x = Conv2D_BN(64, 3)(x) + x = MaxPool2D(3, strides=2)(x) + x = Conv2D_BN(80, 1, padding="valid")(x) + x = Conv2D_BN(192, 3, padding="valid")(x) + x = MaxPool2D(3, strides=2)(x) + + # Mixed 5b (Inception-A block): 35 x 35 x 320 + # branch_0 = Conv2D_BN(96, 1)(x) + # branch_1 = Conv2D_BN(48, 1)(x) + # branch_1 = Conv2D_BN(64, 5)(branch_1) + # branch_2 = Conv2D_BN(64, 1)(x) + # branch_2 = Conv2D_BN(96, 3)(branch_2) + # branch_2 = Conv2D_BN(96, 3)(branch_2) + # branch_pool = AvgPool2D(3, strides=1, padding="same")(x) + # branch_pool = Conv2D_BN(64, 1)(branch_pool) + # branches = [branch_0, branch_1, branch_2, branch_pool] + # channel_axis = 1 if K.image_data_format() == "channels_first" else 3 + # x = Concatenate(axis=channel_axis, name="mixed_5b")(branches) + x = InceptionA(pool_filters=64)(x) + + # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 + for block_idx in range(1, 11): + x = InceptionResnetBlock( + n_channels=320, scale=0.17, block_type="block35", block_idx=block_idx, + name=f"block35_{block_idx}", + )(x) + + # Mixed 6a (Reduction-A block): 17 x 17 x 1088 + x = ReductionA(padding="valid", n=384, k=256, kl=256, km=384, use_atrous=False)(x) + + # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 + for block_idx in range(1, 21): + x = InceptionResnetBlock( + n_channels=1088, scale=0.1, block_type="block17", block_idx=block_idx, + name=f"block17_{block_idx}", + )(x) + + # Mixed 7a (Reduction-B block): 8 x 8 x 2080 + x = ReductionB( + padding="valid", n=256, no=384, 
p=256, pq=288, k=256, kl=288, km=320 + )(x) + + # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 + for block_idx in range(1, 10): + x = InceptionResnetBlock( + n_channels=2080, scale=0.2, block_type="block8", block_idx=block_idx, + name=f"block8_{block_idx}", + )(x) + x = InceptionResnetBlock( + n_channels=2080, scale=1.0, activation=None, block_type="block8", block_idx=10, + name=f"block8_{block_idx+1}", + )(x) + + # Final convolution block: 8 x 8 x 1536 + x = Conv2D_BN(1536, 1, name="conv_7b")(x) + + if include_top: + # Classification block + x = GlobalAvgPool2D(name="avg_pool")(x) + x = Dense(classes, activation="softmax", name="predictions")(x) + else: + if pooling == "avg": + x = GlobalAvgPool2D()(x) + elif pooling == "max": + x = GlobalMaxPool2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = tf.keras.utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + # Create model. + model = Model(inputs, x, name="inception_resnet_v2") + + return model + + +def MultiScaleInceptionResNetV2( + scale=0.17, + repeat=3, + classes=1, + dropout_rate=0.2, + input_tensor=None, + input_shape=None, + align_feature_maps=False, + name="InceptionResnetV2", + **kwargs, +): + """A multi-scale architecture inspired from InceptionResNetV2""" + if input_tensor is None: + img_input = Input(shape=input_shape) + else: + if not K.is_keras_tensor(input_tensor): + img_input = Input(tensor=input_tensor, shape=input_shape) + else: + img_input = input_tensor + + padding = "SAME" if align_feature_maps else "VALID" + name = name or "InceptionResnetV2" + + with tf.name_scope(name, "InceptionResnetV2", [img_input]): + # convert colors from RGB to a learned color space and batch norm inputs + # 224, 224, 4 + net = Conv2D_BN( + 4, 1, strides=1, padding="same", activation=None, name="Conv2d_1i_1x1" + )(img_input) + + # reduction_a: 111, 111, 32 + net = ReductionA( + padding=padding, k=8, kl=12, km=14, n=14, name="Reduction_a_1" + )(net) + + # 111, 111, 32 + for block_idx in range(1, 1 + repeat): + net = InceptionResnetBlock( + n_channels=32, + scale=scale, + block_type="block35", + block_idx=block_idx, + activation="relu", + n=2, + name=f"Repeat/block35_{block_idx}", + )(net) + scale_1 = net + + # 55, 55, 96 + net = ReductionA( + padding=padding, k=32, kl=32, km=32, n=32, name="Reduction_a_2" + )(net) + + # 55, 55, 96 + for block_idx in range(1, 1 + repeat): + net = InceptionResnetBlock( + n_channels=96, + scale=scale, + block_type="block17", + block_idx=block_idx, + n=2, + activation="relu", + name=f"Repeat_1/block17_{block_idx}", + )(net) + scale_2 = net + + # 27, 27, 344 + net = ReductionB( + padding, k=64, kl=72, km=80, n=64, no=96, p=64, pq=72, name="Reduction_b" + )(net) + + # 27, 27, 344 + for block_idx in range(1, 1 + repeat): + net = InceptionResnetBlock( + n_channels=344, + scale=scale, + block_type="block8", + block_idx=block_idx, + n=1, + activation="relu", + name=f"Repeat_2/block8_{block_idx}", + )(net) + scale_3 = net + + # 27, 27, 32 + scale_1 = AvgPool2D(3, strides=2, padding=padding, name="Merge/AvgPool_1a")( + scale_1 + ) + scale_1 = AvgPool2D(3, strides=2, padding=padding, name="Merge/AvgPool_1b")( + scale_1 + ) + # 27, 27, 96 + scale_2 = AvgPool2D(3, strides=2, padding=padding, name="Merge/AvgPool_2")( + scale_2 + ) + # 27, 27, 344 + scale_3 = scale_3 + + # 27, 27, 472 * 3 + channel_axis = 1 if K.image_data_format() == "channels_first" else 3 + net = Concatenate(axis=channel_axis, 
name="Merge/concat")( + [scale_1, scale_2, scale_3] + ) + + # 27, 27, 256 + net = Conv2D_BN(256, 1, name="Merge/Conv2d_1")(net) + + # 13, 13, 256 + net = AvgPool2D(3, strides=2, padding=padding, name="Merge/AvgPool_3")(net) + + # 13, 13, 128 + net = Conv2D_BN(128, 1, name="Merge/Conv2d_2")(net) + + net = Dropout(dropout_rate, name="Merge/Dropout")(net) + + # 13, 13, classes + net = Conv2D(classes, 1, padding="same", name="Pixel_Logits")(net) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = tf.keras.utils.get_source_inputs(input_tensor) + else: + inputs = img_input + + model = Model(inputs, net, name=name, **kwargs) + + return model + + +if __name__ == "__main__": + import pkg_resources + from tabulate import tabulate + from bob.learn.tensorflow.utils import model_summary + + def print_model(inputs, outputs, name=None): + print("") + print("===============") + print(name) + print("===============") + model = tf.keras.Model(inputs, outputs) + rows = model_summary(model, do_print=True) + del rows[-2] + print(tabulate(rows, headers="firstrow", tablefmt="latex")) + + # model = InceptionResNetV2(input_shape=(299, 299, 3)) + # inputs = tf.keras.Input((299, 299, 3)) + # outputs = model.call(inputs) + # print_model(inputs, outputs) + + # inputs = tf.keras.Input((299, 299, 3)) + # outputs = model.get_layer("conv2d_bn").call(inputs) + # print_model(inputs, outputs, name="conv2d_bn") + + # inputs = tf.keras.Input((35, 35, 192)) + # outputs = model.get_layer("inception_a").call(inputs) + # print_model(inputs, outputs, name="inception_a") + + # inputs = tf.keras.Input((35, 35, 320)) + # outputs = model.get_layer("block35_1").call(inputs) + # print_model(inputs, outputs, name="block35_1") + + # inputs = tf.keras.Input((17, 17, 1088)) + # outputs = model.get_layer("block17_1").call(inputs) + # print_model(inputs, outputs, name="block17_1") + + # inputs = tf.keras.Input((8, 8, 2080)) + # outputs = model.get_layer("block8_1").call(inputs) + # print_model(inputs, outputs, name="block8_1") + + # inputs = tf.keras.Input((35, 35, 320)) + # outputs = model.get_layer("reduction_a").call(inputs) + # print_model(inputs, outputs, name="reduction_a") + + # inputs = tf.keras.Input((17, 17, 1088)) + # outputs = model.get_layer("reduction_b").call(inputs) + # print_model(inputs, outputs, name="reduction_b") + + model = MultiScaleInceptionResNetV2(input_shape=(224, 224, 3)) + inputs = tf.keras.Input((224, 224, 3)) + outputs = model.call(inputs) + print_model(inputs, outputs) + + # inputs = tf.keras.Input((224, 224, 3)) + # outputs = model.get_layer("Conv2d_1i_1x1").call(inputs) + # print_model(inputs, outputs) diff --git a/bob/learn/tensorflow/models/lenet5.py b/bob/learn/tensorflow/models/lenet5.py new file mode 100644 index 0000000000000000000000000000000000000000..a868515b897fe2c010cf407b1af44d5fbe352649 --- /dev/null +++ b/bob/learn/tensorflow/models/lenet5.py @@ -0,0 +1,43 @@ +import tensorflow as tf + + +def LeNet5_simplified(name="LeNet5", **kwargs): + """A heavily simplified implementation of LeNet-5 presented in: + Y. LeCun, L. Bottou, Y. Bengio, and P. Haffner, “Gradient-based learning applied to + document recognition,” Proceedings of the IEEE, vol. 86, no. 11, pp. 2278–2324, 1998. 
+ """ + model = tf.keras.Sequential( + [ + tf.keras.Input(shape=(32, 32, 1)), + tf.keras.layers.Conv2D( + filters=6, kernel_size=5, name="C1", activation="tanh" + ), + tf.keras.layers.AvgPool2D(pool_size=2, name="S2"), + tf.keras.layers.Conv2D( + filters=16, kernel_size=5, name="C3", activation="tanh" + ), + tf.keras.layers.AvgPool2D(pool_size=2, name="S4"), + tf.keras.layers.Conv2D( + filters=120, kernel_size=5, name="C5", activation="tanh" + ), + tf.keras.layers.Flatten(name="FLATTEN"), + tf.keras.layers.Dense(units=84, activation="tanh", name="F6"), + tf.keras.layers.Dense(units=10, activation="sigmoid", name="OUTPUT"), + ], + name=name, + **kwargs + ) + return model + + +if __name__ == "__main__": + import pkg_resources + from bob.learn.tensorflow.utils import model_summary + + model = LeNet5_simplified() + model.summary() + rows = model_summary(model, do_print=True) + del rows[-2] + from tabulate import tabulate + + print(tabulate(rows, headers="firstrow", tablefmt="latex")) diff --git a/bob/learn/tensorflow/models/msu_patch.py b/bob/learn/tensorflow/models/msu_patch.py new file mode 100644 index 0000000000000000000000000000000000000000..9ae0e58ec3337bac178a14c642a0e4bd3b346db5 --- /dev/null +++ b/bob/learn/tensorflow/models/msu_patch.py @@ -0,0 +1,54 @@ +import tensorflow as tf + + +def MSUPatch(name="MSUPatch", **kwargs): + + return tf.keras.Sequential( + [ + tf.keras.layers.Conv2D( + 50, (5, 5), padding="same", use_bias=False, name="Conv-1", input_shape=(96, 96, 3) + ), + tf.keras.layers.BatchNormalization(scale=False, name="BN-1"), + tf.keras.layers.Activation("relu", name="ReLU-1"), + tf.keras.layers.MaxPool2D(padding="same", name="MaxPool-1"), + tf.keras.layers.Conv2D(100, (3, 3), padding="same", use_bias=False, name="Conv-2"), + tf.keras.layers.BatchNormalization(scale=False, name="BN-2"), + tf.keras.layers.Activation("relu", name="ReLU-2"), + tf.keras.layers.MaxPool2D(padding="same", name="MaxPool-2"), + tf.keras.layers.Conv2D(150, (3, 3), padding="same", use_bias=False, name="Conv-3"), + tf.keras.layers.BatchNormalization(scale=False, name="BN-3"), + tf.keras.layers.Activation("relu", name="ReLU-3"), + tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same", name="MaxPool-3"), + tf.keras.layers.Conv2D(200, (3, 3), padding="same", use_bias=False, name="Conv-4"), + tf.keras.layers.BatchNormalization(scale=False, name="BN-4"), + tf.keras.layers.Activation("relu", name="ReLU-4"), + tf.keras.layers.MaxPool2D(padding="same", name="MaxPool-4"), + tf.keras.layers.Conv2D(250, (3, 3), padding="same", use_bias=False, name="Conv-5"), + tf.keras.layers.BatchNormalization(scale=False, name="BN-5"), + tf.keras.layers.Activation("relu", name="ReLU-5"), + tf.keras.layers.MaxPool2D(padding="same", name="MaxPool-5"), + tf.keras.layers.Flatten(name="Flatten"), + tf.keras.layers.Dense(1000, use_bias=False, name="FC-1"), + tf.keras.layers.BatchNormalization(scale=False, name="BN-6"), + tf.keras.layers.Activation("relu", name="ReLU-6"), + tf.keras.layers.Dropout(rate=0.5, name="Dropout"), + tf.keras.layers.Dense(400, use_bias=False, name="FC-2"), + tf.keras.layers.BatchNormalization(scale=False, name="BN-7"), + tf.keras.layers.Activation("relu", name="ReLU-7"), + tf.keras.layers.Dense(2, name="FC-3"), + ], + name=name, + **kwargs + ) + + +if __name__ == "__main__": + import pkg_resources + from tabulate import tabulate + from bob.learn.tensorflow.utils import model_summary + + model = MSUPatch() + model.summary() + rows = model_summary(model, do_print=True) + del rows[-2] + 
print(tabulate(rows, headers="firstrow", tablefmt="latex")) diff --git a/bob/learn/tensorflow/models/simple_cnn.py b/bob/learn/tensorflow/models/simple_cnn.py new file mode 100644 index 0000000000000000000000000000000000000000..ae2cb68d4a55ce91460cb8c7ae042eaed37dbfdd --- /dev/null +++ b/bob/learn/tensorflow/models/simple_cnn.py @@ -0,0 +1,110 @@ +""" +The network using keras (same as the SimpleCNN function below):: + + from tensorflow.keras import * + from tensorflow.keras.layers import * + simplecnn = Sequential([ + Conv2D(32,(3,3),padding='same',use_bias=False, input_shape=(28,28,3)), + BatchNormalization(scale=False), + Activation('relu'), + MaxPool2D(padding='same'), + Conv2D(64,(3,3),padding='same',use_bias=False), + BatchNormalization(scale=False), + Activation('relu'), + MaxPool2D(padding='same'), + Flatten(), + Dense(1024, use_bias=False), + BatchNormalization(scale=False), + Activation('relu'), + Dropout(rate=0.4), + Dense(2), + ]) + simplecnn.summary() + _________________________________________________________________ + Layer (type) Output Shape Param # + ================================================================= + conv2d_1 (Conv2D) (None, 28, 28, 32) 864 + _________________________________________________________________ + batch_normalization_1 (Batch (None, 28, 28, 32) 96 + _________________________________________________________________ + activation_1 (Activation) (None, 28, 28, 32) 0 + _________________________________________________________________ + max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32) 0 + _________________________________________________________________ + conv2d_2 (Conv2D) (None, 14, 14, 64) 18432 + _________________________________________________________________ + batch_normalization_2 (Batch (None, 14, 14, 64) 192 + _________________________________________________________________ + activation_2 (Activation) (None, 14, 14, 64) 0 + _________________________________________________________________ + max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64) 0 + _________________________________________________________________ + flatten_1 (Flatten) (None, 3136) 0 + _________________________________________________________________ + dense_1 (Dense) (None, 1024) 3211264 + _________________________________________________________________ + batch_normalization_3 (Batch (None, 1024) 3072 + _________________________________________________________________ + activation_3 (Activation) (None, 1024) 0 + _________________________________________________________________ + dropout_1 (Dropout) (None, 1024) 0 + _________________________________________________________________ + dense_2 (Dense) (None, 2) 2050 + ================================================================= + Total params: 3,235,970 + Trainable params: 3,233,730 + Non-trainable params: 2,240 + _________________________________________________________________ +""" + +from tensorflow.keras import Sequential, Input +from tensorflow.keras.layers import ( + Conv2D, + BatchNormalization, + Activation, + MaxPool2D, + Flatten, + Dense, + Dropout, +) + + +def SimpleCNN(input_shape=(28, 28, 3), inputs=None, name="SimpleCNN", **kwargs): + + if inputs is None: + inputs = Input(input_shape) + model = Sequential( + [ + inputs, + Conv2D(32, (3, 3), padding="same", use_bias=False), + BatchNormalization(scale=False), + Activation("relu"), + MaxPool2D(padding="same"), + Conv2D(64, (3, 3), padding="same", use_bias=False), + BatchNormalization(scale=False), + Activation("relu"), 
+ MaxPool2D(padding="same"), + Flatten(), + Dense(1024, use_bias=False), + BatchNormalization(scale=False), + Activation("relu"), + Dropout(rate=0.4), + Dense(2), + ], + name=name, + **kwargs + ) + + return model + + +if __name__ == "__main__": + import pkg_resources + from tabulate import tabulate + from bob.learn.tensorflow.utils import model_summary + + model = SimpleCNN() + model.summary() + rows = model_summary(model, do_print=True) + del rows[-2] + print(tabulate(rows, headers="firstrow", tablefmt="latex"))
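Because ``SimpleCNN`` ends in ``Dense(2)`` without a softmax, a matching loss has to be built with ``from_logits=True``. A minimal training sketch (not part of the diff; assumes TensorFlow 2.x and random stand-in data)::

    import numpy as np
    import tensorflow as tf
    from bob.learn.tensorflow.models.simple_cnn import SimpleCNN

    model = SimpleCNN()
    model.compile(
        optimizer="adam",
        # the model outputs raw logits, so from_logits=True is required
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )
    x = np.random.rand(8, 28, 28, 3).astype("float32")
    y = np.random.randint(0, 2, size=(8,))
    model.fit(x, y, epochs=1, batch_size=4)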
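The ``InceptionResnetBlock`` docstring above states that the block computes ``x + scale * r``, and the ``ScaledResidual`` helper in the same file carries out that sum; it can be checked in isolation (a sketch, not part of the diff)::

    import tensorflow as tf
    from bob.learn.tensorflow.models.inception_resnet_v2 import ScaledResidual

    shortcut = tf.ones((1, 4, 4, 8))   # x
    residual = tf.ones((1, 4, 4, 8))   # r
    out = ScaledResidual(scale=0.17)([shortcut, residual])
    # every element equals 1 + 0.17 * 1
    tf.debugging.assert_near(out, 1.17 * tf.ones((1, 4, 4, 8)))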