diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..08ae74117cd54402e260369ed4cbf761e09cc283
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,40 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+  - repo: https://github.com/timothycrosley/isort
+    rev: 4.3.21-2
+    hooks:
+    - id: isort
+      args: [-sl]
+  - repo: https://github.com/psf/black
+    rev: stable
+    hooks:
+      - id: black
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v2.0.0
+    hooks:
+      - id: check-ast
+      - id: check-case-conflict
+      - id: trailing-whitespace
+      - id: end-of-file-fixer
+      - id: debug-statements
+      - id: check-added-large-files
+      - id: flake8
+  - repo: local
+    hooks:
+      - id: sphinx-build
+        name: sphinx build
+        entry: python -m sphinx.cmd.build
+        args: [-a, -E, -W, doc, sphinx]
+        language: system
+        files: ^doc/
+        types: [file]
+        pass_filenames: false
+      - id: sphinx-doctest
+        name: sphinx doctest
+        entry: python -m sphinx.cmd.build
+        args: [-a, -E, -b, doctest, doc, sphinx]
+        language: system
+        files: ^doc/
+        types: [file]
+        pass_filenames: false
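
Usage note for the hook configuration above (assuming a standard pre-commit setup, which this diff does not show): contributors enable the hooks once with `pre-commit install` and can check the whole tree with `pre-commit run --all-files`. Both local Sphinx hooks use `language: system`, so `python -m sphinx.cmd.build` must work in the active environment.
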
diff --git a/bob/__init__.py b/bob/__init__.py
index 2ca5e07cb73f0bdddcb863ef497955964087e301..edbb4090fca046b19d22d3982711084621bff3be 100644
--- a/bob/__init__.py
+++ b/bob/__init__.py
@@ -1,3 +1,4 @@
 # see https://docs.python.org/3/library/pkgutil.html
 from pkgutil import extend_path
-__path__ = extend_path(__path__, __name__)
\ No newline at end of file
+
+__path__ = extend_path(__path__, __name__)
diff --git a/bob/learn/__init__.py b/bob/learn/__init__.py
index 2ab1e28b150f0549def9963e9e87de3fdd6b2579..edbb4090fca046b19d22d3982711084621bff3be 100644
--- a/bob/learn/__init__.py
+++ b/bob/learn/__init__.py
@@ -1,3 +1,4 @@
 # see https://docs.python.org/3/library/pkgutil.html
 from pkgutil import extend_path
+
 __path__ = extend_path(__path__, __name__)
diff --git a/bob/learn/tensorflow/__init__.py b/bob/learn/tensorflow/__init__.py
index b17348644c602f4d16633e7c392b19a37c59086a..1b0e2db6061b8fcd5e328762aadf66d77a398193 100644
--- a/bob/learn/tensorflow/__init__.py
+++ b/bob/learn/tensorflow/__init__.py
@@ -3,4 +3,5 @@ def get_config():
     Returns a string containing the configuration information.
     """
     import bob.extension
+
     return bob.extension.get_config(__name__)
diff --git a/bob/learn/tensorflow/callbacks.py b/bob/learn/tensorflow/callbacks.py
new file mode 100644
index 0000000000000000000000000000000000000000..efd97e09bc8ddb67495f6f7f69d82b80feaf2afc
--- /dev/null
+++ b/bob/learn/tensorflow/callbacks.py
@@ -0,0 +1,100 @@
+import json
+import os
+
+import tensorflow as tf
+from tensorflow.keras import callbacks
+
+
+class CustomBackupAndRestore(tf.keras.callbacks.experimental.BackupAndRestore):
+    """This callback is experimental and might be removed in future.
+    See :any:`add_backup_callback`
+    """
+
+    def __init__(self, callbacks, backup_dir, **kwargs):
+        super().__init__(backup_dir=backup_dir, **kwargs)
+        self.callbacks = callbacks
+        self.callbacks_backup_path = os.path.join(self.backup_dir, "callbacks.json")
+
+    def backup(self):
+        variables = {}
+        for cb_name, cb in self.callbacks.items():
+            variables[cb_name] = {}
+            for k, v in cb.__dict__.items():
+                if not isinstance(v, (int, float)):  # keep only JSON-safe numeric state
+                    continue
+                variables[cb_name][k] = v
+        with open(self.callbacks_backup_path, "w") as f:
+            json.dump(variables, f, indent=4, sort_keys=True)
+
+    def restore(self):
+        if not os.path.isfile(self.callbacks_backup_path):
+            return False
+
+        with open(self.callbacks_backup_path, "r") as f:
+            variables = json.load(f)
+
+        for cb_name, cb in self.callbacks.items():
+            if cb_name not in variables:
+                continue
+            for k, v in cb.__dict__.items():
+                if k in variables[cb_name]:
+                    cb.__dict__[k] = variables[cb_name][k]
+
+        return True
+
+    def on_train_begin(self, logs=None):
+        super().on_train_begin(logs=logs)
+        if self.restore():
+            print(f"Restored callbacks from {self.callbacks_backup_path}")
+        else:
+            print("Did not restore callbacks")
+
+    def on_epoch_end(self, epoch, logs=None):
+        super().on_epoch_end(epoch, logs=logs)
+        self.backup()
+
+    def on_train_end(self, logs=None):
+        # unlike the parent class, keep the backups so training can resume later
+        pass
+
+
+def add_backup_callback(callbacks, backup_dir, **kwargs):
+    """Adds a backup callback to your callbacks to restore the training process
+    if it is interrupted.
+
+    .. warning::
+
+        This function is experimental and may be removed or changed in the future.
+
+    Examples
+    --------
+
+    >>> CHECKPOINT = "checkpoints"
+    >>> callbacks = {
+    ...     "best": tf.keras.callbacks.ModelCheckpoint(
+    ...         f"{CHECKPOINT}/best",
+    ...         monitor="val_acc",
+    ...         save_best_only=True,
+    ...         mode="max",
+    ...         verbose=1,
+    ...     ),
+    ...     "tensorboard": tf.keras.callbacks.TensorBoard(
+    ...         log_dir=f"{CHECKPOINT}/logs",
+    ...         update_freq=15,
+    ...         write_graph=False,
+    ...     ),
+    ... }
+    >>> callbacks = add_backup_callback(callbacks, f"{CHECKPOINT}/backup")
+    >>> # callbacks will be a list that can be given to model.fit
+    >>> isinstance(callbacks, list)
+    True
+    """
+    if not isinstance(callbacks, dict):
+        raise ValueError(
+            "Please provide a dictionary of callbacks where "
+            "keys are simple names for your callbacks!"
+        )
+    cb = CustomBackupAndRestore(callbacks=callbacks, backup_dir=backup_dir, **kwargs)
+    callbacks = list(callbacks.values())
+    callbacks.append(cb)
+    return callbacks
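
A minimal end-to-end sketch of the new backup mechanism (the model, data, and paths below are hypothetical; only `add_backup_callback` comes from this diff, and `BackupAndRestore` requires a recent TF2 release). The wrapper checkpoints training state every epoch and serializes the numeric attributes of each named callback to `callbacks.json`, so an interrupted `fit` can resume:

```python
import tensorflow as tf

from bob.learn.tensorflow.callbacks import add_backup_callback

# hypothetical toy model and data
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(2, activation="softmax", input_shape=(4,))]
)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")
x = tf.random.normal((32, 4))
y = tf.random.uniform((32,), maxval=2, dtype=tf.int32)

callbacks = {
    "best": tf.keras.callbacks.ModelCheckpoint("checkpoints/best", verbose=1),
    "tensorboard": tf.keras.callbacks.TensorBoard(log_dir="checkpoints/logs"),
}
callbacks = add_backup_callback(callbacks, backup_dir="checkpoints/backup")

# if this run is interrupted and restarted, it resumes from the last epoch
model.fit(x, y, epochs=3, callbacks=callbacks)
```
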
diff --git a/bob/learn/tensorflow/configs/style_transfer/inception_v1_example.py b/bob/learn/tensorflow/configs/style_transfer/inception_v1_example.py
deleted file mode 100644
index 00bfa1e0628534802d47bba19ad27b20b04ef236..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/configs/style_transfer/inception_v1_example.py
+++ /dev/null
@@ -1,27 +0,0 @@
-"""
-Example using inception resnet v1
-
-"""
-
-import tensorflow as tf
-
-# -- architecture
-from bob.learn.tensorflow.network import inception_resnet_v1_batch_norm
-architecture = inception_resnet_v1_batch_norm
-
-# --checkpoint-dir
-from bob.extension import rc
-checkpoint_dir = rc['bob.bio.face_ongoing.inception-v1_batchnorm_rgb']
-
-# --style-end-points and -- content-end-points
-style_end_points = ["Conv2d_1a_3x3", "Conv2d_2b_3x3"]
-content_end_points = ["Block8"]
-
-scopes = {"InceptionResnetV1/":"InceptionResnetV1/"}
-
-# --style-image-paths
-style_image_paths = ["vincent_van_gogh.jpg",
-                     "vincent_van_gogh2.jpg"]
-
-# --preprocess-fn
-preprocess_fn = tf.image.per_image_standardization
diff --git a/bob/learn/tensorflow/configs/style_transfer/inception_v2_example.py b/bob/learn/tensorflow/configs/style_transfer/inception_v2_example.py
deleted file mode 100644
index 54eb3f2931f65e78fb6a1e201b65d119f172d0be..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/configs/style_transfer/inception_v2_example.py
+++ /dev/null
@@ -1,26 +0,0 @@
-"""
-Example using inception resnet v2
-"""
-
-import tensorflow as tf
-
-# -- architecture
-from bob.learn.tensorflow.network import inception_resnet_v2_batch_norm
-architecture = inception_resnet_v2_batch_norm
-
-# --checkpoint-dir
-from bob.extension import rc
-checkpoint_dir = rc['bob.bio.face_ongoing.inception-v2_batchnorm_rgb']
-
-# --style-end-points and -- content-end-points
-style_end_points = ["Conv2d_1a_3x3", "Conv2d_2b_3x3"]
-content_end_points = ["Block8"]
-
-scopes = {"InceptionResnetV2/":"InceptionResnetV2/"}
-
-# --style-image-paths
-style_image_paths = ["vincent_van_gogh.jpg",
-                     "vincent_van_gogh2.jpg"]
-
-# --preprocess-fn
-preprocess_fn = tf.image.per_image_standardization
diff --git a/bob/learn/tensorflow/configs/style_transfer/vgg19_example.py b/bob/learn/tensorflow/configs/style_transfer/vgg19_example.py
deleted file mode 100644
index 4d31888882eb98570549b4015f83a6089074ae6a..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/configs/style_transfer/vgg19_example.py
+++ /dev/null
@@ -1,43 +0,0 @@
-"""
-Example using VGG19
-"""
-
-from bob.learn.tensorflow.network import vgg_19
-# --architecture
-architecture = vgg_19
-
-
-import numpy
-
-# -- checkpoint-dir
-# YOU CAN DOWNLOAD THE CHECKPOINTS FROM HERE 
-# https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models
-checkpoint_dir = "/PATH-TO/vgg_19.ckpt"
-
-# --style-end-points and -- content-end-points
-content_end_points = ['vgg_19/conv4/conv4_2', 'vgg_19/conv5/conv5_2']
-style_end_points = ['vgg_19/conv1/conv1_2', 
-                    'vgg_19/conv2/conv2_1',
-                    'vgg_19/conv3/conv3_1',
-                    'vgg_19/conv4/conv4_1',
-                    'vgg_19/conv5/conv5_1'
-                    ]
-
-
-scopes = {"vgg_19/":"vgg_19/"}
-
-style_image_paths = ["/PATH/TO/vincent_van_gogh.jpg"]
-
-
-# --preprocess-fn and --un-preprocess-fn
-# Taken from VGG19
-def mean_norm(tensor):
-    return tensor - numpy.array([ 123.68 ,  116.779,  103.939])
-
-def un_mean_norm(tensor):
-    return tensor + numpy.array([ 123.68 ,  116.779,  103.939])
-
-preprocess_fn = mean_norm
-
-un_preprocess_fn = un_mean_norm
-
diff --git a/bob/learn/tensorflow/configs/style_transfer/vincent_van_gogh.jpg b/bob/learn/tensorflow/configs/style_transfer/vincent_van_gogh.jpg
deleted file mode 100644
index c17534eca0469abfa043146da04f82eab014ba99..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/configs/style_transfer/vincent_van_gogh.jpg and /dev/null differ
diff --git a/bob/learn/tensorflow/configs/style_transfer/vincent_van_gogh2.jpg b/bob/learn/tensorflow/configs/style_transfer/vincent_van_gogh2.jpg
deleted file mode 100644
index 9a74d8588e73b044c3f8a51e8bb3bcf1e950174a..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/configs/style_transfer/vincent_van_gogh2.jpg and /dev/null differ
diff --git a/bob/learn/tensorflow/extractors/__init__.py b/bob/learn/tensorflow/data/__init__.py
similarity index 58%
rename from bob/learn/tensorflow/extractors/__init__.py
rename to bob/learn/tensorflow/data/__init__.py
index 05492a8e524ac60164f5be396ab9597ddcc2ec88..3d31417512b35396459e77db40c3448d4372a34e 100644
--- a/bob/learn/tensorflow/extractors/__init__.py
+++ b/bob/learn/tensorflow/data/__init__.py
@@ -1,7 +1,5 @@
-from .Base import Base, normalize_checkpoint_path
-from .Generic import Generic
-from .Estimator import Estimator
-
+from .generator import Generator, dataset_using_generator
+from .tfrecords import dataset_to_tfrecord, dataset_from_tfrecord, TFRECORDS_EXT
 
 # gets sphinx autodoc done right - don't remove it
 def __appropriate__(*args):
@@ -9,7 +7,7 @@ def __appropriate__(*args):
 
     Parameters:
 
-        *args: An iterable of objects to modify
+      *args: An iterable of objects to modify
 
     Resolves `Sphinx referencing issues
     <https://github.com/sphinx-doc/sphinx/issues/3048>`
@@ -20,8 +18,6 @@ def __appropriate__(*args):
 
 
 __appropriate__(
-    Base,
-    Generic,
-    Estimator,
+    Generator,
 )
-__all__ = [_ for _ in dir() if not _.startswith('_')]
+__all__ = [_ for _ in dir() if not _.startswith("_")]
diff --git a/bob/learn/tensorflow/dataset/generator.py b/bob/learn/tensorflow/data/generator.py
similarity index 92%
rename from bob/learn/tensorflow/dataset/generator.py
rename to bob/learn/tensorflow/data/generator.py
index cf2798ae941db9669a1f1369eff854458b7fd33f..7e68c0246eb227862f6d79214c1000caaa6a8561 100644
--- a/bob/learn/tensorflow/dataset/generator.py
+++ b/bob/learn/tensorflow/data/generator.py
@@ -1,6 +1,7 @@
-import tensorflow as tf
-import random
 import logging
+import random
+
+import tensorflow as tf
 
 logger = logging.getLogger(__name__)
 
@@ -30,7 +31,14 @@ class Generator:
         The shapes of the returned samples.
     """
 
-    def __init__(self, samples, reader, multiple_samples=False, shuffle_on_epoch_end=False, **kwargs):
+    def __init__(
+        self,
+        samples,
+        reader,
+        multiple_samples=False,
+        shuffle_on_epoch_end=False,
+        **kwargs
+    ):
         super().__init__(**kwargs)
         self.reader = reader
         self.samples = list(samples)
@@ -55,8 +63,8 @@ class Generator:
                 break
         # Creating a "fake" dataset just to get the types and shapes
         dataset = tf.data.Dataset.from_tensors(dlk)
-        self._output_types = dataset.output_types
-        self._output_shapes = dataset.output_shapes
+        self._output_types = tf.compat.v1.data.get_output_types(dataset)
+        self._output_shapes = tf.compat.v1.data.get_output_shapes(dataset)
 
         logger.info(
             "Initializing a dataset with %d %s and %s types and %s shapes",
diff --git a/bob/learn/tensorflow/data/tfrecords.py b/bob/learn/tensorflow/data/tfrecords.py
new file mode 100644
index 0000000000000000000000000000000000000000..bacf49cf8c10bbecd8f613a52033c104c212290a
--- /dev/null
+++ b/bob/learn/tensorflow/data/tfrecords.py
@@ -0,0 +1,137 @@
+"""Utilities for TFRecords
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+
+import tensorflow as tf
+
+
+TFRECORDS_EXT = ".tfrecords"
+
+
+def tfrecord_name_and_json_name(output):
+    output = normalize_tfrecords_path(output)
+    json_output = output[: -len(TFRECORDS_EXT)] + ".json"
+    return output, json_output
+
+
+def normalize_tfrecords_path(output):
+    if not output.endswith(TFRECORDS_EXT):
+        output += TFRECORDS_EXT
+    return output
+
+
+def bytes_feature(value):
+    if isinstance(value, type(tf.constant(0))):
+        value = value.numpy()  # BytesList won't unpack a string from an EagerTensor.
+    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+
+def int64_feature(value):
+    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+
+def dataset_to_tfrecord(dataset, output):
+    """Writes a tf.data.Dataset into a TFRecord file.
+
+    Parameters
+    ----------
+    dataset : ``tf.data.Dataset``
+        The tf.data.Dataset that you want to write into a TFRecord file.
+    output : str
+        Path to the TFRecord file. Besides this file, a .json file is also created.
+        This json file is needed when you want to convert the TFRecord file back into
+        a dataset.
+
+    Returns
+    -------
+    ``tf.Operation``
+        A tf.Operation that, when run, writes contents of dataset to a file. When
+        running in eager mode, calling this function will write the file. Otherwise, you
+        have to call session.run() on the returned operation.
+    """
+    output, json_output = tfrecord_name_and_json_name(output)
+    # dump the structure so that we can read it back
+    meta = {
+        "output_types": repr(tf.compat.v1.data.get_output_types(dataset)),
+        "output_shapes": repr(tf.compat.v1.data.get_output_shapes(dataset)),
+    }
+    with open(json_output, "w") as f:
+        json.dump(meta, f)
+
+    # create a custom map function that serializes the dataset
+    def serialize_example_pyfunction(*args):
+        feature = {}
+        for i, f in enumerate(args):
+            key = f"feature{i}"
+            feature[key] = bytes_feature(f)
+        example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
+        return example_proto.SerializeToString()
+
+    def tf_serialize_example(*args):
+        args = tf.nest.flatten(args)
+        args = [tf.io.serialize_tensor(f) for f in args]
+        tf_string = tf.py_function(serialize_example_pyfunction, args, tf.string)
+        return tf.reshape(tf_string, ())  # The result is a scalar
+
+    dataset = dataset.map(tf_serialize_example)
+    writer = tf.data.experimental.TFRecordWriter(output)
+    return writer.write(dataset)
+
+
+def dataset_from_tfrecord(tfrecord, num_parallel_reads=None):
+    """Reads TFRecords and returns a dataset.
+    The TFRecord file must have been created using the :any:`dataset_to_tfrecord`
+    function.
+
+    Parameters
+    ----------
+    tfrecord : str or list
+        Path to the TFRecord file, or a list of such paths. Pass a list only
+        if all the TFRecord files share the same structure and parsing function.
+    num_parallel_reads: int
+        A `tf.int64` scalar representing the number of files to read in parallel.
+        Defaults to reading files sequentially.
+
+    Returns
+    -------
+    ``tf.data.Dataset``
+        A dataset that contains the data from the TFRecord file.
+    """
+    # these imports are needed so that eval can work
+    from tensorflow import TensorShape
+
+    if isinstance(tfrecord, str):
+        tfrecord = [tfrecord]
+    tfrecord = [tfrecord_name_and_json_name(path) for path in tfrecord]
+    json_output = tfrecord[0][1]
+    tfrecord = [path[0] for path in tfrecord]
+    raw_dataset = tf.data.TFRecordDataset(
+        tfrecord, num_parallel_reads=num_parallel_reads
+    )
+
+    with open(json_output) as f:
+        meta = json.load(f)
+    for k, v in meta.items():
+        meta[k] = eval(v)
+    output_types = tf.nest.flatten(meta["output_types"])
+    output_shapes = tf.nest.flatten(meta["output_shapes"])
+    feature_description = {}
+    for i in range(len(output_types)):
+        key = f"feature{i}"
+        feature_description[key] = tf.io.FixedLenFeature([], tf.string)
+
+    def _parse_function(example_proto):
+        # Parse the input tf.Example proto using the dictionary above.
+        args = tf.io.parse_single_example(
+            serialized=example_proto, features=feature_description
+        )
+        args = tf.nest.flatten(args)
+        args = [tf.io.parse_tensor(v, t) for v, t in zip(args, output_types)]
+        args = [tf.reshape(v, s) for v, s in zip(args, output_shapes)]
+        return tf.nest.pack_sequence_as(meta["output_types"], args)
+
+    return raw_dataset.map(_parse_function)
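
A round-trip sketch for the two public helpers above, runnable in TF2 eager mode (the dataset contents are made up). `dataset_to_tfrecord` writes `example.tfrecords` together with `example.json`, and `dataset_from_tfrecord` reads the JSON metadata back to restore the element types and shapes:

```python
import tensorflow as tf

from bob.learn.tensorflow.data import dataset_from_tfrecord, dataset_to_tfrecord

dataset = tf.data.Dataset.from_tensor_slices(
    (tf.reshape(tf.range(12.0), (4, 3)), tf.constant([0, 1, 0, 1]))
)

# in eager mode this writes example.tfrecords and example.json immediately
dataset_to_tfrecord(dataset, "example")

restored = dataset_from_tfrecord("example")
for features, label in restored:
    print(features.numpy(), int(label))
```
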
diff --git a/bob/learn/tensorflow/dataset/__init__.py b/bob/learn/tensorflow/dataset/__init__.py
deleted file mode 100644
index 93eb960fa720b1cfb93b96f9b0bfefc8cf6fc6f2..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/dataset/__init__.py
+++ /dev/null
@@ -1,419 +0,0 @@
-import tensorflow as tf
-import numpy
-import os
-import bob.io.base
-
-DEFAULT_FEATURE = {
-    "data": tf.FixedLenFeature([], tf.string),
-    "label": tf.FixedLenFeature([], tf.int64),
-    "key": tf.FixedLenFeature([], tf.string),
-}
-
-
-def from_hdf5file_to_tensor(filename):
-    import bob.io.image
-
-    data = bob.io.image.to_matplotlib(bob.io.base.load(filename))
-
-    # reshaping to ndim == 3
-    if data.ndim == 2:
-        data = numpy.reshape(data, (data.shape[0], data.shape[1], 1))
-    data = data.astype("float32")
-
-    return data
-
-
-def from_filename_to_tensor(filename, extension=None):
-    """
-    Read a file and it convert it to tensor.
-
-    If the file extension is something that tensorflow understands (.jpg, .bmp, .tif,...),
-    it uses the `tf.image.decode_image` otherwise it uses `bob.io.base.load`
-    """
-
-    if extension == "hdf5":
-        return tf.py_func(from_hdf5file_to_tensor, [filename], [tf.float32])
-    else:
-        return tf.cast(tf.image.decode_image(tf.read_file(filename)), tf.float32)
-
-
-def append_image_augmentation(
-    image,
-    gray_scale=False,
-    output_shape=None,
-    random_flip=False,
-    random_brightness=False,
-    random_contrast=False,
-    random_saturation=False,
-    random_rotate=False,
-    per_image_normalization=True,
-    random_gamma=False,
-    random_crop=False,
-):
-    """
-    Append to the current tensor some random image augmentation operation
-
-    **Parameters**
-       gray_scale:
-          Convert to gray scale?
-
-       output_shape:
-          If set, will randomly crop the image given the output shape
-
-       random_flip:
-          Randomly flip an image horizontally  (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
-
-       random_brightness:
-           Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
-
-       random_contrast:
-           Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
-
-       random_saturation:
-           Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
-
-       random_rotate:
-           Randomly rotate face images between -5 and 5 degrees
-
-       per_image_normalization:
-           Linearly scales image to have zero mean and unit norm.
-
-    """
-
-    # Changing the range from 0-255 to 0-1
-    image = tf.cast(image, tf.float32) / 255
-    # FORCING A SEED FOR THE RANDOM OPERATIONS
-    tf.set_random_seed(0)
-
-    if output_shape is not None:
-        assert len(output_shape) == 2
-        if random_crop:
-            image = tf.random_crop(image, size=list(output_shape) + [3])
-        else:
-            image = tf.image.resize_image_with_crop_or_pad(
-                image, output_shape[0], output_shape[1]
-            )
-
-    if random_flip:
-        image = tf.image.random_flip_left_right(image)
-
-    if random_brightness:
-        image = tf.image.random_brightness(image, max_delta=0.15)
-        image = tf.clip_by_value(image, 0, 1)
-
-    if random_contrast:
-        image = tf.image.random_contrast(image, lower=0.85, upper=1.15)
-        image = tf.clip_by_value(image, 0, 1)
-
-    if random_saturation:
-        image = tf.image.random_saturation(image, lower=0.85, upper=1.15)
-        image = tf.clip_by_value(image, 0, 1)
-
-    if random_gamma:
-        image = tf.image.adjust_gamma(
-            image, gamma=tf.random.uniform(shape=[], minval=0.85, maxval=1.15)
-        )
-        image = tf.clip_by_value(image, 0, 1)
-
-    if random_rotate:
-        # from https://stackoverflow.com/a/53855704/1286165
-        degree = 0.08726646259971647  # math.pi * 5 /180
-        random_angles = tf.random.uniform(shape=(1,), minval=-degree, maxval=degree)
-        image = tf.contrib.image.transform(
-            image,
-            tf.contrib.image.angles_to_projective_transforms(
-                random_angles,
-                tf.cast(tf.shape(image)[-3], tf.float32),
-                tf.cast(tf.shape(image)[-2], tf.float32),
-            ),
-        )
-
-    if gray_scale:
-        image = tf.image.rgb_to_grayscale(image, name="rgb_to_gray")
-
-    # normalizing data
-    if per_image_normalization:
-        image = tf.image.per_image_standardization(image)
-
-    return image
-
-
-def arrange_indexes_by_label(input_labels, possible_labels):
-
-    # Shuffling all the indexes
-    indexes_per_labels = dict()
-    for l in possible_labels:
-        indexes_per_labels[l] = numpy.where(input_labels == l)[0]
-        numpy.random.shuffle(indexes_per_labels[l])
-    return indexes_per_labels
-
-
-def triplets_random_generator(input_data, input_labels):
-    """
-    Giving a list of samples and a list of labels, it dumps a series of
-    triplets for triple nets.
-
-    **Parameters**
-
-      input_data: List of whatever representing the data samples
-
-      input_labels: List of the labels (needs to be in EXACT same order as input_data)
-    """
-    anchor = []
-    positive = []
-    negative = []
-
-    def append(anchor_sample, positive_sample, negative_sample):
-        """
-        Just appending one element in each list
-        """
-        anchor.append(anchor_sample)
-        positive.append(positive_sample)
-        negative.append(negative_sample)
-
-    possible_labels = list(set(input_labels))
-    input_data = numpy.array(input_data)
-    input_labels = numpy.array(input_labels)
-    total_samples = input_data.shape[0]
-
-    indexes_per_labels = arrange_indexes_by_label(input_labels, possible_labels)
-
-    # searching for random triplets
-    offset_class = 0
-    for i in range(total_samples):
-
-        anchor_sample = input_data[
-            indexes_per_labels[possible_labels[offset_class]][
-                numpy.random.randint(
-                    len(indexes_per_labels[possible_labels[offset_class]])
-                )
-            ],
-            ...,
-        ]
-
-        positive_sample = input_data[
-            indexes_per_labels[possible_labels[offset_class]][
-                numpy.random.randint(
-                    len(indexes_per_labels[possible_labels[offset_class]])
-                )
-            ],
-            ...,
-        ]
-
-        # Changing the class
-        offset_class += 1
-
-        if offset_class == len(possible_labels):
-            offset_class = 0
-
-        negative_sample = input_data[
-            indexes_per_labels[possible_labels[offset_class]][
-                numpy.random.randint(
-                    len(indexes_per_labels[possible_labels[offset_class]])
-                )
-            ],
-            ...,
-        ]
-
-        append(str(anchor_sample), str(positive_sample), str(negative_sample))
-        # yield anchor, positive, negative
-    return anchor, positive, negative
-
-
-def siamease_pairs_generator(input_data, input_labels):
-    """
-    Giving a list of samples and a list of labels, it dumps a series of
-    pairs for siamese nets.
-
-    **Parameters**
-
-      input_data: List of whatever representing the data samples
-
-      input_labels: List of the labels (needs to be in EXACT same order as input_data)
-    """
-
-    # Lists that will be returned
-    left_data = []
-    right_data = []
-    labels = []
-
-    def append(left, right, label):
-        """
-        Just appending one element in each list
-        """
-        left_data.append(left)
-        right_data.append(right)
-        labels.append(label)
-
-    possible_labels = list(set(input_labels))
-    input_data = numpy.array(input_data)
-    input_labels = numpy.array(input_labels)
-    total_samples = input_data.shape[0]
-
-    # Filtering the samples by label and shuffling all the indexes
-    # indexes_per_labels = dict()
-    # for l in possible_labels:
-    #    indexes_per_labels[l] = numpy.where(input_labels == l)[0]
-    #    numpy.random.shuffle(indexes_per_labels[l])
-    indexes_per_labels = arrange_indexes_by_label(input_labels, possible_labels)
-
-    left_possible_indexes = numpy.random.choice(
-        possible_labels, total_samples, replace=True
-    )
-    right_possible_indexes = numpy.random.choice(
-        possible_labels, total_samples, replace=True
-    )
-
-    genuine = True
-    for i in range(total_samples):
-
-        if genuine:
-            # Selecting the class
-            class_index = left_possible_indexes[i]
-
-            # Now selecting the samples for the pair
-            left = input_data[
-                indexes_per_labels[class_index][
-                    numpy.random.randint(len(indexes_per_labels[class_index]))
-                ]
-            ]
-            right = input_data[
-                indexes_per_labels[class_index][
-                    numpy.random.randint(len(indexes_per_labels[class_index]))
-                ]
-            ]
-            append(left, right, 0)
-            # yield left, right, 0
-        else:
-            # Selecting the 2 classes
-            class_index = list()
-            class_index.append(left_possible_indexes[i])
-
-            # Finding the right pair
-            j = i
-            # TODO: Lame solution. Fix this
-            while (
-                j < total_samples
-            ):  # Here is an unidiretinal search for the negative pair
-                if left_possible_indexes[i] != right_possible_indexes[j]:
-                    class_index.append(right_possible_indexes[j])
-                    break
-                j += 1
-
-            if j < total_samples:
-                # Now selecting the samples for the pair
-                left = input_data[
-                    indexes_per_labels[class_index[0]][
-                        numpy.random.randint(len(indexes_per_labels[class_index[0]]))
-                    ]
-                ]
-                right = input_data[
-                    indexes_per_labels[class_index[1]][
-                        numpy.random.randint(len(indexes_per_labels[class_index[1]]))
-                    ]
-                ]
-                append(left, right, 1)
-
-        genuine = not genuine
-    return left_data, right_data, labels
-
-
-def blocks_tensorflow(images, block_size):
-    """Return all non-overlapping blocks of an image using tensorflow
-    operations.
-
-    Parameters
-    ----------
-    images : `tf.Tensor`
-        The input color images. It is assumed that the image has a shape of
-        [?, H, W, C].
-    block_size : (int, int)
-        A tuple of two integers indicating the block size.
-
-    Returns
-    -------
-    blocks : `tf.Tensor`
-        All the blocks in the batch dimension. The output will be of
-        size [?, block_size[0], block_size[1], C].
-    n_blocks : int
-        The number of blocks that was obtained per image.
-    """
-    # normalize block_size
-    block_size = [1] + list(block_size) + [1]
-    output_size = list(block_size)
-    output_size[0] = -1
-    output_size[-1] = images.shape[-1]
-    blocks = tf.extract_image_patches(
-        images, block_size, block_size, [1, 1, 1, 1], "VALID"
-    )
-    n_blocks = int(numpy.prod(blocks.shape[1:3]))
-    output = tf.reshape(blocks, output_size)
-    return output, n_blocks
-
-
-def tf_repeat(tensor, repeats):
-    """
-    Parameters
-    ----------
-    tensor
-        A Tensor. 1-D or higher.
-    repeats
-        A list. Number of repeat for each dimension, length must be the same as
-        the number of dimensions in input
-
-    Returns
-    -------
-    A Tensor. Has the same type as input. Has the shape of tensor.shape *
-    repeats
-    """
-    with tf.variable_scope("repeat"):
-        expanded_tensor = tf.expand_dims(tensor, -1)
-        multiples = [1] + repeats
-        tiled_tensor = tf.tile(expanded_tensor, multiples=multiples)
-        repeated_tesnor = tf.reshape(tiled_tensor, tf.shape(tensor) * repeats)
-    return repeated_tesnor
-
-
-def all_patches(image, label, key, size):
-    """Extracts all patches of an image
-
-    Parameters
-    ----------
-    image:
-        The image should be channels_last format and already batched.
-
-    label:
-        The label for the image
-
-    key:
-        The key for the image
-
-    size: (int, int)
-        The height and width of the blocks.
-
-    Returns
-    -------
-    blocks:
-       The non-overlapping blocks of size from image and labels and keys are
-       repeated.
-
-    label:
-
-    key:
-    """
-    blocks, n_blocks = blocks_tensorflow(image, size)
-
-    # duplicate label and key as n_blocks
-    def repeats(shape):
-        r = shape.as_list()
-        for i in range(len(r)):
-            if i == 0:
-                r[i] = n_blocks
-            else:
-                r[i] = 1
-        return r
-
-    label = tf_repeat(label, repeats(label.shape))
-    key = tf_repeat(key, repeats(key.shape))
-
-    return blocks, label, key
diff --git a/bob/learn/tensorflow/dataset/bio.py b/bob/learn/tensorflow/dataset/bio.py
deleted file mode 100644
index 4a9b66bcc91f8656367739b59bfaf580a355c381..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/dataset/bio.py
+++ /dev/null
@@ -1,93 +0,0 @@
-from bob.bio.base import read_original_data
-from .generator import Generator
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class BioGenerator(Generator):
-    """A generator class which wraps bob.bio.base databases so that they can
-    be used with tf.data.Dataset.from_generator
-
-    Attributes
-    ----------
-    biofile_to_label : :obj:`object`, optional
-        A callable with the signature of ``label = biofile_to_label(biofile)``.
-        By default -1 is returned as label.
-    database : :any:`bob.bio.base.database.BioDatabase`
-        The database that you want to use.
-    load_data : :obj:`object`, optional
-        A callable with the signature of
-        ``data = load_data(database, biofile)``.
-        :any:`bob.bio.base.read_original_data` is wrapped to be used by
-        default.
-    biofiles : [:any:`bob.bio.base.database.BioFile`]
-        The list of the bio files .
-    keys : [str]
-        The keys of samples obtained by calling ``biofile.make_path("", "")``
-    labels : [int]
-        The labels obtained by calling ``label = biofile_to_label(biofile)``
-    """
-
-    def __init__(
-        self,
-        database,
-        biofiles,
-        load_data=None,
-        biofile_to_label=None,
-        multiple_samples=False,
-        **kwargs
-    ):
-
-        if load_data is None:
-
-            def load_data(database, biofile):
-                data = read_original_data(
-                    biofile, database.original_directory, database.original_extension
-                )
-                return data
-
-        if biofile_to_label is None:
-
-            def biofile_to_label(biofile):
-                return -1
-
-        self.database = database
-        self.load_data = load_data
-        self.biofile_to_label = biofile_to_label
-
-        def _reader(f):
-            label = int(self.biofile_to_label(f))
-            data = self.load_data(self.database, f)
-            key = str(f.make_path("", "")).encode("utf-8")
-            return data, label, key
-
-        if multiple_samples:
-            def reader(f):
-                data, label, key = _reader(f)
-                for d in data:
-                    yield (d, label, key)
-        else:
-            def reader(f):
-                return _reader(f)
-
-        super(BioGenerator, self).__init__(
-            biofiles, reader, multiple_samples=multiple_samples, **kwargs
-        )
-
-    @property
-    def labels(self):
-        for f in self.biofiles:
-            yield int(self.biofile_to_label(f))
-
-    @property
-    def keys(self):
-        for f in self.biofiles:
-            yield str(f.make_path("", "")).encode("utf-8")
-
-    @property
-    def biofiles(self):
-        return self.samples
-
-    def __len__(self):
-        return len(self.biofiles)
diff --git a/bob/learn/tensorflow/dataset/image.py b/bob/learn/tensorflow/dataset/image.py
deleted file mode 100644
index b731e55260cac42d6e56edec37f4f6cfb65627f7..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/dataset/image.py
+++ /dev/null
@@ -1,221 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from functools import partial
-from . import append_image_augmentation, from_filename_to_tensor
-
-
-def shuffle_data_and_labels_image_augmentation(filenames,
-                                               labels,
-                                               data_shape,
-                                               data_type,
-                                               batch_size,
-                                               epochs=None,
-                                               buffer_size=10**3,
-                                               gray_scale=False,
-                                               output_shape=None,
-                                               random_flip=False,
-                                               random_brightness=False,
-                                               random_contrast=False,
-                                               random_saturation=False,
-                                               random_rotate=False,
-                                               per_image_normalization=True,
-                                               extension=None):
-    """
-    Dump random batches from a list of image paths and labels:
-
-    The list of files and labels should be in the same order e.g.
-    filenames = ['class_1_img1', 'class_1_img2', 'class_2_img1']
-    labels = [0, 0, 1]
-
-    **Parameters**
-
-       filenames:
-          List containing the path of the images
-
-       labels:
-          List containing the labels (needs to be in EXACT same order as filenames)
-
-       data_shape:
-          Samples shape saved in the tf-record
-
-       data_type:
-          tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-       batch_size:
-          Size of the batch
-
-       epochs:
-           Number of epochs to be batched
-
-       buffer_size:
-            Size of the shuffle bucket
-
-       gray_scale:
-          Convert to gray scale?
-
-       output_shape:
-          If set, will randomly crop the image given the output shape
-
-       random_flip:
-          Randomly flip an image horizontally  (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
-
-       random_brightness:
-           Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
-
-       random_contrast:
-           Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
-
-       random_saturation:
-           Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
-
-       random_rotate:
-           Randomly rotate face images between -5 and 5 degrees
-
-       per_image_normalization:
-           Linearly scales image to have zero mean and unit norm.
-
-       extension:
-           If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
-
-    """
-
-    dataset = create_dataset_from_path_augmentation(
-        filenames,
-        labels,
-        data_shape,
-        data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        extension=extension)
-
-    dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
-
-    data, labels = dataset.make_one_shot_iterator().get_next()
-    return data, labels
-
-
-def create_dataset_from_path_augmentation(filenames,
-                                          labels,
-                                          data_shape,
-                                          data_type,
-                                          gray_scale=False,
-                                          output_shape=None,
-                                          random_flip=False,
-                                          random_brightness=False,
-                                          random_contrast=False,
-                                          random_saturation=False,
-                                          random_rotate=False,
-                                          per_image_normalization=True,
-                                          extension=None):
-    """
-    Create dataset from a list of tf-record files
-
-    **Parameters**
-
-       filenames:
-          List containing the path of the images
-
-       labels:
-          List containing the labels (needs to be in EXACT same order as filenames)
-
-       data_shape:
-          Samples shape saved in the tf-record
-
-       data_type:
-          tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-       feature:
-
-    """
-
-    parser = partial(
-        image_augmentation_parser,
-        data_shape=data_shape,
-        data_type=data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        extension=extension)
-
-    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
-    dataset = dataset.map(parser)
-    return dataset
-
-
-def image_augmentation_parser(filename,
-                              label,
-                              data_shape,
-                              data_type,
-                              gray_scale=False,
-                              output_shape=None,
-                              random_flip=False,
-                              random_brightness=False,
-                              random_contrast=False,
-                              random_saturation=False,
-                              random_rotate=False,
-                              per_image_normalization=True,
-                              extension=None):
-    """
-    Parses a single tf.Example into image and label tensors.
-    """
-
-    # Convert the image data from string back to the numbers
-    image = from_filename_to_tensor(filename, extension=extension)
-
-    # Reshape image data into the original shape
-    image = tf.reshape(image, data_shape)
-
-    #Applying image augmentation
-    image = append_image_augmentation(
-        image,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization)
-
-    label = tf.cast(label, tf.int64)
-    features = dict()
-    features['data'] = image
-    features['key'] = filename
-
-    return features, label
-
-
-def load_pngs(img_path, img_shape):
-    """Read png files using tensorflow API
-    You must know the shape of the image beforehand to use this function.
-
-    Parameters
-    ----------
-    img_path : str
-        Path to the image
-    img_shape : list
-        A list or tuple that contains image's shape in channels_last format
-
-    Returns
-    -------
-    object
-        The loaded png file
-    """
-    img_raw = tf.read_file(img_path)
-    img_tensor = tf.image.decode_png(img_raw, channels=img_shape[-1])
-    img_final = tf.reshape(img_tensor, img_shape)
-    return img_final
diff --git a/bob/learn/tensorflow/dataset/siamese_image.py b/bob/learn/tensorflow/dataset/siamese_image.py
deleted file mode 100644
index 51a56a64b5290b3d853a3be849cefa261e4e4266..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/dataset/siamese_image.py
+++ /dev/null
@@ -1,254 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from functools import partial
-from . import append_image_augmentation, siamease_pairs_generator, from_filename_to_tensor
-
-
-def shuffle_data_and_labels_image_augmentation(filenames,
-                                               labels,
-                                               data_shape,
-                                               data_type,
-                                               batch_size,
-                                               epochs=None,
-                                               buffer_size=10**3,
-                                               gray_scale=False,
-                                               output_shape=None,
-                                               random_flip=False,
-                                               random_brightness=False,
-                                               random_contrast=False,
-                                               random_saturation=False,
-                                               random_rotate=False,
-                                               per_image_normalization=True,
-                                               extension=None):
-    """
-    Dump random batches for siamese networks from a list of image paths and labels:
-
-    The list of files and labels should be in the same order e.g.
-    filenames = ['class_1_img1', 'class_1_img2', 'class_2_img1']
-    labels = [0, 0, 1]
-
-    The batches returned with tf.Session.run() with be in the following format:
-    **data** a dictionary containing the keys ['left', 'right'], each one representing
-    one element of the pair and **labels** which is [0, 1] where 0 is the genuine pair
-    and 1 is the impostor pair.
-
-
-    **Parameters**
-
-       filenames:
-          List containing the path of the images
-
-       labels:
-          List containing the labels (needs to be in EXACT same order as filenames)
-
-       data_shape:
-          Samples shape saved in the tf-record
-
-       data_type:
-          tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-       batch_size:
-          Size of the batch
-
-       epochs:
-           Number of epochs to be batched
-
-       buffer_size:
-            Size of the shuffle bucket
-
-       gray_scale:
-          Convert to gray scale?
-
-       output_shape:
-          If set, will randomly crop the image given the output shape
-
-       random_flip:
-          Randomly flip an image horizontally  (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
-
-       random_brightness:
-           Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
-
-       random_contrast:
-           Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
-
-       random_saturation:
-           Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
-
-       random_rotate:
-           Randomly rotate face images between -5 and 5 degrees
-
-       per_image_normalization:
-           Linearly scales image to have zero mean and unit norm.
-
-       extension:
-           If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
-    """
-
-    dataset = create_dataset_from_path_augmentation(
-        filenames,
-        labels,
-        data_shape,
-        data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        extension=extension)
-
-    dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
-    data, labels = dataset.make_one_shot_iterator().get_next()
-    return data, labels
-
-
-def create_dataset_from_path_augmentation(filenames,
-                                          labels,
-                                          data_shape,
-                                          data_type,
-                                          gray_scale=False,
-                                          output_shape=None,
-                                          random_flip=False,
-                                          random_brightness=False,
-                                          random_contrast=False,
-                                          random_saturation=False,
-                                          random_rotate=False,
-                                          per_image_normalization=True,
-                                          extension=None):
-    """
-    Create dataset from a list of tf-record files
-
-    **Parameters**
-
-       filenames:
-          List containing the path of the images
-
-       labels:
-          List containing the labels (needs to be in EXACT same order as filenames)
-
-       data_shape:
-          Samples shape saved in the tf-record
-
-       data_type:
-          tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-       batch_size:
-          Size of the batch
-
-       epochs:
-           Number of epochs to be batched
-
-       buffer_size:
-            Size of the shuffle bucket
-
-       gray_scale:
-          Convert to gray scale?
-
-       output_shape:
-          If set, will randomly crop the image given the output shape
-
-       random_flip:
-          Randomly flip an image horizontally  (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
-
-       random_brightness:
-           Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
-
-       random_contrast:
-           Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
-
-       random_saturation:
-           Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
-
-       random_rotate:
-           Randomly rotate face images between -10 and 10 degrees
-
-       per_image_normalization:
-           Linearly scales image to have zero mean and unit norm.
-
-       extension:
-           If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
-
-    """
-
-    parser = partial(
-        image_augmentation_parser,
-        data_shape=data_shape,
-        data_type=data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        extension=extension)
-
-    left_data, right_data, siamese_labels = siamease_pairs_generator(
-        filenames, labels)
-    dataset = tf.data.Dataset.from_tensor_slices((left_data, right_data,
-                                                  siamese_labels))
-    dataset = dataset.map(parser)
-    return dataset
-
-
-def image_augmentation_parser(filename_left,
-                              filename_right,
-                              label,
-                              data_shape,
-                              data_type,
-                              gray_scale=False,
-                              output_shape=None,
-                              random_flip=False,
-                              random_brightness=False,
-                              random_contrast=False,
-                              random_saturation=False,
-                              random_rotate=False,
-                              per_image_normalization=True,
-                              extension=None):
-    """
-    Parses a single tf.Example into image and label tensors.
-    """
-
-    # Convert the image data from string back to the numbers
-    image_left = from_filename_to_tensor(filename_left, extension=extension)
-    image_right = from_filename_to_tensor(filename_right, extension=extension)
-
-    # Reshape image data into the original shape
-    image_left = tf.reshape(image_left, data_shape)
-    image_right = tf.reshape(image_right, data_shape)
-
-    #Applying image augmentation
-    image_left = append_image_augmentation(
-        image_left,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization)
-
-    image_right = append_image_augmentation(
-        image_right,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization)
-
-    image = dict()
-    image['left'] = image_left
-    image['right'] = image_right
-    label = tf.cast(label, tf.int64)
-
-    return image, label
diff --git a/bob/learn/tensorflow/dataset/tfrecords.py b/bob/learn/tensorflow/dataset/tfrecords.py
deleted file mode 100644
index 45201b88c0b732de17d87c8510f158cf653c4af9..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/dataset/tfrecords.py
+++ /dev/null
@@ -1,666 +0,0 @@
-"""Utilities for TFRecords
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from functools import partial
-import json
-import logging
-import os
-import sys
-
-import tensorflow as tf
-
-from . import append_image_augmentation, DEFAULT_FEATURE
-
-
-logger = logging.getLogger(__name__)
-TFRECORDS_EXT = ".tfrecords"
-
-
-def tfrecord_name_and_json_name(output):
-    output = normalize_tfrecords_path(output)
-    json_output = output[: -len(TFRECORDS_EXT)] + ".json"
-    return output, json_output
-
-
-def normalize_tfrecords_path(output):
-    if not output.endswith(TFRECORDS_EXT):
-        output += TFRECORDS_EXT
-    return output
-
-
-def bytes_feature(value):
-    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-
-def int64_feature(value):
-    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
-
-
-def dataset_to_tfrecord(dataset, output):
-    """Writes a tf.data.Dataset into a TFRecord file.
-
-    Parameters
-    ----------
-    dataset : ``tf.data.Dataset``
-        The tf.data.Dataset that you want to write into a TFRecord file.
-    output : str
-        Path to the TFRecord file. Besides this file, a .json file is also created.
-        This json file is needed when you want to convert the TFRecord file back into
-        a dataset.
-
-    Returns
-    -------
-    ``tf.Operation``
-        A tf.Operation that, when run, writes contents of dataset to a file. When
-        running in eager mode, calling this function will write the file. Otherwise, you
-        have to call session.run() on the returned operation.
-    """
-    output, json_output = tfrecord_name_and_json_name(output)
-    # dump the structure so that we can read it back
-    meta = {
-        "output_types": repr(dataset.output_types),
-        "output_shapes": repr(dataset.output_shapes),
-    }
-    with open(json_output, "w") as f:
-        json.dump(meta, f)
-
-    # create a custom map function that serializes the dataset
-    def serialize_example_pyfunction(*args):
-        feature = {}
-        for i, f in enumerate(args):
-            key = f"feature{i}"
-            feature[key] = bytes_feature(f)
-        example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
-        return example_proto.SerializeToString()
-
-    def tf_serialize_example(*args):
-        args = tf.contrib.framework.nest.flatten(args)
-        args = [tf.serialize_tensor(f) for f in args]
-        tf_string = tf.py_func(serialize_example_pyfunction, args, tf.string)
-        return tf.reshape(tf_string, ())  # The result is a scalar
-
-    dataset = dataset.map(tf_serialize_example)
-    writer = tf.data.experimental.TFRecordWriter(output)
-    return writer.write(dataset)
-
-
-def dataset_from_tfrecord(tfrecord, num_parallel_reads=None):
-    """Reads TFRecords and returns a dataset.
-    The TFRecord file must have been created using the :any:`dataset_to_tfrecord`
-    function.
-
-    Parameters
-    ----------
-    tfrecord : str or list
-        Path to the TFRecord file. Pass a list if you are sure several tfrecords need
-        the same map function.
-    num_parallel_reads: int
-        A `tf.int64` scalar representing the number of files to read in parallel.
-        Defaults to reading files sequentially.
-
-    Returns
-    -------
-    ``tf.data.Dataset``
-        A dataset that contains the data from the TFRecord file.
-    """
-    # these imports are needed so that eval can work
-    from tensorflow import TensorShape, Dimension
-
-    if isinstance(tfrecord, str):
-        tfrecord = [tfrecord]
-    tfrecord = [tfrecord_name_and_json_name(path) for path in tfrecord]
-    json_output = tfrecord[0][1]
-    tfrecord = [path[0] for path in tfrecord]
-    raw_dataset = tf.data.TFRecordDataset(
-        tfrecord, num_parallel_reads=num_parallel_reads
-    )
-
-    with open(json_output) as f:
-        meta = json.load(f)
-    for k, v in meta.items():
-        meta[k] = eval(v)
-    output_types = tf.contrib.framework.nest.flatten(meta["output_types"])
-    output_shapes = tf.contrib.framework.nest.flatten(meta["output_shapes"])
-    feature_description = {}
-    for i in range(len(output_types)):
-        key = f"feature{i}"
-        feature_description[key] = tf.FixedLenFeature([], tf.string)
-
-    def _parse_function(example_proto):
-        # Parse the input tf.Example proto using the dictionary above.
-        args = tf.parse_single_example(example_proto, feature_description)
-        args = tf.contrib.framework.nest.flatten(args)
-        args = [tf.parse_tensor(v, t) for v, t in zip(args, output_types)]
-        args = [tf.reshape(v, s) for v, s in zip(args, output_shapes)]
-        return tf.contrib.framework.nest.pack_sequence_as(meta["output_types"], args)
-
-    return raw_dataset.map(_parse_function)
-
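-# A minimal round-trip sketch for the two functions above (a hedged example:
-# TF1 graph mode is assumed, as in the rest of this module, and ``images`` and
-# the file name are placeholders). Note that dataset_to_tfrecord also writes a
-# "train.json" sidecar which dataset_from_tfrecord needs to rebuild the
-# element structure:
-#
-#   dataset = tf.data.Dataset.from_tensor_slices(images)
-#   write_op = dataset_to_tfrecord(dataset, "train.tfrecords")
-#   with tf.Session() as sess:
-#       sess.run(write_op)
-#   restored = dataset_from_tfrecord("train.tfrecords")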
-
-def write_a_sample(writer, data, label, key, feature=None, size_estimate=False):
-    if feature is None:
-        feature = {
-            "data": bytes_feature(data.tostring()),
-            "label": int64_feature(label),
-            "key": bytes_feature(key),
-        }
-
-    example = tf.train.Example(features=tf.train.Features(feature=feature))
-    example = example.SerializeToString()
-    if not size_estimate:
-        writer.write(example)
-    return sys.getsizeof(example)
-
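-# Sketch: write_a_sample doubles as a dry run; with size_estimate=True nothing
-# is written and only the serialized size is returned, so the writer argument
-# is unused (data, label and key are placeholders here):
-#
-#   n_bytes = write_a_sample(None, data, label, key, size_estimate=True)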
-
-def example_parser(serialized_example, feature, data_shape, data_type):
-    """
-  Parses a single tf.Example into image and label tensors.
-
-  """
-    # Decode the record read by the reader
-    features = tf.parse_single_example(serialized_example, features=feature)
-    # Convert the image data from string back to the numbers
-    image = tf.decode_raw(features["data"], data_type)
-    # Cast label data into int64
-    label = tf.cast(features["label"], tf.int64)
-    # Reshape image data into the original shape
-    image = tf.reshape(image, data_shape)
-    key = tf.cast(features["key"], tf.string)
-    return image, label, key
-
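-# For reference, a feature description compatible with example_parser,
-# mirroring the keys written by write_a_sample (the same mapping is spelled
-# out in describe_tf_record below):
-#
-#   feature = {
-#       "data": tf.FixedLenFeature([], tf.string),
-#       "label": tf.FixedLenFeature([], tf.int64),
-#       "key": tf.FixedLenFeature([], tf.string),
-#   }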
-
-def image_augmentation_parser(
-    serialized_example,
-    feature,
-    data_shape,
-    data_type,
-    gray_scale=False,
-    output_shape=None,
-    random_flip=False,
-    random_brightness=False,
-    random_contrast=False,
-    random_saturation=False,
-    random_rotate=False,
-    per_image_normalization=True,
-    random_gamma=False,
-    random_crop=False,
-):
-    """
-  Parses a single tf.Example into image and label tensors.
-
-  """
-    # Decode the record read by the reader
-    features = tf.parse_single_example(serialized_example, features=feature)
-    # Convert the image data from string back to the numbers
-    image = tf.decode_raw(features["data"], data_type)
-
-    # Reshape image data into the original shape
-    image = tf.reshape(image, data_shape)
-
-    # Applying image augmentation
-    image = append_image_augmentation(
-        image,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        random_gamma=random_gamma,
-        random_crop=random_crop,
-    )
-
-    # Cast label data into int64
-    label = tf.cast(features["label"], tf.int64)
-    key = tf.cast(features["key"], tf.string)
-
-    return image, label, key
-
-
-def read_and_decode(filename_queue, data_shape, data_type=tf.float32, feature=None):
-    """
-  Simples parse possible for a tfrecord.
-  It assumes that you have the pair **train/data** and **train/label**
-  """
-
-    if feature is None:
-        feature = DEFAULT_FEATURE
-    # Define a reader and read the next record
-    reader = tf.TFRecordReader()
-    _, serialized_example = reader.read(filename_queue)
-    return example_parser(serialized_example, feature, data_shape, data_type)
-
-
-def create_dataset_from_records(
-    tfrecord_filenames, data_shape, data_type, feature=None
-):
-    """
-  Create dataset from a list of tf-record files
-
-  **Parameters**
-
-     tfrecord_filenames:
-        List containing the tf-record paths
-
-     data_shape:
-        Samples shape saved in the tf-record
-
-     data_type:
-        tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-     feature:
-
-  """
-
-    if feature is None:
-        feature = DEFAULT_FEATURE
-    dataset = tf.data.TFRecordDataset(tfrecord_filenames)
-    parser = partial(
-        example_parser, feature=feature, data_shape=data_shape, data_type=data_type
-    )
-    dataset = dataset.map(parser)
-    return dataset
-
-
-def create_dataset_from_records_with_augmentation(
-    tfrecord_filenames,
-    data_shape,
-    data_type,
-    feature=None,
-    gray_scale=False,
-    output_shape=None,
-    random_flip=False,
-    random_brightness=False,
-    random_contrast=False,
-    random_saturation=False,
-    random_rotate=False,
-    per_image_normalization=True,
-    random_gamma=False,
-    random_crop=False,
-):
-    """
-  Create dataset from a list of tf-record files
-
-  **Parameters**
-
-     tfrecord_filenames:
-        List containing the tf-record paths
-
-     data_shape:
-        Samples shape saved in the tf-record
-
-     data_type:
-        tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-     feature:
-
-  """
-
-    if feature is None:
-        feature = DEFAULT_FEATURE
-    if isinstance(tfrecord_filenames, str) and os.path.isdir(tfrecord_filenames):
-        tfrecord_filenames = [
-            os.path.join(tfrecord_filenames, f) for f in os.listdir(tfrecord_filenames)
-        ]
-    dataset = tf.data.TFRecordDataset(tfrecord_filenames)
-    parser = partial(
-        image_augmentation_parser,
-        feature=feature,
-        data_shape=data_shape,
-        data_type=data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        random_gamma=random_gamma,
-        random_crop=random_crop,
-    )
-    dataset = dataset.map(parser)
-    return dataset
-
-
-def shuffle_data_and_labels_image_augmentation(
-    tfrecord_filenames,
-    data_shape,
-    data_type,
-    batch_size,
-    epochs=None,
-    buffer_size=10 ** 3,
-    gray_scale=False,
-    output_shape=None,
-    random_flip=False,
-    random_brightness=False,
-    random_contrast=False,
-    random_saturation=False,
-    random_rotate=False,
-    per_image_normalization=True,
-    random_gamma=False,
-    random_crop=False,
-    drop_remainder=False,
-):
-    """Dump random batches from a list of tf-record files and applies some image augmentation
-
-    Parameters
-    ----------
-
-      tfrecord_filenames:
-        List containing the tf-record paths
-
-      data_shape:
-        Samples shape saved in the tf-record
-
-      data_type:
-        tf data type (https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-      batch_size:
-        Size of the batch
-
-      epochs:
-        Number of epochs to be batched
-
-      buffer_size:
-        Size of the shuffle bucket
-
-      gray_scale:
-        Convert to gray scale?
-
-      output_shape:
-        If set, will randomly crop the image given the output shape
-
-      random_flip:
-        Randomly flip an image horizontally  (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
-
-      random_brightness:
-        Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
-
-      random_contrast:
-        Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
-
-      random_saturation:
-        Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
-
-      random_rotate:
-        Randomly rotate face images between -5 and 5 degrees
-
-      per_image_normalization:
-        Linearly scales image to have zero mean and unit norm.
-
-      drop_remainder:
-        If True, the last remaining batch that has smaller size than batch_size will be dropped.
-    """
-
-    dataset = create_dataset_from_records_with_augmentation(
-        tfrecord_filenames,
-        data_shape,
-        data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        random_gamma=random_gamma,
-        random_crop=random_crop,
-    )
-
-    dataset = dataset.shuffle(buffer_size)
-    dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
-    dataset = dataset.repeat(epochs)
-
-    dataset = dataset.map(lambda d, l, k: ({"data": d, "key": k}, l))
-
-    return dataset
-
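-# Hypothetical wiring as a tf.estimator input function (shape, dtype and file
-# name below are illustrative only):
-#
-#   def train_input_fn():
-#       return shuffle_data_and_labels_image_augmentation(
-#           ["train.tfrecords"], (112, 112, 3), tf.uint8, batch_size=32
-#       )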
-
-def shuffle_data_and_labels(
-    tfrecord_filenames,
-    data_shape,
-    data_type,
-    batch_size,
-    epochs=None,
-    buffer_size=10 ** 3,
-):
-    """
-  Dump random batches from a list of tf-record files
-
-  **Parameters**
-
-     tfrecord_filenames:
-        List containing the tf-record paths
-
-     data_shape:
-        Samples shape saved in the tf-record
-
-     data_type:
-        tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-     batch_size:
-        Size of the batch
-
-     epochs:
-         Number of epochs to be batched
-
-     buffer_size:
-          Size of the shuffle bucket
-
-  """
-
-    dataset = create_dataset_from_records(tfrecord_filenames, data_shape, data_type)
-    dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
-
-    data, labels, key = dataset.make_one_shot_iterator().get_next()
-    features = dict()
-    features["data"] = data
-    features["key"] = key
-
-    return features, labels
-
-
-def batch_data_and_labels(
-    tfrecord_filenames, data_shape, data_type, batch_size, epochs=1
-):
-    """
-  Dump in order batches from a list of tf-record files
-
-  Parameters
-  ----------
-
-     tfrecord_filenames:
-        List containing the tf-record paths
-
-     data_shape:
-        Samples shape saved in the tf-record
-
-     data_type:
-        tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-     batch_size:
-        Size of the batch
-
-     epochs:
-         Number of epochs to be batched
-
-  """
-    dataset = create_dataset_from_records(tfrecord_filenames, data_shape, data_type)
-    dataset = dataset.batch(batch_size).repeat(epochs)
-
-    data, labels, key = dataset.make_one_shot_iterator().get_next()
-    features = dict()
-    features["data"] = data
-    features["key"] = key
-
-    return features, labels
-
-
-def batch_data_and_labels_image_augmentation(
-    tfrecord_filenames,
-    data_shape,
-    data_type,
-    batch_size,
-    epochs=1,
-    gray_scale=False,
-    output_shape=None,
-    random_flip=False,
-    random_brightness=False,
-    random_contrast=False,
-    random_saturation=False,
-    random_rotate=False,
-    per_image_normalization=True,
-    random_gamma=False,
-    random_crop=False,
-    drop_remainder=False,
-):
-    """
-    Dump batches in order from a list of tf-record files.
-
-    Parameters
-    ----------
-
-       tfrecord_filenames:
-          List containing the tf-record paths
-
-       data_shape:
-          Samples shape saved in the tf-record
-
-       data_type:
-          tf data type (https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-       batch_size:
-          Size of the batch
-
-       epochs:
-           Number of epochs to be batched
-
-       drop_remainder:
-           If True, the last remaining batch that has smaller size than batch_size will be dropped.
-    """
-
-    dataset = create_dataset_from_records_with_augmentation(
-        tfrecord_filenames,
-        data_shape,
-        data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        random_gamma=random_gamma,
-        random_crop=random_crop,
-    )
-
-    dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
-    dataset = dataset.repeat(epochs)
-
-    data, labels, key = dataset.make_one_shot_iterator().get_next()
-    features = dict()
-    features["data"] = data
-    features["key"] = key
-
-    return features, labels
-
-
-def describe_tf_record(tf_record_path, shape, batch_size=1):
-    """
-  Describes the number of samples and the number of classes in a set of tf-record files
-
-  Parameters
-  ----------
-
-  tf_record_path: str
-    Base path containing your tf-record files
-
-  shape: tuple
-     Shape of the samples stored in the tf-record
-
-  batch_size: int
-    Size of the batch used to scan the records
-
-
-  Returns
-  -------
-
-  n_samples: int
-     Total number of samples
-
-  n_classes: int
-     Total number of classes
-
-  """
-
-    tf_records = [os.path.join(tf_record_path, f) for f in os.listdir(tf_record_path)]
-    filename_queue = tf.train.string_input_producer(
-        tf_records, num_epochs=1, name="input"
-    )
-
-    feature = {
-        "data": tf.FixedLenFeature([], tf.string),
-        "label": tf.FixedLenFeature([], tf.int64),
-        "key": tf.FixedLenFeature([], tf.string),
-    }
-
-    # Define a reader and read the next record
-    reader = tf.TFRecordReader()
-
-    _, serialized_example = reader.read(filename_queue)
-
-    # Decode the record read by the reader
-    features = tf.parse_single_example(serialized_example, features=feature)
-
-    # Convert the image data from string back to the numbers
-    image = tf.decode_raw(features["data"], tf.uint8)
-
-    # Cast label data into int32
-    label = tf.cast(features["label"], tf.int64)
-    img_name = tf.cast(features["key"], tf.string)
-
-    # Reshape image data into the original shape
-    image = tf.reshape(image, shape)
-
-    # Getting the batches in order
-    data_ph, label_ph, img_name_ph = tf.train.batch(
-        [image, label, img_name],
-        batch_size=batch_size,
-        capacity=1000,
-        num_threads=5,
-        name="shuffle_batch",
-    )
-
-    # Start the reading
-    session = tf.Session()
-    tf.local_variables_initializer().run(session=session)
-    tf.global_variables_initializer().run(session=session)
-
-    # Preparing the batches
-    thread_pool = tf.train.Coordinator()
-    threads = tf.train.start_queue_runners(coord=thread_pool, sess=session)
-
-    logger.info("Counting in %s", tf_record_path)
-    labels = set()
-    counter = 0
-    try:
-        while True:
-            _, label, _ = session.run([data_ph, label_ph, img_name_ph])
-            counter += len(label)
-
-            for i in set(label):
-                labels.add(i)
-
-    except tf.errors.OutOfRangeError:
-        pass
-
-    thread_pool.request_stop()
-    return counter, len(labels)
diff --git a/bob/learn/tensorflow/dataset/triplet_image.py b/bob/learn/tensorflow/dataset/triplet_image.py
deleted file mode 100644
index 944641100a8a2da2791b363e957beea0dac65a51..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/dataset/triplet_image.py
+++ /dev/null
@@ -1,210 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from functools import partial
-from . import append_image_augmentation, triplets_random_generator, from_filename_to_tensor
-
-
-def shuffle_data_and_labels_image_augmentation(filenames,
-                                               labels,
-                                               data_shape,
-                                               data_type,
-                                               batch_size,
-                                               epochs=None,
-                                               buffer_size=10**3,
-                                               gray_scale=False,
-                                               output_shape=None,
-                                               random_flip=False,
-                                               random_brightness=False,
-                                               random_contrast=False,
-                                               random_saturation=False,
-                                               random_rotate=False,
-                                               per_image_normalization=True,
-                                               extension=None):
-    """
-    Dump random batches for triplet networks from a list of image paths and labels.
-
-    The list of files and labels should be in the same order, e.g.:
-    filenames = ['class_1_img1', 'class_1_img2', 'class_2_img1']
-    labels = [0, 0, 1]
-
-    The batches returned with tf.Session.run() will be in the following format:
-    **data** a dictionary containing the keys ['anchor', 'positive', 'negative'].
-
-
-    **Parameters**
-
-       filenames:
-          List containing the path of the images
-
-       labels:
-          List containing the labels (needs to be in EXACT same order as filenames)
-
-       data_shape:
-          Shape of the loaded samples
-
-       data_type:
-          tf data type (https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-       batch_size:
-          Size of the batch
-
-       epochs:
-           Number of epochs to be batched
-
-       buffer_size:
-            Size of the shuffle bucket
-
-       gray_scale:
-          Convert to gray scale?
-
-       output_shape:
-          If set, will randomly crop the image given the output shape
-
-       random_flip:
-          Randomly flip an image horizontally  (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
-
-       random_brightness:
-           Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
-
-       random_contrast:
-           Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
-
-       random_saturation:
-           Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
-
-       random_rotate:
-           Randomly rotate face images between -5 and 5 degrees
-
-       per_image_normalization:
-           Linearly scales image to have zero mean and unit norm.
-
-       extension:
-           If None, files are loaded with `tf.image.decode..`; if set to `hdf5`, they are loaded with `bob.io.base.load`.
-
-    """
-
-    dataset = create_dataset_from_path_augmentation(
-        filenames,
-        labels,
-        data_shape,
-        data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        extension=extension)
-
-    dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
-
-    data = dataset.make_one_shot_iterator().get_next()
-    return data
-
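-# Sketch of how the returned triplet batch is consumed (TF1 graph mode;
-# filenames/labels as in the docstring above, shape and dtype illustrative):
-#
-#   data = shuffle_data_and_labels_image_augmentation(
-#       filenames, labels, (112, 112, 3), tf.float32, batch_size=8
-#   )
-#   with tf.Session() as sess:
-#       batch = sess.run(data)
-#       anchors, positives = batch["anchor"], batch["positive"]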
-
-def create_dataset_from_path_augmentation(filenames,
-                                          labels,
-                                          data_shape,
-                                          data_type=tf.float32,
-                                          gray_scale=False,
-                                          output_shape=None,
-                                          random_flip=False,
-                                          random_brightness=False,
-                                          random_contrast=False,
-                                          random_saturation=False,
-                                          random_rotate=False,
-                                          per_image_normalization=True,
-                                          extension=None):
-    """
-    Create dataset from a list of tf-record files
-
-    **Parameters**
-
-       filenames:
-          List containing the path of the images
-
-       labels:
-          List containing the labels (needs to be in EXACT same order as filenames)
-
-       data_shape:
-          Samples shape saved in the tf-record
-
-       data_type:
-          tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
-
-       feature:
-
-    """
-
-    parser = partial(
-        image_augmentation_parser,
-        data_shape=data_shape,
-        data_type=data_type,
-        gray_scale=gray_scale,
-        output_shape=output_shape,
-        random_flip=random_flip,
-        random_brightness=random_brightness,
-        random_contrast=random_contrast,
-        random_saturation=random_saturation,
-        random_rotate=random_rotate,
-        per_image_normalization=per_image_normalization,
-        extension=extension)
-
-    anchor_data, positive_data, negative_data = triplets_random_generator(
-        filenames, labels)
-
-    dataset = tf.data.Dataset.from_tensor_slices((anchor_data, positive_data,
-                                                  negative_data))
-    dataset = dataset.map(parser)
-    return dataset
-
-
-def image_augmentation_parser(anchor,
-                              positive,
-                              negative,
-                              data_shape,
-                              data_type=tf.float32,
-                              gray_scale=False,
-                              output_shape=None,
-                              random_flip=False,
-                              random_brightness=False,
-                              random_contrast=False,
-                              random_saturation=False,
-                              random_rotate=False,
-                              per_image_normalization=True,
-                              extension=None):
-    """
-    Parses a single tf.Example into image and label tensors.
-    """
-
-    triplet = dict()
-    for n, v in zip(['anchor', 'positive', 'negative'],
-                    [anchor, positive, negative]):
-
-        # Convert the image data from string back to the numbers
-        image = from_filename_to_tensor(v, extension=extension)
-
-        # Reshape image data into the original shape
-        image = tf.reshape(image, data_shape)
-
-        # Applying image augmentation
-        image = append_image_augmentation(
-            image,
-            gray_scale=gray_scale,
-            output_shape=output_shape,
-            random_flip=random_flip,
-            random_brightness=random_brightness,
-            random_contrast=random_contrast,
-            random_saturation=random_saturation,
-            random_rotate=random_rotate,
-            per_image_normalization=per_image_normalization)
-
-        triplet[n] = image
-
-    return triplet
diff --git a/bob/learn/tensorflow/estimators/EPSC.py b/bob/learn/tensorflow/estimators/EPSC.py
deleted file mode 100644
index 8665830bd1ccf73808892853c1e3bee7f360f74b..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/estimators/EPSC.py
+++ /dev/null
@@ -1,489 +0,0 @@
-# vim: set fileencoding=utf-8 :
-# @author: Amir Mohammadi <amir.mohammadi@idiap.ch>
-
-from . import check_features, get_trainable_variables
-from .Logits import moving_average_scaffold
-from ..network.utils import append_logits
-from ..utils import predict_using_tensors
-from ..loss.epsc import epsc_metric, siamese_loss
-from tensorflow.python.estimator import estimator
-import tensorflow as tf
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class EPSCBase:
-    """A base class for EPSC based estimators"""
-
-    def _get_loss(self, bio_logits, pad_logits, bio_labels, pad_labels, mode):
-        main_loss = self.loss_op(
-            bio_logits=bio_logits,
-            pad_logits=pad_logits,
-            bio_labels=bio_labels,
-            pad_labels=pad_labels,
-        )
-        total_loss = main_loss
-
-        if self.add_regularization_losses:
-
-            regularization_losses = tf.get_collection(
-                tf.GraphKeys.REGULARIZATION_LOSSES
-            )
-            regularization_losses = [
-                tf.cast(l, main_loss.dtype) for l in regularization_losses
-            ]
-
-            regularization_losses = tf.add_n(
-                regularization_losses, name="regularization_losses"
-            )
-            tf.summary.scalar("regularization_losses", regularization_losses)
-
-            total_loss = tf.add_n([main_loss, regularization_losses], name="total_loss")
-
-        if self.vat_loss is not None:
-            vat_loss = self.vat_loss(
-                self.end_points["features"],
-                self.end_points["Logits/PAD"],
-                self.pad_architecture,
-                mode,
-            )
-            total_loss = tf.add_n([main_loss, vat_loss], name="total_loss")
-
-        return total_loss
-
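-# Note on composition: total_loss is main_loss, plus regularization_losses
-# when enabled; however, when vat_loss is given, the final add_n sums only
-# main_loss and vat_loss, so the regularization term computed above is
-# discarded (possibly unintended).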
-
-class EPSCLogits(EPSCBase, estimator.Estimator):
-    """An logits estimator for epsc problems"""
-
-    def __init__(
-        self,
-        architecture,
-        optimizer,
-        loss_op,
-        n_classes,
-        config=None,
-        embedding_validation=False,
-        model_dir="",
-        validation_batch_size=None,
-        extra_checkpoint=None,
-        apply_moving_averages=True,
-        add_histograms="train",
-        add_regularization_losses=True,
-        vat_loss=None,
-        optimize_loss=tf.contrib.layers.optimize_loss,
-        optimize_loss_learning_rate=None,
-    ):
-
-        self.architecture = architecture
-        self.n_classes = n_classes
-        self.loss_op = loss_op
-        self.loss = None
-        self.embedding_validation = embedding_validation
-        self.extra_checkpoint = extra_checkpoint
-        self.add_regularization_losses = add_regularization_losses
-        self.apply_moving_averages = apply_moving_averages
-        self.vat_loss = vat_loss
-        self.optimize_loss = optimize_loss
-        self.optimize_loss_learning_rate = optimize_loss_learning_rate
-
-        if apply_moving_averages and isinstance(optimizer, tf.train.Optimizer):
-            logger.info(
-                "Encapsulating the optimizer with " "the MovingAverageOptimizer"
-            )
-            optimizer = tf.contrib.opt.MovingAverageOptimizer(optimizer)
-
-        self.optimizer = optimizer
-
-        def _model_fn(features, labels, mode):
-
-            check_features(features)
-            data = features["data"]
-            key = features["key"]
-
-            # Checking if we have some variables/scope that we may want to shut
-            # down
-            trainable_variables = get_trainable_variables(
-                self.extra_checkpoint, mode=mode
-            )
-            prelogits, end_points = self.architecture(
-                data, mode=mode, trainable_variables=trainable_variables
-            )
-
-            name = "Logits/Bio"
-            bio_logits = append_logits(
-                prelogits, n_classes, trainable_variables=trainable_variables, name=name
-            )
-            end_points[name] = bio_logits
-
-            name = "Logits/PAD"
-            pad_logits = append_logits(
-                prelogits, 2, trainable_variables=trainable_variables, name=name
-            )
-            end_points[name] = pad_logits
-
-            self.end_points = end_points
-
-            # for vat_loss
-            self.end_points["features"] = data
-
-            def pad_architecture(features, mode, reuse):
-                prelogits, end_points = self.architecture(
-                    features,
-                    mode=mode,
-                    trainable_variables=trainable_variables,
-                    reuse=reuse,
-                )
-                pad_logits = append_logits(
-                    prelogits,
-                    2,
-                    reuse=reuse,
-                    trainable_variables=trainable_variables,
-                    name="Logits/PAD",
-                )
-                return pad_logits, end_points
-
-            self.pad_architecture = pad_architecture
-
-            if self.embedding_validation and mode != tf.estimator.ModeKeys.TRAIN:
-
-                # Compute the embeddings
-                embeddings = tf.nn.l2_normalize(prelogits, 1)
-                predictions = {"embeddings": embeddings}
-            else:
-                predictions = {
-                    # Generate predictions (for PREDICT and EVAL mode)
-                    "bio_classes": tf.argmax(input=bio_logits, axis=1),
-                    # Add `softmax_tensor` to the graph. It is used for PREDICT
-                    # and by the `logging_hook`.
-                    "bio_probabilities": tf.nn.softmax(
-                        bio_logits, name="bio_softmax_tensor"
-                    ),
-                }
-
-            predictions.update(
-                {
-                    "pad_classes": tf.argmax(input=pad_logits, axis=1),
-                    "pad_probabilities": tf.nn.softmax(
-                        pad_logits, name="pad_softmax_tensor"
-                    ),
-                    "key": key,
-                }
-            )
-
-            # add predictions to end_points
-            self.end_points.update(predictions)
-
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-            bio_labels = labels["bio"]
-            pad_labels = labels["pad"]
-
-            if self.embedding_validation and mode != tf.estimator.ModeKeys.TRAIN:
-                bio_predictions_op = predict_using_tensors(
-                    predictions["embeddings"], bio_labels, num=validation_batch_size
-                )
-            else:
-                bio_predictions_op = predictions["bio_classes"]
-
-            pad_predictions_op = predictions["pad_classes"]
-
-            metrics = {
-                "bio_accuracy": tf.metrics.accuracy(
-                    labels=bio_labels, predictions=bio_predictions_op
-                ),
-                "pad_accuracy": tf.metrics.accuracy(
-                    labels=pad_labels, predictions=pad_predictions_op
-                ),
-            }
-
-            if mode == tf.estimator.ModeKeys.EVAL:
-                self.loss = self._get_loss(
-                    bio_logits, pad_logits, bio_labels, pad_labels, mode=mode
-                )
-                return tf.estimator.EstimatorSpec(
-                    mode=mode,
-                    predictions=predictions,
-                    loss=self.loss,
-                    train_op=None,
-                    eval_metric_ops=metrics,
-                )
-
-            # restore the model from an extra_checkpoint
-            if self.extra_checkpoint is not None:
-                if "Logits/" not in self.extra_checkpoint["scopes"]:
-                    logger.warning(
-                        '"Logits/" (which are automatically added by this '
-                        "Logits class are not in the scopes of "
-                        "extra_checkpoint). Did you mean to restore the "
-                        "Logits variables as well?"
-                    )
-
-                logger.info(
-                    "Restoring model from %s in scopes %s",
-                    self.extra_checkpoint["checkpoint_path"],
-                    self.extra_checkpoint["scopes"],
-                )
-                tf.train.init_from_checkpoint(
-                    ckpt_dir_or_file=self.extra_checkpoint["checkpoint_path"],
-                    assignment_map=self.extra_checkpoint["scopes"],
-                )
-
-            # Calculate Loss
-            self.loss = self._get_loss(
-                bio_logits, pad_logits, bio_labels, pad_labels, mode=mode
-            )
-
-            # Compute the moving average of all individual losses and the total
-            # loss.
-            loss_averages = tf.train.ExponentialMovingAverage(0.9, name="avg")
-            loss_averages_op = loss_averages.apply(
-                tf.get_collection(tf.GraphKeys.LOSSES)
-            )
-            tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, loss_averages_op)
-
-            with tf.name_scope("train"):
-                train_op = self.optimize_loss(
-                    loss=self.loss,
-                    global_step=tf.train.get_or_create_global_step(),
-                    optimizer=self.optimizer,
-                    learning_rate=self.optimize_loss_learning_rate,
-                )
-
-                # Get the moving average saver after optimizer.minimize is called
-                if self.apply_moving_averages:
-                    self.saver, self.scaffold = moving_average_scaffold(
-                        self.optimizer.optimizer
-                        if hasattr(self.optimizer, "optimizer")
-                        else self.optimizer,
-                        config,
-                    )
-                else:
-                    self.saver, self.scaffold = None, None
-
-                # Log accuracy and loss
-                with tf.name_scope("train_metrics"):
-                    tf.summary.scalar("bio_accuracy", metrics["bio_accuracy"][1])
-                    tf.summary.scalar("pad_accuracy", metrics["pad_accuracy"][1])
-                    for l in tf.get_collection(tf.GraphKeys.LOSSES):
-                        tf.summary.scalar(
-                            l.op.name + "_averaged", loss_averages.average(l)
-                        )
-
-            # add histograms summaries
-            if add_histograms == "all":
-                for v in tf.all_variables():
-                    tf.summary.histogram(v.name, v)
-            elif add_histograms == "train":
-                for v in tf.trainable_variables():
-                    tf.summary.histogram(v.name, v)
-
-            return tf.estimator.EstimatorSpec(
-                mode=mode,
-                predictions=predictions,
-                loss=self.loss,
-                train_op=train_op,
-                eval_metric_ops=metrics,
-                scaffold=self.scaffold,
-            )
-
-        super().__init__(model_fn=_model_fn, model_dir=model_dir, config=config)
-
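-# Hypothetical instantiation (my_architecture and my_epsc_loss are user
-# supplied; my_epsc_loss must accept the bio_logits, pad_logits, bio_labels
-# and pad_labels keyword arguments used in EPSCBase._get_loss above):
-#
-#   estimator = EPSCLogits(
-#       architecture=my_architecture,
-#       optimizer=tf.train.AdamOptimizer(1e-4),
-#       loss_op=my_epsc_loss,
-#       n_classes=10,
-#       model_dir="/tmp/epsc_model",
-#   )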
-
-class EPSCSiamese(EPSCBase, estimator.Estimator):
-    """An siamese estimator for epsc problems"""
-
-    def __init__(
-        self,
-        architecture,
-        optimizer,
-        loss_op=siamese_loss,
-        config=None,
-        model_dir="",
-        validation_batch_size=None,
-        extra_checkpoint=None,
-        apply_moving_averages=True,
-        add_histograms="train",
-        add_regularization_losses=True,
-        vat_loss=None,
-        optimize_loss=tf.contrib.layers.optimize_loss,
-        optimize_loss_learning_rate=None,
-    ):
-
-        self.architecture = architecture
-        self.loss_op = loss_op
-        self.loss = None
-        self.extra_checkpoint = extra_checkpoint
-        self.add_regularization_losses = add_regularization_losses
-        self.apply_moving_averages = apply_moving_averages
-        self.vat_loss = vat_loss
-        self.optimize_loss = optimize_loss
-        self.optimize_loss_learning_rate = optimize_loss_learning_rate
-
-        if self.apply_moving_averages and isinstance(optimizer, tf.train.Optimizer):
-            logger.info(
-                "Encapsulating the optimizer with " "the MovingAverageOptimizer"
-            )
-            optimizer = tf.contrib.opt.MovingAverageOptimizer(optimizer)
-
-        self.optimizer = optimizer
-
-        def _model_fn(features, labels, mode):
-
-            if mode != tf.estimator.ModeKeys.TRAIN:
-                check_features(features)
-                data = features["data"]
-                key = features["key"]
-            else:
-                if "left" not in features or "right" not in features:
-                    raise ValueError(
-                        "The input features needs to be a dictionary "
-                        "with the keys `left` and `right`"
-                    )
-                data_right = features["right"]["data"]
-                labels_right = labels["right"]
-                data = features["left"]["data"]
-                labels = labels_left = labels["left"]
-
-            # Checking if we have some variables/scope that we may want to shut
-            # down
-            trainable_variables = get_trainable_variables(
-                self.extra_checkpoint, mode=mode
-            )
-
-            prelogits, end_points = self.architecture(
-                data, mode=mode, trainable_variables=trainable_variables
-            )
-
-            self.end_points = end_points
-
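-            # pad_probabilities maps the L2 norm of the embedding through
-            # exp(-norm): embeddings near the origin score close to 1,
-            # embeddings far from the origin close to 0.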
-            predictions = dict(
-                bio_embeddings=tf.nn.l2_normalize(prelogits, 1),
-                pad_probabilities=tf.math.exp(-tf.norm(prelogits, ord=2, axis=-1)),
-            )
-
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                predictions["key"] = key
-
-            # add predictions to end_points
-            self.end_points.update(predictions)
-
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-            metrics = None
-            if mode != tf.estimator.ModeKeys.TRAIN:
-                assert validation_batch_size is not None
-                bio_labels = labels["bio"]
-                pad_labels = labels["pad"]
-
-                metrics = epsc_metric(
-                    predictions["bio_embeddings"],
-                    predictions["pad_probabilities"],
-                    bio_labels,
-                    pad_labels,
-                    validation_batch_size,
-                )
-
-            if mode == tf.estimator.ModeKeys.EVAL:
-                self.loss = tf.reduce_mean(0)
-                return tf.estimator.EstimatorSpec(
-                    mode=mode,
-                    predictions=predictions,
-                    loss=self.loss,
-                    train_op=None,
-                    eval_metric_ops=metrics,
-                )
-
-            # now that we are in TRAIN mode, build the right graph too
-            prelogits_left = prelogits
-            prelogits_right, _ = self.architecture(
-                data_right,
-                mode=mode,
-                reuse=True,
-                trainable_variables=trainable_variables,
-            )
-
-            bio_logits = {"left": prelogits_left, "right": prelogits_right}
-            pad_logits = bio_logits
-
-            bio_labels = {"left": labels_left["bio"], "right": labels_right["bio"]}
-
-            pad_labels = {"left": labels_left["pad"], "right": labels_right["pad"]}
-
-            # restore the model from an extra_checkpoint
-            if self.extra_checkpoint is not None:
-                logger.info(
-                    "Restoring model from %s in scopes %s",
-                    self.extra_checkpoint["checkpoint_path"],
-                    self.extra_checkpoint["scopes"],
-                )
-                tf.train.init_from_checkpoint(
-                    ckpt_dir_or_file=self.extra_checkpoint["checkpoint_path"],
-                    assignment_map=self.extra_checkpoint["scopes"],
-                )
-
-            global_step = tf.train.get_or_create_global_step()
-
-            # Some layer like tf.layers.batch_norm need this:
-            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-
-            with tf.control_dependencies(update_ops), tf.name_scope("train"):
-
-                # Calculate Loss
-                self.loss = self._get_loss(
-                    bio_logits, pad_logits, bio_labels, pad_labels, mode=mode
-                )
-
-                # Compute the moving average of all individual losses
-                # and the total loss.
-                loss_averages = tf.train.ExponentialMovingAverage(0.9, name="avg")
-                loss_averages_op = loss_averages.apply(
-                    tf.get_collection(tf.GraphKeys.LOSSES)
-                )
-
-                train_op = tf.group(
-                    self.optimize_loss(
-                        loss=self.loss,
-                        global_step=tf.train.get_or_create_global_step(),
-                        optimizer=self.optimizer,
-                        learning_rate=self.optimize_loss_learning_rate,
-                    ),
-                    loss_averages_op,
-                )
-
-                # Get the moving average saver after optimizer.minimize is called
-                if apply_moving_averages:
-                    self.saver, self.scaffold = moving_average_scaffold(
-                        self.optimizer.optimizer
-                        if hasattr(self.optimizer, "optimizer")
-                        else self.optimizer,
-                        config,
-                    )
-                else:
-                    self.saver, self.scaffold = None, None
-
-            # Log moving average of losses
-            with tf.name_scope("train_metrics"):
-                for l in tf.get_collection(tf.GraphKeys.LOSSES):
-                    tf.summary.scalar(l.op.name + "_averaged", loss_averages.average(l))
-
-            # add histograms summaries
-            if add_histograms == "all":
-                for v in tf.all_variables():
-                    tf.summary.histogram(v.name, v)
-            elif add_histograms == "train":
-                for v in tf.trainable_variables():
-                    tf.summary.histogram(v.name, v)
-
-            return tf.estimator.EstimatorSpec(
-                mode=mode,
-                predictions=predictions,
-                loss=self.loss,
-                train_op=train_op,
-                eval_metric_ops=metrics,
-                scaffold=self.scaffold,
-            )
-
-        super().__init__(model_fn=_model_fn, model_dir=model_dir, config=config)
diff --git a/bob/learn/tensorflow/estimators/Logits.py b/bob/learn/tensorflow/estimators/Logits.py
deleted file mode 100755
index 7a7570bf4a935533900573eb9cb5d31d55061eb6..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/estimators/Logits.py
+++ /dev/null
@@ -1,545 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from bob.learn.tensorflow.network.utils import append_logits
-from tensorflow.python.estimator import estimator
-from bob.learn.tensorflow.utils import predict_using_tensors
-from bob.learn.tensorflow.loss import (
-    mean_cross_entropy_center_loss,
-    balanced_softmax_cross_entropy_loss_weights,
-    balanced_sigmoid_cross_entropy_loss_weights,
-)
-
-from . import check_features, get_trainable_variables
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class Logits(estimator.Estimator):
-    """Logits estimator.
-
-    NN estimator with `Cross entropy loss
-    <https://www.tensorflow.org/api_docs/python/tf/nn/softmax_cross_entropy_with_logits>`_
-    in the hot-encoded layer
-    :py:class:`bob.learn.tensorflow.estimators.Logits`.
-
-    The architecture function should follow the following pattern::
-
-      def my_beautiful_architecture(placeholder, **kwargs):
-
-        end_points = dict()
-        graph = convXX(placeholder)
-        end_points['conv'] = graph
-
-        return graph, end_points
-
-
-    The **loss** function should follow the following pattern::
-
-      def my_beautiful_loss(logits, labels, **kwargs):
-        return loss_set_of_ops(logits, labels)
-
-
-    Attributes
-    ----------
-
-      architecture:
-         Pointer to a function that builds the graph.
-
-      optimizer:
-         One of the tensorflow solvers
-
-      config:
-         A ``tf.estimator.RunConfig`` object passed to the underlying estimator.
-      n_classes:
-         Number of classes of your problem. A logits layer with this many
-         outputs is appended to the architecture.
-
-      loss_op:
-         Pointer to a function that computes the loss.
-
-      embedding_validation:
-         Run the validation using embeddings? [default: False]
-
-      model_dir:
-        Model path
-
-      validation_batch_size:
-        Size of the batch for validation. This value is only used when
-        validating with embeddings (this is a workaround).
-
-      params:
-        Extra params for the model function (please see
-        https://www.tensorflow.org/extend/estimators for more info)
-
-      extra_checkpoint: dict
-        In case you want to use other model to initialize some variables.
-        This argument should be in the following format::
-
-          extra_checkpoint = {
-            "checkpoint_path": <YOUR_CHECKPOINT>,
-            "scopes": dict({"<SOURCE_SCOPE>/": "<TARGET_SCOPE>/"}),
-            "trainable_variables": [<LIST OF VARIABLES OR SCOPES THAT YOU WANT TO RETRAIN>]
-          }
-
-      apply_moving_averages: bool
-        Apply exponential moving average in the training variables and in the loss.
-        https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
-        By default the decay for the variable averages is 0.9999 and for the loss is 0.9
-    """
-
-    def __init__(
-        self,
-        architecture,
-        optimizer,
-        loss_op,
-        n_classes,
-        config=None,
-        embedding_validation=False,
-        model_dir="",
-        validation_batch_size=None,
-        params=None,
-        extra_checkpoint=None,
-        apply_moving_averages=True,
-        add_histograms=None,
-        vat_loss=None,
-        architecture_has_logits=False,
-        balanced_loss_weight=False,
-        use_sigmoid=False,
-        labels_are_one_hot=False,
-        optimize_loss=tf.contrib.layers.optimize_loss,
-        optimize_loss_learning_rate=None,
-    ):
-
-        self.architecture = architecture
-        self.n_classes = n_classes
-        self.loss_op = loss_op
-        self.loss = None
-        self.embedding_validation = embedding_validation
-        self.extra_checkpoint = extra_checkpoint
-        self.apply_moving_averages = apply_moving_averages
-
-        if self.apply_moving_averages and isinstance(optimizer, tf.train.Optimizer):
-            logger.info("Encapsulating the optimizer with the MovingAverageOptimizer")
-            optimizer = tf.contrib.opt.MovingAverageOptimizer(optimizer)
-
-        self.optimizer = optimizer
-        self.vat_loss = vat_loss
-        self.balanced_loss_weight = balanced_loss_weight
-        self.use_sigmoid = use_sigmoid
-        self.labels_are_one_hot = labels_are_one_hot
-        self.optimize_loss = optimize_loss
-        self.optimize_loss_learning_rate = optimize_loss_learning_rate
-
-        if self.n_classes < 3 and not self.use_sigmoid:
-            logger.error(
-                "n_classes is less than 3 and use_sigmoid is False. Are you sure? "
-                "It's better to use sigmoid activation on your logits for binary "
-                "classification tasks. Don't forget to change your loss to a sigmoid "
-                "loss too! Change n_classes to 1 too if not already."
-            )
-
-        def _model_fn(features, labels, mode, config):
-
-            check_features(features)
-            data = features["data"]
-            key = features["key"]
-
-            # Checking if we have some variables/scope that we may want to shut
-            # down
-            trainable_variables = get_trainable_variables(
-                self.extra_checkpoint, mode=mode
-            )
-            prelogits, end_points = self.architecture(
-                data, mode=mode, trainable_variables=trainable_variables
-            )
-            if architecture_has_logits:
-                logits, prelogits = prelogits, end_points["prelogits"]
-            else:
-                logits = append_logits(
-                    prelogits, n_classes, trainable_variables=trainable_variables
-                )
-
-            # for vat_loss
-            def whole_architecture(features, mode, reuse):
-                prelogits, end_points = self.architecture(
-                    features,
-                    mode=mode,
-                    trainable_variables=trainable_variables,
-                    reuse=reuse,
-                )
-                logits = append_logits(
-                    prelogits,
-                    n_classes,
-                    reuse=reuse,
-                    trainable_variables=trainable_variables,
-                )
-                return logits, end_points
-
-            if architecture_has_logits:
-                whole_architecture = self.architecture
-
-            if self.embedding_validation and mode != tf.estimator.ModeKeys.TRAIN:
-
-                # Compute the embeddings
-                embeddings = tf.nn.l2_normalize(prelogits, 1)
-                predictions = {"embeddings": embeddings, "key": key}
-            else:
-                predictions = {
-                    # Generate predictions (for PREDICT and EVAL mode)
-                    "classes": tf.argmax(input=logits, axis=1)
-                    if self.n_classes > 1
-                    else logits >= 0.5,
-                    # Add `softmax_tensor` to the graph. It is used for PREDICT
-                    # and by the `logging_hook`.
-                    "probabilities": tf.nn.sigmoid(logits, name="sigmoid_tensor")
-                    if self.use_sigmoid
-                    else tf.nn.softmax(logits, name="softmax_tensor"),
-                    "key": key,
-                }
-
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-            if self.embedding_validation and mode != tf.estimator.ModeKeys.TRAIN:
-                predictions_op = predict_using_tensors(
-                    predictions["embeddings"], labels, num=validation_batch_size
-                )
-            else:
-                predictions_op = predictions["classes"]
-
-            accuracy = tf.metrics.accuracy(labels=labels, predictions=predictions_op)
-            metrics = {"accuracy": accuracy}
-
-            loss_kwargs = {}
-            loss_kwargs["logits"] = logits
-            if self.balanced_loss_weight:
-                weights_fn = (
-                    balanced_sigmoid_cross_entropy_loss_weights
-                    if self.use_sigmoid
-                    else balanced_softmax_cross_entropy_loss_weights
-                )
-                labels_one_hot = labels
-                if (not self.labels_are_one_hot) and (not self.use_sigmoid):
-                    labels_one_hot = tf.one_hot(labels, depth=self.n_classes)
-                weights = weights_fn(labels_one_hot, logits.dtype)
-                loss_kwargs["weights"] = weights
-
-            if self.use_sigmoid:
-                loss_kwargs["multi_class_labels"] = labels
-            else:
-                loss_kwargs["labels"] = labels
-
-            if mode == tf.estimator.ModeKeys.EVAL:
-                self.loss = self.loss_op(**loss_kwargs)
-                return tf.estimator.EstimatorSpec(
-                    mode=mode,
-                    predictions=predictions,
-                    loss=self.loss,
-                    train_op=None,
-                    eval_metric_ops=metrics,
-                )
-
-            # restore the model from an extra_checkpoint
-            if extra_checkpoint is not None:
-                if (
-                    "Logits/" not in extra_checkpoint["scopes"]
-                    and not architecture_has_logits
-                ):
-                    logger.warning(
-                        '"Logits/" (which are automatically added by this '
-                        "Logits class are not in the scopes of "
-                        "extra_checkpoint). Did you mean to restore the "
-                        "Logits variables as well?"
-                    )
-                tf.train.init_from_checkpoint(
-                    ckpt_dir_or_file=extra_checkpoint["checkpoint_path"],
-                    assignment_map=extra_checkpoint["scopes"],
-                )
-
-            # Calculate Loss
-            self.loss = self.loss_op(**loss_kwargs)
-
-            if self.vat_loss is not None:
-                vat_loss = self.vat_loss(data, logits, whole_architecture, mode)
-                self.loss = tf.add_n([self.loss, vat_loss], name="total_loss_with_vat")
-
-            # Compute the moving average of all individual losses and the total loss.
-            loss_averages = tf.train.ExponentialMovingAverage(0.9, name="avg")
-            loss_averages_op = loss_averages.apply(
-                tf.get_collection(tf.GraphKeys.LOSSES)
-            )
-
-            train_op = tf.group(
-                self.optimize_loss(
-                    loss=self.loss,
-                    global_step=tf.train.get_or_create_global_step(),
-                    optimizer=self.optimizer,
-                    learning_rate=self.optimize_loss_learning_rate,
-                ),
-                loss_averages_op,
-            )
-
-            # Get the moving average saver after optimizer.minimize is called
-            if self.apply_moving_averages:
-                self.saver, self.scaffold = moving_average_scaffold(
-                    self.optimizer.optimizer
-                    if hasattr(self.optimizer, "optimizer")
-                    else self.optimizer,
-                    config,
-                )
-            else:
-                self.saver, self.scaffold = None, None
-
-            # Log accuracy and loss
-            with tf.name_scope("train_metrics"):
-                tf.summary.scalar("accuracy", accuracy[1])
-                for l in tf.get_collection(tf.GraphKeys.LOSSES):
-                    tf.summary.scalar(l.op.name + "_averaged", loss_averages.average(l))
-
-            # add histograms summaries
-            if add_histograms == "all":
-                for v in tf.all_variables():
-                    tf.summary.histogram(v.name, v)
-            elif add_histograms == "train":
-                for v in tf.trainable_variables():
-                    tf.summary.histogram(v.name, v)
-
-            return tf.estimator.EstimatorSpec(
-                mode=mode,
-                predictions=predictions,
-                loss=self.loss,
-                train_op=train_op,
-                eval_metric_ops=metrics,
-                scaffold=self.scaffold,
-            )
-
-        super(Logits, self).__init__(
-            model_fn=_model_fn, model_dir=model_dir, params=params, config=config
-        )
-
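-# Hypothetical instantiation following the patterns in the class docstring
-# above (my_beautiful_architecture and my_beautiful_loss are user supplied;
-# values are illustrative):
-#
-#   estimator = Logits(
-#       architecture=my_beautiful_architecture,
-#       optimizer=tf.train.GradientDescentOptimizer(1e-3),
-#       loss_op=my_beautiful_loss,
-#       n_classes=10,
-#       model_dir="/tmp/logits_model",
-#   )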
-
-class LogitsCenterLoss(estimator.Estimator):
-    """Logits estimator with center loss.
-
-    NN estimator with `Cross entropy loss
-    <https://www.tensorflow.org/api_docs/python/tf/nn/softmax_cross_entropy_with_logits>`_
-    in the hot-encoded layer :py:class:`bob.learn.tensorflow.estimators.Logits`
-    plus the center loss implemented in: Wen, Yandong, et al. "A
-    discriminative feature learning approach for deep face recognition."
-    European Conference on Computer Vision. Springer, Cham, 2016.
-
-    See :any:`Logits` for the description of parameters.
-    """
-
-    def __init__(
-        self,
-        architecture=None,
-        optimizer=None,
-        config=None,
-        n_classes=0,
-        embedding_validation=False,
-        model_dir="",
-        alpha=0.9,
-        factor=0.01,
-        validation_batch_size=None,
-        params=None,
-        extra_checkpoint=None,
-        apply_moving_averages=True,
-        optimize_loss=tf.contrib.layers.optimize_loss,
-        optimize_loss_learning_rate=None,
-    ):
-
-        self.architecture = architecture
-        self.n_classes = n_classes
-        self.alpha = alpha
-        self.factor = factor
-        self.loss = None
-        self.embedding_validation = embedding_validation
-        self.extra_checkpoint = extra_checkpoint
-        self.optimize_loss = optimize_loss
-        self.optimize_loss_learning_rate = optimize_loss_learning_rate
-        self.apply_moving_averages = apply_moving_averages
-
-        if self.architecture is None:
-            raise ValueError("Please specify a function to build the architecture!")
-
-        if optimizer is None:
-            raise ValueError(
-                "Please specify an optimizer (https://www.tensorflow.org/"
-                "api_guides/python/train)!"
-            )
-
-        if self.n_classes <= 0:
-            raise ValueError("Number of classes must be greater than 0")
-
-        if self.apply_moving_averages and isinstance(optimizer, tf.train.Optimizer):
-            logger.info("Encapsulating the optimizer with the MovingAverageOptimizer")
-            optimizer = tf.contrib.opt.MovingAverageOptimizer(optimizer)
-        self.optimizer = optimizer
-
-        def _model_fn(features, labels, mode, config):
-
-            check_features(features)
-            data = features["data"]
-            key = features["key"]
-
-            # Configure the Training Op (for TRAIN mode)
-            if mode == tf.estimator.ModeKeys.TRAIN:
-                # Building the training graph
-
-                # Checking if we have some variables/scope that we may want to shut down
-                trainable_variables = get_trainable_variables(self.extra_checkpoint)
-                prelogits = self.architecture(
-                    data, mode=mode, trainable_variables=trainable_variables
-                )[0]
-                logits = append_logits(prelogits, n_classes)
-
-                global_step = tf.train.get_or_create_global_step()
-
-                # Compute the moving average of all trainable variables.
-                if self.apply_moving_averages:
-                    variable_averages = tf.train.ExponentialMovingAverage(
-                        0.9999, global_step
-                    )
-                    variable_averages_op = variable_averages.apply(
-                        tf.trainable_variables()
-                    )
-                else:
-                    variable_averages_op = tf.no_op(name="noop")
-
-                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-
-                # Compute Loss (for TRAIN mode)
-                loss_dict = mean_cross_entropy_center_loss(
-                    logits,
-                    prelogits,
-                    labels,
-                    self.n_classes,
-                    alpha=self.alpha,
-                    factor=self.factor,
-                )
-
-                self.loss = loss_dict["loss"]
-                centers = loss_dict["centers"]
-
-                # Compute the moving average of all individual losses and the total loss.
-                loss_averages = tf.train.ExponentialMovingAverage(0.9, name="avg")
-                loss_averages_op = loss_averages.apply(
-                    tf.get_collection(tf.GraphKeys.LOSSES)
-                )
-
-                for l in tf.get_collection(tf.GraphKeys.LOSSES):
-                    tf.summary.scalar(l.op.name, loss_averages.average(l))
-
-                if self.extra_checkpoint is not None:
-                    tf.contrib.framework.init_from_checkpoint(
-                        self.extra_checkpoint["checkpoint_path"],
-                        self.extra_checkpoint["scopes"],
-                    )
-
-                train_op = self.optimize_loss(
-                    self.loss,
-                    global_step,
-                    self.optimize_loss_learning_rate,
-                    optimizer,
-                    update_ops=[variable_averages_op] + update_ops,
-                )
-
-                # Get the moving average saver after optimizer.minimize is called
-                if self.apply_moving_averages:
-                    self.saver, self.scaffold = moving_average_scaffold(
-                        self.optimizer.optimizer
-                        if hasattr(self.optimizer, "optimizer")
-                        else self.optimizer,
-                        config,
-                    )
-                else:
-                    self.saver, self.scaffold = None, None
-
-                train_op = tf.group(
-                    train_op, centers, variable_averages_op, loss_averages_op
-                )
-                return tf.estimator.EstimatorSpec(
-                    mode=mode, loss=self.loss, train_op=train_op
-                )
-
-            # Building the graph for PREDICTION or VALIDATION
-            prelogits, end_points = self.architecture(data, mode=mode)
-            logits = append_logits(prelogits, n_classes)
-            end_points["logits"] = logits
-
-            if self.embedding_validation:
-                # Compute the embeddings
-                embeddings = tf.nn.l2_normalize(prelogits, 1)
-                predictions = {"embeddings": embeddings, "key": key}
-            else:
-                predictions = {
-                    # Generate predictions (for PREDICT and EVAL mode)
-                    "classes": tf.argmax(input=logits, axis=1),
-                    # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
-                    # `logging_hook`.
-                    "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
-                    "key": key,
-                }
-            end_points.update(predictions)
-            self.end_points = end_points
-
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-            # IF Validation
-            loss_dict = mean_cross_entropy_center_loss(
-                logits,
-                prelogits,
-                labels,
-                self.n_classes,
-                alpha=self.alpha,
-                factor=self.factor,
-            )
-            self.loss = loss_dict["loss"]
-
-            if self.embedding_validation:
-                predictions_op = predict_using_tensors(
-                    predictions["embeddings"], labels, num=validation_batch_size
-                )
-                eval_metric_ops = {
-                    "accuracy": tf.metrics.accuracy(
-                        labels=labels, predictions=predictions_op
-                    )
-                }
-                return tf.estimator.EstimatorSpec(
-                    mode=mode, loss=self.loss, eval_metric_ops=eval_metric_ops
-                )
-
-            else:
-                # Add evaluation metrics (for EVAL mode)
-                eval_metric_ops = {
-                    "accuracy": tf.metrics.accuracy(
-                        labels=labels, predictions=predictions["classes"]
-                    )
-                }
-                return tf.estimator.EstimatorSpec(
-                    mode=mode, loss=self.loss, eval_metric_ops=eval_metric_ops
-                )
-
-        super(LogitsCenterLoss, self).__init__(
-            model_fn=_model_fn, model_dir=model_dir, config=config
-        )
-
-
-def moving_average_scaffold(optimizer, config):
-    max_to_keep = 5 if config is None else config.keep_checkpoint_max
-    keep_checkpoint_every_n_hours = (
-        10000.0 if config is None else config.keep_checkpoint_every_n_hours
-    )
-    saver = optimizer.swapping_saver(
-        max_to_keep=max_to_keep,
-        keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
-    )
-    scaffold = tf.train.Scaffold(saver=saver)
-    return saver, scaffold
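
For context on what moving_average_scaffold wires together: the MovingAverageOptimizer keeps shadow (averaged) copies of the variables, and its swapping_saver writes checkpoints with the averaged values swapped in, so evaluation and serving read the smoothed weights. A minimal TF 1.x sketch of that pattern (illustrative names, not bob.learn.tensorflow code):

import tensorflow as tf

# Toy variable and loss, only to make the sketch self-contained.
w = tf.get_variable("w", initializer=1.0)
loss = tf.square(w - 3.0)

optimizer = tf.contrib.opt.MovingAverageOptimizer(tf.train.AdamOptimizer(1e-3))
train_op = optimizer.minimize(loss, global_step=tf.train.get_or_create_global_step())

# swapping_saver() must be created *after* minimize(), once the shadow
# (averaged) variables exist; the checkpoints it writes contain the averaged
# weights, which is exactly what moving_average_scaffold() above relies on.
saver = optimizer.swapping_saver(max_to_keep=5)
scaffold = tf.train.Scaffold(saver=saver)
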
diff --git a/bob/learn/tensorflow/estimators/Regressor.py b/bob/learn/tensorflow/estimators/Regressor.py
deleted file mode 100644
index 7788def1d969e10270eff18650ef23813e8faa6e..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/estimators/Regressor.py
+++ /dev/null
@@ -1,171 +0,0 @@
-from . import check_features, get_trainable_variables
-from .Logits import moving_average_scaffold
-from bob.learn.tensorflow.network.utils import append_logits
-from tensorflow.python.estimator import estimator
-import tensorflow as tf
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class Regressor(estimator.Estimator):
-    """An estimator for regression problems"""
-
-    def __init__(
-        self,
-        architecture,
-        optimizer=tf.train.AdamOptimizer(),
-        loss_op=tf.losses.mean_squared_error,
-        label_dimension=1,
-        config=None,
-        model_dir=None,
-        apply_moving_averages=True,
-        add_regularization_losses=True,
-        extra_checkpoint=None,
-        add_histograms=None,
-        optimize_loss=tf.contrib.layers.optimize_loss,
-        optimize_loss_learning_rate=None,
-        architecture_has_logits=False,
-    ):
-        self.architecture = architecture
-        self.label_dimension = label_dimension
-        self.loss_op = loss_op
-        self.add_regularization_losses = add_regularization_losses
-        self.apply_moving_averages = apply_moving_averages
-
-        if self.apply_moving_averages and isinstance(optimizer, tf.train.Optimizer):
-            logger.info(
-                "Encapsulating the optimizer with the MovingAverageOptimizer"
-            )
-            optimizer = tf.contrib.opt.MovingAverageOptimizer(optimizer)
-
-        self.optimizer = optimizer
-        self.optimize_loss = optimize_loss
-        self.optimize_loss_learning_rate = optimize_loss_learning_rate
-
-        def _model_fn(features, labels, mode, config):
-
-            check_features(features)
-            data = features["data"]
-            key = features["key"]
-
-            # Checking if we have some variables/scope that we may want to shut
-            # down
-            trainable_variables = get_trainable_variables(extra_checkpoint, mode=mode)
-            prelogits, end_points = self.architecture(
-                data, mode=mode, trainable_variables=trainable_variables
-            )
-            if architecture_has_logits:
-                logits, prelogits = prelogits, end_points["prelogits"]
-            else:
-                logits = append_logits(
-                    prelogits, label_dimension, trainable_variables=trainable_variables
-                )
-
-            predictions = {"predictions": logits, "key": key}
-
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-            # in PREDICT mode logits rank must be 2 but in EVAL and TRAIN the
-            # rank should be 1 for the loss function!
-            predictions["predictions"] = tf.squeeze(logits)
-
-            predictions_op = predictions["predictions"]
-
-            # Calculate root mean squared error
-            rmse = tf.metrics.root_mean_squared_error(labels, predictions_op)
-            metrics = {"rmse": rmse}
-
-            if mode == tf.estimator.ModeKeys.EVAL:
-                self.loss = self._get_loss(predictions=predictions_op, labels=labels)
-                return tf.estimator.EstimatorSpec(
-                    mode=mode,
-                    predictions=predictions,
-                    loss=self.loss,
-                    train_op=None,
-                    eval_metric_ops=metrics,
-                )
-
-            # restore the model from an extra_checkpoint
-            if extra_checkpoint is not None:
-                if "Logits/" not in extra_checkpoint["scopes"]:
-                    logger.warning(
-                        '"Logits/" (which is automatically added by this '
-                        "Regressor class) is not in the scopes of "
-                        "extra_checkpoint. Did you mean to restore the "
-                        "Logits variables as well?"
-                    )
-                tf.train.init_from_checkpoint(
-                    ckpt_dir_or_file=extra_checkpoint["checkpoint_path"],
-                    assignment_map=extra_checkpoint["scopes"],
-                )
-
-            # Calculate Loss
-            self.loss = self._get_loss(predictions=predictions_op, labels=labels)
-
-            # Compute the moving average of all individual losses
-            # and the total loss.
-            loss_averages = tf.train.ExponentialMovingAverage(0.9, name="avg")
-            loss_averages_op = loss_averages.apply(
-                tf.get_collection(tf.GraphKeys.LOSSES)
-            )
-
-            train_op = tf.group(
-                self.optimize_loss(
-                    loss=self.loss,
-                    global_step=tf.train.get_or_create_global_step(),
-                    optimizer=self.optimizer,
-                    learning_rate=self.optimize_loss_learning_rate,
-                ),
-                loss_averages_op,
-            )
-
-            # Get the moving average saver after optimizer.minimize is called
-            if self.apply_moving_averages:
-                self.saver, self.scaffold = moving_average_scaffold(
-                    self.optimizer.optimizer
-                    if hasattr(self.optimizer, "optimizer")
-                    else self.optimizer,
-                    config,
-                )
-            else:
-                self.saver, self.scaffold = None, None
-
-            # Log rmse and loss
-            with tf.name_scope("train_metrics"):
-                tf.summary.scalar("rmse", rmse[1])
-                for l in tf.get_collection(tf.GraphKeys.LOSSES):
-                    tf.summary.scalar(l.op.name + "_averaged", loss_averages.average(l))
-
-            # add histograms summaries
-            if add_histograms == "all":
-                for v in tf.all_variables():
-                    tf.summary.histogram(v.name, v)
-            elif add_histograms == "train":
-                for v in tf.trainable_variables():
-                    tf.summary.histogram(v.name, v)
-
-            return tf.estimator.EstimatorSpec(
-                mode=mode,
-                predictions=predictions,
-                loss=self.loss,
-                train_op=train_op,
-                eval_metric_ops=metrics,
-                scaffold=self.scaffold,
-            )
-
-        super(Regressor, self).__init__(
-            model_fn=_model_fn, model_dir=model_dir, config=config
-        )
-
-    def _get_loss(self, predictions, labels):
-        main_loss = self.loss_op(predictions=predictions, labels=labels)
-        if not self.add_regularization_losses:
-            return main_loss
-        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
-        regularization_losses = [
-            tf.cast(l, main_loss.dtype) for l in regularization_losses
-        ]
-        total_loss = tf.add_n([main_loss] + regularization_losses, name="total_loss")
-        return total_loss
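
The _get_loss helper above simply adds the main regression loss to whatever the layers registered under tf.GraphKeys.REGULARIZATION_LOSSES. A minimal sketch of that composition outside the estimator (assuming TF 1.x; the layer and regularizer scale are illustrative):

import tensorflow as tf

x = tf.random_normal([8, 4])
labels = tf.random_normal([8])

# An L2 kernel regularizer registers its penalty in REGULARIZATION_LOSSES.
dense = tf.layers.dense(
    x, 1, kernel_regularizer=tf.contrib.layers.l2_regularizer(1e-4)
)
predictions = tf.squeeze(dense, axis=1)

main_loss = tf.losses.mean_squared_error(labels=labels, predictions=predictions)
reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
total_loss = tf.add_n(
    [main_loss] + [tf.cast(l, main_loss.dtype) for l in reg_losses],
    name="total_loss",
)
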
diff --git a/bob/learn/tensorflow/estimators/Siamese.py b/bob/learn/tensorflow/estimators/Siamese.py
deleted file mode 100755
index f9be6e084eb1dc157ae3f72793835ee51266226d..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/estimators/Siamese.py
+++ /dev/null
@@ -1,161 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from tensorflow.python.estimator import estimator
-from bob.learn.tensorflow.utils import predict_using_tensors
-from . import check_features, get_trainable_variables
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class Siamese(estimator.Estimator):
-    """NN estimator for Siamese Networks.
-    Proposed in: "Chopra, Sumit, Raia Hadsell, and Yann LeCun. "Learning a
-    similarity metric discriminatively, with application to face verification."
-    Computer Vision and Pattern Recognition, 2005. CVPR 2005. IEEE Computer
-    Society Conference on. Vol. 1. IEEE, 2005."
-
-    See :any:`Logits` for the description of parameters.
-    """
-
-    def __init__(
-        self,
-        architecture=None,
-        optimizer=None,
-        config=None,
-        loss_op=None,
-        model_dir="",
-        validation_batch_size=None,
-        params=None,
-        extra_checkpoint=None,
-        add_histograms=None,
-        add_regularization_losses=True,
-        optimize_loss=tf.contrib.layers.optimize_loss,
-        optimize_loss_learning_rate=None,
-    ):
-
-        self.architecture = architecture
-        self.optimizer = optimizer
-        self.loss_op = loss_op
-        self.loss = None
-        self.extra_checkpoint = extra_checkpoint
-        self.add_regularization_losses = add_regularization_losses
-
-        self.optimize_loss = optimize_loss
-        self.optimize_loss_learning_rate = optimize_loss_learning_rate
-
-        if self.architecture is None:
-            raise ValueError("Please specify a function to build the architecture!")
-
-        if self.optimizer is None:
-            raise ValueError(
-                "Please specify an optimizer (https://www.tensorflow.org/api_guides/python/train)!"
-            )
-
-        if self.loss_op is None:
-            raise ValueError("Please specify a function to build the loss!")
-
-        def _model_fn(features, labels, mode):
-            if mode == tf.estimator.ModeKeys.TRAIN:
-
-                # Building one graph; by default everything is trainable.
-                # The input function needs to return a dictionary with the `left` and `right` keys
-                if "left" not in features.keys() or "right" not in features.keys():
-                    raise ValueError(
-                        "The input function needs to return a dictionary with the keys `left` and `right`"
-                    )
-
-                # Building one graph
-                trainable_variables = get_trainable_variables(self.extra_checkpoint)
-                data_left = features["left"]
-                data_left = (
-                    data_left["data"] if isinstance(data_left, dict) else data_left
-                )
-                data_right = features["right"]
-                data_right = (
-                    data_right["data"] if isinstance(data_right, dict) else data_right
-                )
-                prelogits_left, end_points_left = self.architecture(
-                    data_left, mode=mode, trainable_variables=trainable_variables
-                )
-                prelogits_right, end_points_right = self.architecture(
-                    data_right,
-                    reuse=True,
-                    mode=mode,
-                    trainable_variables=trainable_variables,
-                )
-
-                if self.extra_checkpoint is not None:
-                    tf.contrib.framework.init_from_checkpoint(
-                        self.extra_checkpoint["checkpoint_path"],
-                        self.extra_checkpoint["scopes"],
-                    )
-
-                # Compute Loss (for both TRAIN and EVAL modes)
-                labels = (
-                    tf.not_equal(labels["left"], labels["right"])
-                    if isinstance(labels, dict)
-                    else labels
-                )
-                self.loss = self.loss_op(prelogits_left, prelogits_right, labels)
-                if self.add_regularization_losses:
-                    regularization_losses = tf.get_collection(
-                        tf.GraphKeys.REGULARIZATION_LOSSES
-                    )
-                    regularization_losses = [
-                        tf.cast(l, self.loss.dtype) for l in regularization_losses
-                    ]
-                    self.loss = tf.add_n(
-                        [self.loss] + regularization_losses, name="total_loss"
-                    )
-                train_op = self.optimize_loss(
-                    loss=self.loss,
-                    global_step=tf.train.get_or_create_global_step(),
-                    optimizer=self.optimizer,
-                    learning_rate=self.optimize_loss_learning_rate,
-                )
-
-                # add histograms summaries
-                if add_histograms == "all":
-                    for v in tf.all_variables():
-                        tf.summary.histogram(v.name, v)
-                elif add_histograms == "train":
-                    for v in tf.trainable_variables():
-                        tf.summary.histogram(v.name, v)
-
-                return tf.estimator.EstimatorSpec(
-                    mode=mode, loss=self.loss, train_op=train_op
-                )
-
-            check_features(features)
-            data = features["data"]
-            key = features["key"]
-
-            # Compute the embeddings
-            prelogits = self.architecture(data, mode=mode)[0]
-            embeddings = tf.nn.l2_normalize(prelogits, 1)
-            predictions = {"embeddings": embeddings, "key": key}
-
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-            predictions_op = predict_using_tensors(
-                predictions["embeddings"], labels, num=validation_batch_size
-            )
-            eval_metric_ops = {
-                "accuracy": tf.metrics.accuracy(
-                    labels=labels, predictions=predictions_op
-                )
-            }
-
-            return tf.estimator.EstimatorSpec(
-                mode=mode, loss=tf.reduce_mean(1), eval_metric_ops=eval_metric_ops
-            )
-
-        super(Siamese, self).__init__(
-            model_fn=_model_fn, model_dir=model_dir, params=params, config=config
-        )
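
The loss_op expected by Siamese takes the two branch embeddings plus the same/different labels. A minimal contrastive loss sketch with that signature, illustrating Chopra et al.'s formulation (not the exact bob.learn.tensorflow.loss implementation):

import tensorflow as tf

def contrastive_loss_sketch(left, right, labels, margin=1.0):
    # labels: 1 for impostor (different) pairs, 0 for genuine (same) pairs,
    # matching the tf.not_equal(labels["left"], labels["right"]) convention above.
    labels = tf.cast(labels, tf.float32)
    d = tf.sqrt(tf.reduce_sum(tf.square(left - right), axis=1) + 1e-12)
    genuine = (1.0 - labels) * tf.square(d)
    impostor = labels * tf.square(tf.maximum(margin - d, 0.0))
    return tf.reduce_mean(genuine + impostor) / 2.0
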
diff --git a/bob/learn/tensorflow/estimators/Triplet.py b/bob/learn/tensorflow/estimators/Triplet.py
deleted file mode 100644
index c34a964652af7236aea6c853fca61acae511317e..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/estimators/Triplet.py
+++ /dev/null
@@ -1,141 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from tensorflow.python.estimator import estimator
-from bob.learn.tensorflow.utils import predict_using_tensors
-from bob.learn.tensorflow.loss import triplet_loss
-from . import check_features, get_trainable_variables
-
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class Triplet(estimator.Estimator):
-    """NN estimator for Triplet networks.
-
-    Schroff, Florian, Dmitry Kalenichenko, and James Philbin. "Facenet: A
-    unified embedding for face recognition and clustering." Proceedings of the
-    IEEE Conference on Computer Vision and Pattern Recognition. 2015.
-
-    See :any:`Logits` for the description of parameters.
-    """
-
-    def __init__(
-        self,
-        architecture=None,
-        optimizer=None,
-        config=None,
-        loss_op=triplet_loss,
-        model_dir="",
-        validation_batch_size=None,
-        extra_checkpoint=None,
-        optimize_loss=tf.contrib.layers.optimize_loss,
-        optimize_loss_learning_rate=None,
-    ):
-
-        self.architecture = architecture
-        self.optimizer = optimizer
-        self.loss_op = loss_op
-        self.loss = None
-        self.extra_checkpoint = extra_checkpoint
-        self.optimize_loss = optimize_loss
-        self.optimize_loss_learning_rate = optimize_loss_learning_rate
-
-        if self.architecture is None:
-            raise ValueError("Please specify a function to build the architecture!")
-
-        if self.optimizer is None:
-            raise ValueError(
-                "Please specify an optimizer (https://www.tensorflow.org/api_guides/python/train)!"
-            )
-
-        if self.loss_op is None:
-            raise ValueError("Please specify a function to build the loss!")
-
-        def _model_fn(features, labels, mode, params, config):
-
-            if mode == tf.estimator.ModeKeys.TRAIN:
-
-                # The input function needs to return a dictionary with the `anchor`, `positive` and `negative` keys
-                if (
-                    "anchor" not in features.keys()
-                    or "positive" not in features.keys()
-                    or "negative" not in features.keys()
-                ):
-                    raise ValueError(
-                        "The input function needs to return a dictionary with the "
-                        "keys `anchor`, `positive` and `negative`"
-                    )
-
-                # Building one graph
-                trainable_variables = get_trainable_variables(self.extra_checkpoint)
-                prelogits_anchor = self.architecture(
-                    features["anchor"],
-                    mode=mode,
-                    trainable_variables=trainable_variables,
-                )[0]
-                prelogits_positive = self.architecture(
-                    features["positive"],
-                    reuse=True,
-                    mode=mode,
-                    trainable_variables=trainable_variables,
-                )[0]
-                prelogits_negative = self.architecture(
-                    features["negative"],
-                    reuse=True,
-                    mode=mode,
-                    trainable_variables=trainable_variables,
-                )[0]
-
-                if self.extra_checkpoint is not None:
-                    tf.contrib.framework.init_from_checkpoint(
-                        self.extra_checkpoint["checkpoint_path"],
-                        self.extra_checkpoint["scopes"],
-                    )
-
-                # Compute Loss (for both TRAIN and EVAL modes)
-                self.loss = self.loss_op(
-                    prelogits_anchor, prelogits_positive, prelogits_negative
-                )
-                # Configure the Training Op (for TRAIN mode)
-                global_step = tf.train.get_or_create_global_step()
-                train_op = self.optimize_loss(
-                    loss=self.loss,
-                    global_step=global_step,
-                    optimizer=self.optimizer,
-                    learning_rate=self.optimize_loss_learning_rate,
-                )
-                return tf.estimator.EstimatorSpec(
-                    mode=mode, loss=self.loss, train_op=train_op
-                )
-
-            check_features(features)
-            data = features["data"]
-
-            # Compute the embeddings
-            prelogits = self.architecture(data, mode=mode)[0]
-            embeddings = tf.nn.l2_normalize(prelogits, 1)
-            predictions = {"embeddings": embeddings}
-
-            if mode == tf.estimator.ModeKeys.PREDICT:
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-            predictions_op = predict_using_tensors(
-                predictions["embeddings"], labels, num=validation_batch_size
-            )
-            eval_metric_ops = {
-                "accuracy": tf.metrics.accuracy(
-                    labels=labels, predictions=predictions_op
-                )
-            }
-
-            return tf.estimator.EstimatorSpec(
-                mode=mode, loss=tf.reduce_mean(1), eval_metric_ops=eval_metric_ops
-            )
-
-        super(Triplet, self).__init__(
-            model_fn=_model_fn, model_dir=model_dir, config=config
-        )
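
For reference, the triplet_loss imported above follows the FaceNet formulation: pull the anchor towards the positive and push it away from the negative by at least a margin. A minimal sketch (illustrative, not the packaged implementation):

import tensorflow as tf

def triplet_loss_sketch(anchor, positive, negative, margin=0.2):
    d_pos = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    d_neg = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    # Hinge on the margin: zero loss once d_neg > d_pos + margin.
    return tf.reduce_mean(tf.maximum(d_pos - d_neg + margin, 0.0))
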
diff --git a/bob/learn/tensorflow/estimators/__init__.py b/bob/learn/tensorflow/estimators/__init__.py
deleted file mode 100644
index 10f500c531d9c10e19204272d5dd1a9fb27355a8..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/estimators/__init__.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-
-from ..utils import get_trainable_variables, check_features
-from .utils import MovingAverageOptimizer, learning_rate_decay_fn
-from .Logits import Logits, LogitsCenterLoss
-from .Siamese import Siamese
-from .Triplet import Triplet
-from .Regressor import Regressor
-
-
-# gets sphinx autodoc done right - don't remove it
-def __appropriate__(*args):
-    """Says object was actually declared here, an not on the import module.
-
-  Parameters:
-
-    *args: An iterable of objects to modify
-
-  Resolves `Sphinx referencing issues
-  <https://github.com/sphinx-doc/sphinx/issues/3048>`
-  """
-
-    for obj in args:
-        obj.__module__ = __name__
-
-
-__appropriate__(
-    Logits, LogitsCenterLoss, Siamese, Triplet, Regressor, MovingAverageOptimizer
-)
-__all__ = [_ for _ in dir() if not _.startswith("_")]
diff --git a/bob/learn/tensorflow/estimators/utils.py b/bob/learn/tensorflow/estimators/utils.py
deleted file mode 100644
index fc202a06f70f10055cdcab2f3fa651ee325ba768..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/estimators/utils.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import tensorflow as tf
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-class MovingAverageOptimizer:
-
-    """Creates a callable that can be given to bob.learn.tensorflow.estimators
-
-    This class is useful when you want to have a learning_rate_decay_fn **and** a moving
-    average optimizer **and** use bob.learn.tensorflow.estimators
-
-    Attributes
-    ----------
-    optimizer : object
-        A tf.train.Optimizer that is created and wrapped with
-        tf.contrib.opt.MovingAverageOptimizer.
-
-    Example
-    -------
-    >>> import tensorflow as tf
-    >>> from bob.learn.tensorflow.estimators import MovingAverageOptimizer
-    >>> optimizer = MovingAverageOptimizer("adam")
-    >>> actual_optimizer = optimizer(lr=1e-3)
-    >>> isinstance(actual_optimizer, tf.train.Optimizer)
-    True
-    >>> actual_optimizer is optimizer.optimizer
-    True
-    """
-
-    def __init__(self, optimizer, **kwargs):
-        super().__init__(**kwargs)
-        assert isinstance(optimizer, str), optimizer
-        self._optimizer = optimizer
-
-    def __call__(self, lr):
-        logger.info("Encapsulating the optimizer with the MovingAverageOptimizer")
-
-        if self._optimizer == "sgd":
-            optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
-        elif self._optimizer == "adam":
-            optimizer = tf.train.AdamOptimizer(learning_rate=lr)
-
-        self.optimizer = tf.contrib.opt.MovingAverageOptimizer(optimizer)
-
-        return self.optimizer
-
-
-def learning_rate_decay_fn(
-    learning_rate, global_step, decay_steps, decay_rate, staircase=False
-):
-    """A simple learning_rate_decay_fn.
-
-    To use it with ``tf.contrib.layer.optimize_loss``:
-
-    >>> from bob.learn.tensorflow.estimators import learning_rate_decay_fn
-    >>> from functools import partial
-    >>> learning_rate_decay_fn = partial(
-    ...     learning_rate_decay_fn,
-    ...     decay_steps=1000,
-    ...     decay_rate=0.9,
-    ...     staircase=True,
-    ... )
-    """
-    return tf.train.exponential_decay(
-        learning_rate,
-        global_step=global_step,
-        decay_steps=decay_steps,
-        decay_rate=decay_rate,
-        staircase=staircase,
-    )
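
Both helpers were meant to be plugged into tf.contrib.layers.optimize_loss, which accepts a callable optimizer (called with the learning-rate tensor) and an optional learning_rate_decay_fn. A hedged wiring sketch, assuming MovingAverageOptimizer and learning_rate_decay_fn from the module above are in scope (TF 1.x; the toy loss is illustrative):

import tensorflow as tf
from functools import partial

w = tf.get_variable("w", initializer=1.0)
loss = tf.square(w - 3.0)

decay_fn = partial(
    learning_rate_decay_fn, decay_steps=1000, decay_rate=0.9, staircase=True
)

train_op = tf.contrib.layers.optimize_loss(
    loss=loss,
    global_step=tf.train.get_or_create_global_step(),
    learning_rate=1e-3,
    optimizer=MovingAverageOptimizer("adam"),  # called with the decayed lr
    learning_rate_decay_fn=decay_fn,
)
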
diff --git a/bob/learn/tensorflow/examples/mnist/mnist_config.py b/bob/learn/tensorflow/examples/mnist/mnist_config.py
deleted file mode 100644
index 38e8da80cd299e97e80acb1843462e0a1fa3b120..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/examples/mnist/mnist_config.py
+++ /dev/null
@@ -1,233 +0,0 @@
-#  Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-#  Licensed under the Apache License, Version 2.0 (the "License");
-#  you may not use this file except in compliance with the License.
-#  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-"""Convolutional Neural Network Estimator for MNIST, built with tf.layers."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-# create reproducible nets:
-from bob.learn.tensorflow.utils.reproducible import run_config
-import tensorflow as tf
-from bob.db.mnist import Database
-
-model_dir = '/tmp/mnist_model'
-train_tfrecords = ['/tmp/mnist_data/train.tfrecords']
-eval_tfrecords = ['/tmp/mnist_data/test.tfrecords']
-
-run_config = run_config.replace(keep_checkpoint_max=10**3)
-run_config = run_config.replace(save_checkpoints_secs=60)
-
-
-def input_fn(mode, batch_size=1):
-    """A simple input_fn using the contrib.data input pipeline."""
-
-    def example_parser(serialized_example):
-        """Parses a single tf.Example into image and label tensors."""
-        features = tf.parse_single_example(
-            serialized_example,
-            features={
-                'data': tf.FixedLenFeature([], tf.string),
-                'label': tf.FixedLenFeature([], tf.int64),
-                'key': tf.FixedLenFeature([], tf.string),
-            })
-        image = tf.decode_raw(features['data'], tf.uint8)
-        image.set_shape([28 * 28])
-
-        # Normalize the values of the image from the range
-        # [0, 255] to [-0.5, 0.5]
-        image = tf.cast(image, tf.float32) / 255 - 0.5
-        label = tf.cast(features['label'], tf.int32)
-
-        key = tf.cast(features['key'], tf.string)
-        return image, tf.one_hot(label, 10), key
-
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        tfrecords_files = train_tfrecords
-    elif mode == tf.estimator.ModeKeys.EVAL:
-        tfrecords_files = eval_tfrecords
-    else:
-        assert mode == tf.estimator.ModeKeys.PREDICT, 'invalid mode'
-        tfrecords_files = eval_tfrecords
-
-    for tfrecords_file in tfrecords_files:
-        assert tf.gfile.Exists(tfrecords_file), (
-            'Run github.com:tensorflow/models/official/mnist/'
-            'convert_to_records.py first to convert the MNIST data to '
-            'TFRecord file format.')
-
-    dataset = tf.data.TFRecordDataset(tfrecords_files)
-
-    # For training, repeat the dataset forever
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        dataset = dataset.repeat()
-
-    # Map example_parser over dataset, and batch results by up to batch_size
-    dataset = dataset.map(
-        example_parser, num_parallel_calls=1).prefetch(batch_size)
-    dataset = dataset.batch(batch_size)
-    images, labels, keys = dataset.make_one_shot_iterator().get_next()
-
-    return {'images': images, 'keys': keys}, labels
-
-
-def train_input_fn():
-    return input_fn(tf.estimator.ModeKeys.TRAIN)
-
-
-def eval_input_fn():
-    return input_fn(tf.estimator.ModeKeys.EVAL)
-
-
-def predict_input_fn():
-    return input_fn(tf.estimator.ModeKeys.PREDICT)
-
-
-def mnist_model(inputs, mode):
-    """Takes the MNIST inputs and mode and outputs a tensor of logits."""
-    # Input Layer
-    # Reshape X to 4-D tensor: [batch_size, width, height, channels]
-    # MNIST images are 28x28 pixels, and have one color channel
-    inputs = tf.reshape(inputs, [-1, 28, 28, 1])
-    data_format = 'channels_last'
-
-    if tf.test.is_built_with_cuda():
-        # When running on GPU, transpose the data from channels_last (NHWC) to
-        # channels_first (NCHW) to improve performance. See
-        # https://www.tensorflow.org/performance/performance_guide#data_formats
-        data_format = 'channels_first'
-        inputs = tf.transpose(inputs, [0, 3, 1, 2])
-
-    # Convolutional Layer #1
-    # Computes 32 features using a 5x5 filter with ReLU activation.
-    # Padding is added to preserve width and height.
-    # Input Tensor Shape: [batch_size, 28, 28, 1]
-    # Output Tensor Shape: [batch_size, 28, 28, 32]
-    conv1 = tf.layers.conv2d(
-        inputs=inputs,
-        filters=32,
-        kernel_size=[5, 5],
-        padding='same',
-        activation=tf.nn.relu,
-        data_format=data_format)
-
-    # Pooling Layer #1
-    # First max pooling layer with a 2x2 filter and stride of 2
-    # Input Tensor Shape: [batch_size, 28, 28, 32]
-    # Output Tensor Shape: [batch_size, 14, 14, 32]
-    pool1 = tf.layers.max_pooling2d(
-        inputs=conv1, pool_size=[2, 2], strides=2, data_format=data_format)
-
-    # Convolutional Layer #2
-    # Computes 64 features using a 5x5 filter.
-    # Padding is added to preserve width and height.
-    # Input Tensor Shape: [batch_size, 14, 14, 32]
-    # Output Tensor Shape: [batch_size, 14, 14, 64]
-    conv2 = tf.layers.conv2d(
-        inputs=pool1,
-        filters=64,
-        kernel_size=[5, 5],
-        padding='same',
-        activation=tf.nn.relu,
-        data_format=data_format)
-
-    # Pooling Layer #2
-    # Second max pooling layer with a 2x2 filter and stride of 2
-    # Input Tensor Shape: [batch_size, 14, 14, 64]
-    # Output Tensor Shape: [batch_size, 7, 7, 64]
-    pool2 = tf.layers.max_pooling2d(
-        inputs=conv2, pool_size=[2, 2], strides=2, data_format=data_format)
-
-    # Flatten tensor into a batch of vectors
-    # Input Tensor Shape: [batch_size, 7, 7, 64]
-    # Output Tensor Shape: [batch_size, 7 * 7 * 64]
-    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
-
-    # Dense Layer
-    # Densely connected layer with 1024 neurons
-    # Input Tensor Shape: [batch_size, 7 * 7 * 64]
-    # Output Tensor Shape: [batch_size, 1024]
-    dense = tf.layers.dense(
-        inputs=pool2_flat, units=1024, activation=tf.nn.relu)
-
-    # Add dropout operation; 0.6 probability that element will be kept
-    dropout = tf.layers.dropout(
-        inputs=dense, rate=0.4, training=(mode == tf.estimator.ModeKeys.TRAIN))
-
-    # Logits layer
-    # Input Tensor Shape: [batch_size, 1024]
-    # Output Tensor Shape: [batch_size, 10]
-    logits = tf.layers.dense(inputs=dropout, units=10)
-    return logits
-
-
-def model_fn(features, labels=None, mode=tf.estimator.ModeKeys.TRAIN):
-    """Model function for MNIST."""
-    keys = features['keys']
-    features = features['images']
-    logits = mnist_model(features, mode)
-
-    predictions = {
-        'classes': tf.argmax(input=logits, axis=1),
-        'probabilities': tf.nn.softmax(logits, name='softmax_tensor'),
-        'keys': keys,
-    }
-
-    if mode == tf.estimator.ModeKeys.PREDICT:
-        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-    loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits)
-
-    # Configure the training op
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
-        train_op = optimizer.minimize(loss,
-                                      tf.train.get_or_create_global_step())
-    else:
-        train_op = None
-
-    accuracy = tf.metrics.accuracy(
-        tf.argmax(labels, axis=1), predictions['classes'])
-    metrics = {'accuracy': accuracy}
-
-    with tf.name_scope('train_metrics'):
-        # Create a tensor named train_accuracy for logging purposes
-        tf.summary.scalar('train_accuracy', accuracy[1])
-
-        tf.summary.scalar('train_loss', loss)
-
-    return tf.estimator.EstimatorSpec(
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metric_ops=metrics)
-
-
-estimator = tf.estimator.Estimator(
-    model_fn=model_fn, model_dir=model_dir, params=None, config=run_config)
-
-output = train_tfrecords[0]
-db = Database()
-data, labels = db.data(groups='train')
-
-# output = eval_tfrecords[0]
-# db = Database()
-# data, labels = db.data(groups='test')
-
-samples = zip(data, labels, (str(i) for i in range(len(data))))
-
-
-def reader(sample):
-    return sample
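
This config builds everything the MNIST example needs; with it in place, the usual Estimator loop applies. A minimal sketch of driving the objects defined above (standard tf.estimator calls; step counts are illustrative):

# Illustration only: train, evaluate and predict with the estimator above.
estimator.train(input_fn=train_input_fn, max_steps=10000)
print(estimator.evaluate(input_fn=eval_input_fn))
for prediction in estimator.predict(input_fn=predict_input_fn):
    print(prediction["classes"], prediction["keys"])
    break
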
diff --git a/bob/learn/tensorflow/examples/mnist/tfrecords.py b/bob/learn/tensorflow/examples/mnist/tfrecords.py
deleted file mode 100644
index 2a4eb216d7e4275ee339a961a1667c6705086bda..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/examples/mnist/tfrecords.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# Required objects:
-
-# you need a database object that inherits from
-# bob.bio.base.database.BioDatabase (PAD dbs work too)
-database = Database()
-
-# the directory pointing to where the processed data is:
-data_dir = '/idiap/temp/user/database_name/sub_directory/preprocessed'
-
-# the directory to save the tfrecords in:
-output_dir = '/idiap/temp/user/database_name/sub_directory'
-
-
-# A function that converts a BioFile or a PadFile to a label:
-# Example for PAD
-def file_to_label(f):
-    return f.attack_type is None
-
-
-# Example for Bio (You may want to run this script for groups=['world'] only
-# in biometric recognition experiments.)
-CLIENT_IDS = (str(f.client_id) for f in database.all_files(groups=groups))
-CLIENT_IDS = list(set(CLIENT_IDS))
-CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))
-
-
-def file_to_label(f):
-    return CLIENT_IDS[str(f.client_id)]
-
-
-# Optional objects:
-
-# The groups that you want to create tfrecords for. It should be a list of
-# 'world' ('train' in bob.pad.base), 'dev', and 'eval' values. [default:
-# 'world']
-groups = ['world']
-
-# you need a reader function that reads the preprocessed files; pick one of
-# the following. [default: bob.bio.base.utils.load]
-reader = Preprocessor().read_data
-# or
-reader = Extractor().read_feature
-# or
-from bob.bio.base.utils import load as reader
-
-# or a reader that casts images to uint8:
-
-
-def reader(path):
-    data = bob.bio.base.utils.load(path)
-    return data.astype("uint8")
-
-
-# extension of the preprocessed files. [default: '.hdf5']
-data_extension = '.hdf5'
-
-# Shuffle the files before writing them into a tfrecords. [default: False]
-shuffle = True
-
-# Whether each file contains one sample or more. [default: True] If
-# this is False, the loaded samples from a file are iterated over and each
-# of them is saved as an independent feature.
-one_file_one_sample = True
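
This file is a configuration consumed by a tfrecords-writing script; the objects above are used roughly as follows. A hedged sketch of the write loop (an illustration under assumed bob.bio.base file semantics, not the actual bob script):

import os
import tensorflow as tf

def bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

output = os.path.join(output_dir, "train.tfrecords")  # illustrative path
with tf.python_io.TFRecordWriter(output) as writer:
    for f in database.all_files(groups=groups):
        data = reader(f.make_path(data_dir, data_extension))
        example = tf.train.Example(features=tf.train.Features(feature={
            "data": bytes_feature(data.tostring()),
            "label": int64_feature(file_to_label(f)),
            "key": bytes_feature(str(f.path).encode("utf-8")),
        }))
        writer.write(example.SerializeToString())
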
diff --git a/bob/learn/tensorflow/extractors/Base.py b/bob/learn/tensorflow/extractors/Base.py
deleted file mode 100644
index c4e1064ff9278b9db778149dc4d2717f70d0ca06..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/extractors/Base.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import tensorflow as tf
-import os
-import numpy as np
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-def normalize_checkpoint_path(path):
-    if os.path.splitext(path)[1] == ".meta":
-        filename = os.path.splitext(path)[0]
-    elif os.path.isdir(path):
-        filename = tf.train.latest_checkpoint(path)
-    else:
-        filename = path
-
-    return filename
-
-
-class Base:
-    def __init__(self, output_name, input_shape, checkpoint, scopes,
-                 input_transform=None, output_transform=None,
-                 input_dtype='float32', extra_feed=None, **kwargs):
-
-        self.output_name = output_name
-        self.input_shape = input_shape
-        self.checkpoint = normalize_checkpoint_path(checkpoint)
-        self.scopes = scopes
-        self.input_transform = input_transform
-        self.output_transform = output_transform
-        self.input_dtype = input_dtype
-        self.extra_feed = extra_feed
-        self.session = None
-        super().__init__(**kwargs)
-
-    def load(self):
-        self.session = tf.Session(graph=tf.Graph())
-
-        with self.session.as_default(), self.session.graph.as_default():
-
-            self.input = data = tf.placeholder(self.input_dtype, self.input_shape)
-
-            if self.input_transform is not None:
-                data = self.input_transform(data)
-
-            self.output = self.get_output(data, tf.estimator.ModeKeys.PREDICT)
-
-            if self.output_transform is not None:
-                self.output = self.output_transform(self.output)
-
-            tf.train.init_from_checkpoint(
-                ckpt_dir_or_file=self.checkpoint,
-                assignment_map=self.scopes,
-            )
-            # global_variables_initializer must run after init_from_checkpoint
-            self.session.run(tf.global_variables_initializer())
-            logger.info('Restored the model from %s', self.checkpoint)
-
-    def __call__(self, data):
-        if self.session is None:
-            self.load()
-
-        data = np.ascontiguousarray(data, dtype=self.input_dtype)
-        feed_dict = {self.input: data}
-        if self.extra_feed is not None:
-            feed_dict.update(self.extra_feed)
-
-        return self.session.run(self.output, feed_dict=feed_dict)
-
-    def get_output(self, data, mode):
-        raise NotImplementedError()
diff --git a/bob/learn/tensorflow/extractors/Estimator.py b/bob/learn/tensorflow/extractors/Estimator.py
deleted file mode 100644
index 768b539f1906d2a70f5b8a71537d16a1924ac472..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/extractors/Estimator.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import tensorflow as tf
-from .Base import Base
-
-
-class Estimator(Base):
-    def __init__(self, estimator, **kwargs):
-        self.estimator = estimator
-        kwargs['checkpoint'] = kwargs.get('checkpoint', estimator.model_dir)
-        super().__init__(**kwargs)
-
-    def get_output(self, data, mode):
-        features = {'data': data, 'key': tf.constant(['key'])}
-        self.estimator_spec = self.estimator._call_model_fn(
-            features, None, mode, None)
-        self.end_points = self.estimator.end_points
-        return self.end_points[self.output_name]
diff --git a/bob/learn/tensorflow/extractors/Generic.py b/bob/learn/tensorflow/extractors/Generic.py
deleted file mode 100644
index 3aab2573317c0916531eea2a748329147e543f87..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/extractors/Generic.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from .Base import Base
-
-
-class Generic(Base):
-    def __init__(self, architecture, **kwargs):
-
-        self.architecture = architecture
-        super().__init__(**kwargs)
-
-    def get_output(self, data, mode):
-        self.end_points = self.architecture(data, mode=mode)[1]
-        return self.end_points[self.output_name]
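
A hedged usage sketch for Generic: given an architecture function following the (prelogits, end_points) contract used throughout these estimators, the extractor restores a checkpoint and maps numpy arrays to the named end point. Paths, shapes and my_architecture below are placeholders:

import numpy as np

# `my_architecture` is assumed to return (prelogits, end_points).
extractor = Generic(
    architecture=my_architecture,
    output_name="prelogits",          # key into end_points
    input_shape=(None, 28, 28, 1),
    checkpoint="/path/to/model_dir",  # dir, .meta file, or checkpoint prefix
    scopes={"/": "/"},                # restore all variables as-is
)
embeddings = extractor(np.zeros((2, 28, 28, 1)))  # lazily loads on first call
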
diff --git a/bob/learn/tensorflow/gan/__init__.py b/bob/learn/tensorflow/gan/__init__.py
deleted file mode 100644
index 502898e8d514775462a0e9de662ffab6c6cc4c25..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/gan/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from . import spectral_normalization
-from . import losses
diff --git a/bob/learn/tensorflow/gan/losses.py b/bob/learn/tensorflow/gan/losses.py
deleted file mode 100644
index ec378245953d60c96d4634eb797d6d1c30e2dec0..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/gan/losses.py
+++ /dev/null
@@ -1,171 +0,0 @@
-import tensorflow as tf
-
-
-def relativistic_discriminator_loss(
-    discriminator_real_outputs,
-    discriminator_gen_outputs,
-    label_smoothing=0.25,
-    real_weights=1.0,
-    generated_weights=1.0,
-    scope=None,
-    loss_collection=tf.GraphKeys.LOSSES,
-    reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
-    add_summaries=False,
-):
-    """Relativistic (average) loss
-
-  Args:
-    discriminator_real_outputs: Discriminator output on real data.
-    discriminator_gen_outputs: Discriminator output on generated data. Expected
-      to be in the range of (-inf, inf).
-    label_smoothing: The amount of smoothing for positive labels. This technique
-      is taken from `Improved Techniques for Training GANs`
-      (https://arxiv.org/abs/1606.03498). `0.0` means no smoothing.
-    real_weights: Optional `Tensor` whose rank is either 0, or the same rank as
-      `real_data`, and must be broadcastable to `real_data` (i.e., all
-      dimensions must be either `1`, or the same as the corresponding
-      dimension).
-    generated_weights: Same as `real_weights`, but for `generated_data`.
-    scope: The scope for the operations performed in computing the loss.
-    loss_collection: collection to which this loss will be added.
-    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
-    add_summaries: Whether or not to add summaries for the loss.
-
-  Returns:
-    A loss Tensor. The shape depends on `reduction`.
-  """
-    with tf.name_scope(
-        scope,
-        "discriminator_relativistic_loss",
-        (
-            discriminator_real_outputs,
-            discriminator_gen_outputs,
-            real_weights,
-            generated_weights,
-            label_smoothing,
-        ),
-    ) as scope:
-
-        real_logit = discriminator_real_outputs - tf.reduce_mean(
-            discriminator_gen_outputs
-        )
-        fake_logit = discriminator_gen_outputs - tf.reduce_mean(
-            discriminator_real_outputs
-        )
-
-        loss_on_real = tf.losses.sigmoid_cross_entropy(
-            tf.ones_like(real_logit),
-            real_logit,
-            real_weights,
-            label_smoothing,
-            scope,
-            loss_collection=None,
-            reduction=reduction,
-        )
-        loss_on_generated = tf.losses.sigmoid_cross_entropy(
-            tf.zeros_like(fake_logit),
-            fake_logit,
-            generated_weights,
-            scope=scope,
-            loss_collection=None,
-            reduction=reduction,
-        )
-
-        loss = loss_on_real + loss_on_generated
-        tf.losses.add_loss(loss, loss_collection)
-
-        if add_summaries:
-            tf.summary.scalar("discriminator_gen_relativistic_loss", loss_on_generated)
-            tf.summary.scalar("discriminator_real_relativistic_loss", loss_on_real)
-            tf.summary.scalar("discriminator_relativistic_loss", loss)
-
-    return loss
-
-
-def relativistic_generator_loss(
-    discriminator_real_outputs,
-    discriminator_gen_outputs,
-    label_smoothing=0.0,
-    real_weights=1.0,
-    generated_weights=1.0,
-    scope=None,
-    loss_collection=tf.GraphKeys.LOSSES,
-    reduction=tf.losses.Reduction.SUM_BY_NONZERO_WEIGHTS,
-    add_summaries=False,
-    confusion_labels=False,
-):
-    """Relativistic (average) loss
-
-  Args:
-    discriminator_real_outputs: Discriminator output on real data.
-    discriminator_gen_outputs: Discriminator output on generated data. Expected
-      to be in the range of (-inf, inf).
-    label_smoothing: The amount of smoothing for positive labels. This technique
-      is taken from `Improved Techniques for Training GANs`
-      (https://arxiv.org/abs/1606.03498). `0.0` means no smoothing.
-    real_weights: Optional `Tensor` whose rank is either 0, or the same rank as
-      `real_data`, and must be broadcastable to `real_data` (i.e., all
-      dimensions must be either `1`, or the same as the corresponding
-      dimension).
-    generated_weights: Same as `real_weights`, but for `generated_data`.
-    scope: The scope for the operations performed in computing the loss.
-    loss_collection: collection to which this loss will be added.
-    reduction: A `tf.compat.v1.losses.Reduction` to apply to loss.
-    add_summaries: Whether or not to add summaries for the loss.
-    confusion_labels: If `True`, both real and generated logits are pushed
-      towards a 0.5 target instead of using swapped real/fake labels.
-
-  Returns:
-    A loss Tensor. The shape depends on `reduction`.
-  """
-    with tf.name_scope(
-        scope,
-        "generator_relativistic_loss",
-        (
-            discriminator_real_outputs,
-            discriminator_gen_outputs,
-            real_weights,
-            generated_weights,
-            label_smoothing,
-        ),
-    ) as scope:
-
-        real_logit = discriminator_real_outputs - tf.reduce_mean(
-            discriminator_gen_outputs
-        )
-        fake_logit = discriminator_gen_outputs - tf.reduce_mean(
-            discriminator_real_outputs
-        )
-
-        if confusion_labels:
-            real_labels = tf.ones_like(real_logit) / 2
-            fake_labels = tf.ones_like(fake_logit) / 2
-        else:
-            real_labels = tf.zeros_like(real_logit)
-            fake_labels = tf.ones_like(fake_logit)
-
-        loss_on_real = tf.losses.sigmoid_cross_entropy(
-            real_labels,
-            real_logit,
-            real_weights,
-            label_smoothing,
-            scope,
-            loss_collection=None,
-            reduction=reduction,
-        )
-        loss_on_generated = tf.losses.sigmoid_cross_entropy(
-            fake_labels,
-            fake_logit,
-            generated_weights,
-            scope=scope,
-            loss_collection=None,
-            reduction=reduction,
-        )
-
-        loss = loss_on_real + loss_on_generated
-        tf.losses.add_loss(loss, loss_collection)
-
-        if add_summaries:
-            tf.summary.scalar("generator_gen_relativistic_loss", loss_on_generated)
-            tf.summary.scalar("generator_real_relativistic_loss", loss_on_real)
-            tf.summary.scalar("generator_relativistic_loss", loss)
-
-    return loss
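
Both losses implement the relativistic average GAN idea: each discriminator output is re-centred on the mean output of the opposite class before the usual sigmoid cross-entropy is applied. A short sketch of that re-centring and of calling the two functions above (toy values, illustrative only):

import tensorflow as tf

d_real = tf.constant([1.2, 0.7, 0.9])    # D(x) on real samples
d_fake = tf.constant([-0.3, 0.1, -0.8])  # D(G(z)) on generated samples

# Re-centred ("relativistic average") logits, as computed inside both losses:
real_logit = d_real - tf.reduce_mean(d_fake)
fake_logit = d_fake - tf.reduce_mean(d_real)

# The discriminator pushes real_logit towards 1 and fake_logit towards 0; the
# generator uses the swapped targets (or 0.5/0.5 with confusion_labels=True).
d_loss = relativistic_discriminator_loss(d_real, d_fake)
g_loss = relativistic_generator_loss(d_real, d_fake)
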
diff --git a/bob/learn/tensorflow/gan/spectral_normalization.py b/bob/learn/tensorflow/gan/spectral_normalization.py
deleted file mode 100644
index ad2ecfaa4487892592f733858c88f0c24b569912..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/gan/spectral_normalization.py
+++ /dev/null
@@ -1,316 +0,0 @@
-# Copied from: https://github.com/tensorflow/tensorflow/blob/c4f40aea1d4f916aa3dfeb79f024c495ac609106/tensorflow/contrib/gan/python/features/python/spectral_normalization_impl.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Keras-like layers and utilities that implement Spectral Normalization.
-
-Based on "Spectral Normalization for Generative Adversarial Networks" by Miyato,
-et al in ICLR 2018. https://openreview.net/pdf?id=B1QRgziT-
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import contextlib
-import numbers
-import re
-
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.keras.engine import base_layer_utils as keras_base_layer_utils
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import nn
-from tensorflow.python.ops import variable_scope
-from tensorflow.python.platform import tf_logging as logging
-
-__all__ = [
-    'compute_spectral_norm', 'spectral_normalize', 'spectral_norm_regularizer',
-    'spectral_normalization_custom_getter', 'keras_spectral_normalization'
-]
-
-# tf.bfloat16 should work, but tf.matmul converts those to tf.float32 which then
-# can't directly be assigned back to the tf.bfloat16 variable.
-_OK_DTYPES_FOR_SPECTRAL_NORM = (dtypes.float16, dtypes.float32, dtypes.float64)
-_PERSISTED_U_VARIABLE_SUFFIX = 'spectral_norm_u'
-
-
-def compute_spectral_norm(w_tensor, power_iteration_rounds=1, name=None):
-  """Estimates the largest singular value in the weight tensor.
-
-  Args:
-    w_tensor: The weight matrix whose spectral norm should be computed.
-    power_iteration_rounds: The number of iterations of the power method to
-      perform. A higher number yields a better approximation.
-    name: An optional scope name.
-
-  Returns:
-    The largest singular value (the spectral norm) of w.
-  """
-  with variable_scope.variable_scope(name, 'spectral_norm'):
-    # The paper says to flatten convnet kernel weights from
-    # (C_out, C_in, KH, KW) to (C_out, C_in * KH * KW). But TensorFlow's Conv2D
-    # kernel weight shape is (KH, KW, C_in, C_out), so it should be reshaped to
-    # (KH * KW * C_in, C_out), and similarly for other layers that put output
-    # channels as last dimension.
-    # n.b. this means that w here is equivalent to w.T in the paper.
-    w = array_ops.reshape(w_tensor, (-1, w_tensor.get_shape()[-1]))
-
-    # Persisted approximation of first left singular vector of matrix `w`.
-    u_var = variable_scope.get_variable(
-        _PERSISTED_U_VARIABLE_SUFFIX,
-        shape=(w.shape[0], 1),
-        dtype=w.dtype,
-        initializer=init_ops.random_normal_initializer(),
-        trainable=False)
-    u = u_var
-
-    # Use power iteration method to approximate spectral norm.
-    for _ in range(power_iteration_rounds):
-      # `v` approximates the first right singular vector of matrix `w`.
-      v = nn.l2_normalize(math_ops.matmul(array_ops.transpose(w), u))
-      u = nn.l2_normalize(math_ops.matmul(w, v))
-
-    # Update persisted approximation.
-    with ops.control_dependencies([u_var.assign(u, name='update_u')]):
-      u = array_ops.identity(u)
-
-    u = array_ops.stop_gradient(u)
-    v = array_ops.stop_gradient(v)
-
-    # Largest singular value of `w`.
-    spectral_norm = math_ops.matmul(
-        math_ops.matmul(array_ops.transpose(u), w), v)
-    spectral_norm.shape.assert_is_fully_defined()
-    spectral_norm.shape.assert_is_compatible_with([1, 1])
-
-    return spectral_norm[0][0]
-
-
-def spectral_normalize(w, power_iteration_rounds=1, name=None):
-  """Normalizes a weight matrix by its spectral norm.
-
-  Args:
-    w: The weight matrix to be normalized.
-    power_iteration_rounds: The number of iterations of the power method to
-      perform. A higher number yields a better approximation.
-    name: An optional scope name.
-
-  Returns:
-    A normalized weight matrix tensor.
-  """
-  with variable_scope.variable_scope(name, 'spectral_normalize'):
-    w_normalized = w / compute_spectral_norm(
-        w, power_iteration_rounds=power_iteration_rounds)
-    return array_ops.reshape(w_normalized, w.get_shape())
-
-
-def spectral_norm_regularizer(scale, power_iteration_rounds=1, scope=None):
-  """Returns a functions that can be used to apply spectral norm regularization.
-
-  Small spectral norms enforce a small Lipschitz constant, which is necessary
-  for Wasserstein GANs.
-
-  Args:
-    scale: A scalar multiplier. 0.0 disables the regularizer.
-    power_iteration_rounds: The number of iterations of the power method to
-      perform. A higher number yields a better approximation.
-    scope: An optional scope name.
-
-  Returns:
-    A function with the signature `sn(weights)` that applies spectral norm
-    regularization.
-
-  Raises:
-    ValueError: If scale is negative or if scale is not a float.
-  """
-  if isinstance(scale, numbers.Integral):
-    raise ValueError('scale cannot be an integer: %s' % scale)
-  if isinstance(scale, numbers.Real):
-    if scale < 0.0:
-      raise ValueError(
-          'Setting a scale less than 0 on a regularizer: %g' % scale)
-    if scale == 0.0:
-      logging.info('Scale of 0 disables regularizer.')
-      return lambda _: None
-
-  def sn(weights, name=None):
-    """Applies spectral norm regularization to weights."""
-    with ops.name_scope(scope, 'SpectralNormRegularizer', [weights]) as name:
-      scale_t = ops.convert_to_tensor(
-          scale, dtype=weights.dtype.base_dtype, name='scale')
-      return math_ops.multiply(
-          scale_t,
-          compute_spectral_norm(
-              weights, power_iteration_rounds=power_iteration_rounds),
-          name=name)
-
-  return sn
-
-
-def _default_name_filter(name):
-  """A filter function to identify common names of weight variables.
-
-  Args:
-    name: The variable name.
-
-  Returns:
-    Whether `name` is a standard name for a weight/kernel variable used in the
-    Keras, tf.layers, tf.contrib.layers or tf.contrib.slim libraries.
-  """
-  match = re.match(r'(.*\/)?(depthwise_|pointwise_)?(weights|kernel)$', name)
-  return match is not None
-
-
-def spectral_normalization_custom_getter(name_filter=_default_name_filter,
-                                         power_iteration_rounds=1):
-  """Custom getter that performs Spectral Normalization on a weight tensor.
-
-  Specifically it divides the weight tensor by its largest singular value. This
-  is intended to stabilize GAN training, by making the discriminator satisfy a
-  local 1-Lipschitz constraint.
-
-  Based on [Spectral Normalization for Generative Adversarial Networks][sn-gan].
-
-  [sn-gan]: https://openreview.net/forum?id=B1QRgziT-
-
-  To reproduce an SN-GAN, apply this custom_getter to every weight tensor of
-  your discriminator. The last dimension of the weight tensor must be the number
-  of output channels.
-
-  Apply this to layers by supplying this as the `custom_getter` of a
-  `tf.compat.v1.variable_scope`. For example:
-
-    with tf.compat.v1.variable_scope('discriminator',
-                           custom_getter=spectral_norm_getter()):
-      net = discriminator_fn(net)
-
-  IMPORTANT: Keras does not respect the custom_getter supplied by the
-  VariableScope, so Keras users should use `keras_spectral_normalization`
-  instead of (or in addition to) this approach.
-
-  It is important to carefully select to which weights you want to apply
-  Spectral Normalization. In general you want to normalize the kernels of
-  convolution and dense layers, but you do not want to normalize biases. You
-  also want to avoid normalizing batch normalization (and similar) variables:
-  such layers play poorly with Spectral Normalization, since their gamma can
-  cancel out the normalization applied in other layers. By default we supply a
-  filter that matches the kernel variable names of the dense and convolution
-  layers of the tf.layers, tf.contrib.layers, tf.keras and tf.contrib.slim
-  libraries. If you are using anything else you'll need a custom `name_filter`.
-
-  This custom getter internally creates a variable used to compute the spectral
-  norm by power iteration. It will update every time the variable is accessed,
-  which means the normalized discriminator weights may change slightly whilst
-  training the generator. Whilst unusual, this matches how the paper's authors
-  implement it, and in general additional rounds of power iteration can't hurt.
-
-  Args:
-    name_filter: Optionally, a method that takes a Variable name as input and
-      returns whether this Variable should be normalized.
-    power_iteration_rounds: The number of iterations of the power method to
-      perform per step. A higher number yields a better approximation of the
-      true spectral norm.
-
-  Returns:
-    A custom getter function that applies Spectral Normalization to all
-    Variables whose names match `name_filter`.
-
-  Raises:
-    ValueError: If name_filter is not callable.
-  """
-  if not callable(name_filter):
-    raise ValueError('name_filter must be callable')
-
-  def _internal_getter(getter, name, *args, **kwargs):
-    """A custom getter function that applies Spectral Normalization.
-
-    Args:
-      getter: The true getter to call.
-      name: Name of new/existing variable, in the same format as
-        tf.get_variable.
-      *args: Other positional arguments, in the same format as tf.get_variable.
-      **kwargs: Keyword arguments, in the same format as tf.get_variable.
-
-    Returns:
-      The return value of `getter(name, *args, **kwargs)`, spectrally
-      normalized.
-
-    Raises:
-      ValueError: If used incorrectly, or if `dtype` is not supported.
-    """
-    if not name_filter(name):
-      return getter(name, *args, **kwargs)
-
-    if name.endswith(_PERSISTED_U_VARIABLE_SUFFIX):
-      raise ValueError(
-          'Cannot apply Spectral Normalization to internal variables created '
-          'for Spectral Normalization. Tried to normalize variable [%s]' %
-          name)
-
-    if kwargs['dtype'] not in _OK_DTYPES_FOR_SPECTRAL_NORM:
-      raise ValueError('Disallowed data type {}'.format(kwargs['dtype']))
-
-    # This layer's weight Variable/PartitionedVariable.
-    w_tensor = getter(name, *args, **kwargs)
-
-    if len(w_tensor.get_shape()) < 2:
-      raise ValueError(
-          'Spectral norm can only be applied to multi-dimensional tensors')
-
-    return spectral_normalize(
-        w_tensor,
-        power_iteration_rounds=power_iteration_rounds,
-        name=(name + '/spectral_normalize'))
-
-  return _internal_getter
-
-
-@contextlib.contextmanager
-def keras_spectral_normalization(name_filter=_default_name_filter,
-                                 power_iteration_rounds=1):
-  """A context manager that enables Spectral Normalization for Keras.
-
-  Keras doesn't respect the `custom_getter` in the VariableScope, so this is a
-  bit of a hack to make things work.
-
-  Usage:
-    with keras_spectral_normalization():
-      net = discriminator_fn(net)
-
-  Args:
-    name_filter: Optionally, a method that takes a Variable name as input and
-      returns whether this Variable should be normalized.
-    power_iteration_rounds: The number of iterations of the power method to
-      perform per step. A higher number yields a better approximation of the
-      true spectral norm.
-
-  Yields:
-    A context manager that wraps the standard Keras variable creation method
-    with the `spectral_normalization_custom_getter`.
-  """
-  original_make_variable = keras_base_layer_utils.make_variable
-  sn_getter = spectral_normalization_custom_getter(
-      name_filter=name_filter, power_iteration_rounds=power_iteration_rounds)
-
-  def make_variable_wrapper(name, *args, **kwargs):
-    return sn_getter(original_make_variable, name, *args, **kwargs)
-
-  keras_base_layer_utils.make_variable = make_variable_wrapper
-
-  yield
-
-  keras_base_layer_utils.make_variable = original_make_variable
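
Note: the module removed above leans on `tf.compat.v1` variable scopes and custom getters, which have no counterpart in the Keras-based code this diff moves to. For reference, a minimal eager-mode sketch of the same power-iteration estimate (a hypothetical helper, not part of this package):

```python
import tensorflow as tf


def spectral_norm(w, power_iteration_rounds=1):
    """Estimate the largest singular value of `w` by power iteration (TF2 sketch)."""
    # Flatten to (KH * KW * C_in, C_out), as in the removed compute_spectral_norm.
    w = tf.reshape(w, (-1, w.shape[-1]))
    # Random guess for the first left singular vector (the removed code persisted it).
    u = tf.random.normal((w.shape[0], 1))
    for _ in range(power_iteration_rounds):
        v = tf.math.l2_normalize(tf.matmul(w, u, transpose_a=True))  # ~ right singular vector
        u = tf.math.l2_normalize(tf.matmul(w, v))                    # ~ left singular vector
    # sigma = u^T W v is the spectral norm estimate.
    return tf.matmul(tf.matmul(u, w, transpose_a=True), v)[0, 0]
```

Keras users migrating off `keras_spectral_normalization` may also find the ready-made `SpectralNormalization` layer wrapper in `tensorflow_addons`, if that dependency is acceptable.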
diff --git a/bob/learn/tensorflow/image/__init__.py b/bob/learn/tensorflow/image/__init__.py
deleted file mode 100644
index 9a8962844d659e6b1e13f6eeeb5e1d11dc698e8d..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/image/__init__.py
+++ /dev/null
@@ -1,19 +0,0 @@
-from .filter import gaussian_kernel, GaussianFilter
-
-# gets sphinx autodoc done right - don't remove it
-def __appropriate__(*args):
-    """Says object was actually declared here, an not on the import module.
-
-  Parameters:
-
-    *args: An iterable of objects to modify
-
-  Resolves `Sphinx referencing issues
-  <https://github.com/sphinx-doc/sphinx/issues/3048>`
-  """
-    for obj in args:
-        obj.__module__ = __name__
-
-
-__appropriate__(GaussianFilter)
-__all__ = [_ for _ in dir() if not _.startswith("_")]
diff --git a/bob/learn/tensorflow/image/filter.py b/bob/learn/tensorflow/image/filter.py
deleted file mode 100644
index 3ac149db3113cf5166d46fe5c4ca80ed11052c2c..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/image/filter.py
+++ /dev/null
@@ -1,38 +0,0 @@
-import tensorflow as tf
-
-
-def gaussian_kernel(size: int, mean: float, std: float):
-    """Makes 2D gaussian Kernel for convolution.
-    Code adapted from: https://stackoverflow.com/a/52012658/1286165"""
-
-    d = tf.distributions.Normal(mean, std)
-
-    vals = d.prob(tf.range(start=-size, limit=size + 1, dtype=tf.float32))
-
-    gauss_kernel = tf.einsum("i,j->ij", vals, vals)
-
-    return gauss_kernel / tf.reduce_sum(gauss_kernel)
-
-
-class GaussianFilter:
-    """A class for blurring images"""
-
-    def __init__(self, size=13, mean=0.0, std=3.0, **kwargs):
-        super().__init__(**kwargs)
-        self.size = size
-        self.mean = mean
-        self.std = std
-        self.gauss_kernel = gaussian_kernel(size, mean, std)[:, :, None, None]
-
-    def __call__(self, image):
-        shape = tf.shape(image)
-        image = tf.reshape(image, [-1, shape[-3], shape[-2], shape[-1]])
-        input_channels = shape[-1]
-        gauss_kernel = tf.tile(self.gauss_kernel, [1, 1, input_channels, 1])
-        return tf.nn.depthwise_conv2d(
-            image,
-            gauss_kernel,
-            strides=[1, 1, 1, 1],
-            padding="SAME",
-            data_format="NHWC",
-        )
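
The removed kernel builder depends on `tf.distributions`, which no longer exists in TF2; the same kernel can be built from plain ops. A sketch of an equivalent (the function name mirrors the removed one, but this is an illustration, not the package's code):

```python
import tensorflow as tf


def gaussian_kernel(size: int, mean: float, std: float):
    """2D Gaussian kernel for convolution, without tf.distributions (TF2 sketch)."""
    x = tf.range(-size, size + 1, dtype=tf.float32)
    vals = tf.exp(-0.5 * tf.square((x - mean) / std))  # unnormalized 1D Gaussian pdf
    kernel = tf.einsum("i,j->ij", vals, vals)          # outer product -> 2D kernel
    return kernel / tf.reduce_sum(kernel)              # normalize to sum to 1


# Usage with the same depthwise convolution as the removed GaussianFilter:
image = tf.random.uniform((1, 64, 64, 3))
kernel = tf.tile(gaussian_kernel(13, 0.0, 3.0)[:, :, None, None], [1, 1, 3, 1])
blurred = tf.nn.depthwise_conv2d(image, kernel, strides=[1, 1, 1, 1], padding="SAME")
```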
diff --git a/bob/learn/tensorflow/layers/Maxout.py b/bob/learn/tensorflow/layers/Maxout.py
deleted file mode 100644
index 69c05520a52e2ad31b5703bcee6d928bcb5fc2e2..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/layers/Maxout.py
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-# @date:  Fri 04 Aug 2017 14:14:22 CEST
-
-# MAXOUT IMPLEMENTED FOR TENSORFLOW
-
-from tensorflow.python.layers import base
-import tensorflow as tf
-
-
-def maxout(inputs, num_units, axis=-1, name=None):
-    return Maxout(num_units=num_units, axis=axis, name=name)(inputs)
-
-
-class Maxout(base.Layer):
-    """
-     Adds a maxout op from
-
-    "Maxout Networks"
-
-    Ian J. Goodfellow, David Warde-Farley, Mehdi Mirza, Aaron Courville, Yoshua
-    Bengio
-
-    Usually the operation is performed in the filter/channel dimension. This can also be
-    used after fully-connected layers to reduce the number of features.
-
-    **Parameters**
-    inputs: Tensor input
-
-    num_units: Specifies how many features will remain after maxout in the `axis` dimension (usually channel).
-    The number of features in the `axis` dimension must be a multiple of `num_units`.
-
-    axis: The dimension where max pooling will be performed. Default is the
-      last dimension.
-
-    name: Optional scope for name_scope.
-    """
-
-    def __init__(self, num_units, axis=-1, name=None, **kwargs):
-        super(Maxout, self).__init__(name=name, trainable=False, **kwargs)
-        self.axis = axis
-        self.num_units = num_units
-
-    def call(self, inputs, training=False):
-        inputs = tf.convert_to_tensor(inputs)
-        shape = inputs.get_shape().as_list()
-        # Dealing with batches with arbitrary sizes
-        for i in range(len(shape)):
-            if shape[i] is None:
-                shape[i] = tf.shape(inputs)[i]
-
-        num_channels = shape[self.axis]
-        if not isinstance(num_channels, tf.Tensor) and num_channels % self.num_units:
-            raise ValueError(
-                "number of features({}) is not "
-                "a multiple of num_units({})".format(num_channels, self.num_units)
-            )
-
-        if self.axis < 0:
-            axis = self.axis + len(shape)
-        else:
-            axis = self.axis
-        assert axis >= 0, "Found invalid axis: {}".format(self.axis)
-
-        expand_shape = shape[:]
-        expand_shape[axis] = self.num_units
-        k = num_channels // self.num_units
-        expand_shape.insert(axis, k)
-
-        outputs = tf.math.reduce_max(
-            tf.reshape(inputs, expand_shape), axis, keepdims=False
-        )
-        return outputs
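
The removed layer subclasses `tensorflow.python.layers.base.Layer`, a private API that is gone in TF2. A Keras-layer sketch of the same op for the common `axis=-1` case (an illustration; `tensorflow_addons` also ships a `Maxout` layer):

```python
import tensorflow as tf


class Maxout(tf.keras.layers.Layer):
    """Maxout over the last axis (TF2 sketch of the removed layer, axis=-1 only)."""

    def __init__(self, num_units, **kwargs):
        super().__init__(**kwargs)
        self.num_units = num_units

    def call(self, inputs):
        num_channels = inputs.shape[-1]
        if num_channels % self.num_units:
            raise ValueError(
                "number of features ({}) is not a multiple of num_units ({})".format(
                    num_channels, self.num_units
                )
            )
        k = num_channels // self.num_units
        new_shape = tf.concat([tf.shape(inputs)[:-1], [k, self.num_units]], axis=0)
        # Group the channels into `num_units` blocks and keep the max of each block.
        return tf.reduce_max(tf.reshape(inputs, new_shape), axis=-2)
```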
diff --git a/bob/learn/tensorflow/layers/__init__.py b/bob/learn/tensorflow/layers/__init__.py
deleted file mode 100644
index a392020c2c59553c47af419005a0e4b27aea5b6f..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/layers/__init__.py
+++ /dev/null
@@ -1,24 +0,0 @@
-from .Maxout import Maxout, maxout
-
-
-# gets sphinx autodoc done right - don't remove it
-def __appropriate__(*args):
-    """Says object was actually declared here, an not on the import module.
-
-  Parameters:
-
-    *args: An iterable of objects to modify
-
-  Resolves `Sphinx referencing issues
-  <https://github.com/sphinx-doc/sphinx/issues/3048>`
-  """
-
-    for obj in args:
-        obj.__module__ = __name__
-
-
-__appropriate__(
-    Maxout,
-    maxout,
-)
-__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/learn/tensorflow/loss/BaseLoss.py b/bob/learn/tensorflow/loss/BaseLoss.py
deleted file mode 100644
index 50e83e14a089466c25933301d56504a0ad66ce9b..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/BaseLoss.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import logging
-import tensorflow as tf
-logger = logging.getLogger(__name__)
-
-slim = tf.contrib.slim
-
-
-def mean_cross_entropy_loss(logits, labels, add_regularization_losses=True):
-    """
-    Simple CrossEntropy loss.
-    Basically it wraps the function tf.nn.sparse_softmax_cross_entropy_with_logits.
-
-    **Parameters**
-      logits: Network outputs before the softmax.
-      labels: Sparse (integer) ground-truth labels.
-      add_regularization_losses: If True, also adds the graph's regularization losses to the returned loss.
-
-    """
-
-    with tf.variable_scope('cross_entropy_loss'):
-        cross_loss = tf.reduce_mean(
-            tf.nn.sparse_softmax_cross_entropy_with_logits(
-                logits=logits, labels=labels),
-            name="cross_entropy_loss")
-
-        tf.summary.scalar('cross_entropy_loss', cross_loss)
-        tf.add_to_collection(tf.GraphKeys.LOSSES, cross_loss)
-
-        if add_regularization_losses:
-            regularization_losses = tf.get_collection(
-                tf.GraphKeys.REGULARIZATION_LOSSES)
-
-            total_loss = tf.add_n(
-                [cross_loss] + regularization_losses, name="total_loss")
-            return total_loss
-        else:
-            return cross_loss
-
-
-def mean_cross_entropy_center_loss(logits,
-                                   prelogits,
-                                   labels,
-                                   n_classes,
-                                   alpha=0.9,
-                                   factor=0.01):
-    """
-    Implementation of the CrossEntropy + Center Loss from the paper
-    "A Discriminative Feature Learning Approach for Deep Face Recognition"(http://ydwen.github.io/papers/WenECCV16.pdf)
-
-    **Parameters**
-      logits: Network outputs before the softmax.
-      prelogits: Embeddings (inputs to the logits layer) used to compute the class centers.
-      labels: Sparse (integer) ground-truth labels.
-      n_classes: Number of classes of your task
-      alpha: Alpha factor ((1-alpha)*centers-prelogits)
-      factor: Weight factor of the center loss
-
-    """
-    # Cross entropy
-    with tf.variable_scope('cross_entropy_loss'):
-        cross_loss = tf.reduce_mean(
-            tf.nn.sparse_softmax_cross_entropy_with_logits(
-                logits=logits, labels=labels),
-            name="cross_entropy_loss")
-        tf.add_to_collection(tf.GraphKeys.LOSSES, cross_loss)
-        tf.summary.scalar('loss_cross_entropy', cross_loss)
-
-    # Appending center loss
-    with tf.variable_scope('center_loss'):
-        n_features = prelogits.get_shape()[1]
-
-        centers = tf.get_variable(
-            'centers', [n_classes, n_features],
-            dtype=tf.float32,
-            initializer=tf.constant_initializer(0),
-            trainable=False)
-
-        # label = tf.reshape(labels, [-1])
-        centers_batch = tf.gather(centers, labels)
-        diff = (1 - alpha) * (centers_batch - prelogits)
-        centers = tf.scatter_sub(centers, labels, diff)
-        center_loss = tf.reduce_mean(tf.square(prelogits - centers_batch))
-        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
-                             center_loss * factor)
-        tf.summary.scalar('loss_center', center_loss)
-
-    # Adding the regularizers in the loss
-    with tf.variable_scope('total_loss'):
-        regularization_losses = tf.get_collection(
-            tf.GraphKeys.REGULARIZATION_LOSSES)
-        total_loss = tf.add_n(
-            [cross_loss] + regularization_losses, name="total_loss")
-        tf.add_to_collection(tf.GraphKeys.LOSSES, total_loss)
-        tf.summary.scalar('loss_total', total_loss)
-
-    loss = dict()
-    loss['loss'] = total_loss
-    loss['centers'] = centers
-
-    return loss
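
In Keras-based TF2 code, both pieces of this loss map onto standard building blocks. A sketch, assuming a Keras `model` stands in for the removed `tf.GraphKeys.REGULARIZATION_LOSSES` collection:

```python
import tensorflow as tf

# from_logits=True matches tf.nn.sparse_softmax_cross_entropy_with_logits above.
xent = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)


def mean_cross_entropy_loss(logits, labels, model=None):
    loss = xent(labels, logits)
    if model is not None:
        # Keras tracks regularization penalties per model, not in a global collection.
        loss = tf.add_n([loss] + model.losses)
    return loss
```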
diff --git a/bob/learn/tensorflow/loss/ContrastiveLoss.py b/bob/learn/tensorflow/loss/ContrastiveLoss.py
deleted file mode 100644
index 6fa29f1aefe140d6e09de5c876958735f5e5508b..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/ContrastiveLoss.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import logging
-import tensorflow as tf
-from bob.learn.tensorflow.utils import compute_euclidean_distance
-
-logger = logging.getLogger(__name__)
-
-
-def contrastive_loss(left_embedding, right_embedding, labels, contrastive_margin=2.0):
-    """
-    Compute the contrastive loss as in
-
-    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
-
-    :math:`L = 0.5 * (1-Y) * D^2 + 0.5 * (Y) * {max(0, margin - D)}^2`
-
-    where `0` is assigned to pairs from the same class and `1` to pairs from different classes.
-
-
-    **Parameters**
-
-    left_embedding:
-      First element of the pair
-
-    right_embedding:
-      Second element of the pair
-
-    labels:
-      Label of the pair (0 or 1)
-
-    contrastive_margin:
-      Contrastive margin
-
-    """
-
-    with tf.name_scope("contrastive_loss"):
-        labels = tf.to_float(labels)
-
-        left_embedding = tf.nn.l2_normalize(left_embedding, 1)
-        right_embedding = tf.nn.l2_normalize(right_embedding, 1)
-
-        d = compute_euclidean_distance(left_embedding, right_embedding)
-
-        with tf.name_scope("within_class"):
-            one = tf.constant(1.0)
-            within_class = tf.multiply(one - labels, tf.square(d))  # (1-Y)*(d^2)
-            within_class_loss = tf.reduce_mean(within_class, name="within_class")
-            tf.add_to_collection(tf.GraphKeys.LOSSES, within_class_loss)
-
-        with tf.name_scope("between_class"):
-            max_part = tf.square(tf.maximum(contrastive_margin - d, 0))
-            between_class = tf.multiply(
-                labels, max_part
-            )  # (Y) * max((margin - d)^2, 0)
-            between_class_loss = tf.reduce_mean(between_class, name="between_class")
-            tf.add_to_collection(tf.GraphKeys.LOSSES, between_class_loss)
-
-        with tf.name_scope("total_loss"):
-            loss = 0.5 * (within_class + between_class)
-            loss = tf.reduce_mean(loss, name="contrastive_loss")
-            tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
-
-        tf.summary.scalar("contrastive_loss", loss)
-        tf.summary.scalar("between_class", between_class_loss)
-        tf.summary.scalar("within_class", within_class_loss)
-
-        return loss
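
A TF2 eager-mode sketch of the same formula, without summaries or the `GraphKeys` collections (an illustration, not the package's replacement):

```python
import tensorflow as tf


def contrastive_loss(left_embedding, right_embedding, labels, contrastive_margin=2.0):
    """L = 0.5*(1-Y)*D^2 + 0.5*Y*max(0, margin - D)^2 (TF2 sketch)."""
    left = tf.math.l2_normalize(left_embedding, axis=1)
    right = tf.math.l2_normalize(right_embedding, axis=1)
    labels = tf.cast(labels, left.dtype)
    d = tf.norm(left - right, axis=1)
    within_class = (1.0 - labels) * tf.square(d)                               # same-class pairs
    between_class = labels * tf.square(tf.maximum(contrastive_margin - d, 0))  # different-class pairs
    return tf.reduce_mean(0.5 * (within_class + between_class))
```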
diff --git a/bob/learn/tensorflow/loss/StyleLoss.py b/bob/learn/tensorflow/loss/StyleLoss.py
deleted file mode 100644
index 43cc655e291d5634f4cbe2032f6ebcf154b2a465..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/StyleLoss.py
+++ /dev/null
@@ -1,96 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import logging
-import tensorflow as tf
-import functools
-logger = logging.getLogger(__name__)
-
-
-def content_loss(noises, content_features):
-    """
-
-    Implements the content loss from:
-
-    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
-
-    For a given noise signal :math:`n` and content image :math:`c`, both convolved with the DCNN :math:`\\phi` up to a set of layers :math:`\\mathcal{L}`, the content loss is defined as:
-
-    :math:`L(n,c) = \\sum_{l \\in \\mathcal{L}} (\\phi^l(n) - \\phi^l(c))^2`
-
-
-    Parameters
-    ----------
-
-     noises: :any:`list`
-        A list of tf.Tensor containing all the noises convolved
-
-     content_features: :any:`list`
-        A list of numpy.array containing all the content_features convolved
-
-    """
-
-    content_losses = []
-    for n,c in zip(noises, content_features):
-        content_losses.append((2 * tf.nn.l2_loss(n - c) / c.size))
-    return functools.reduce(tf.add, content_losses)
-
-
-def linear_gram_style_loss(noises, gram_style_features):
-    """
-
-    Implements the style loss from:
-
-    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
-
-    For a given noise signal :math:`n` and style image :math:`s`, both convolved with the DCNN :math:`\\phi` up to a set of layers :math:`\\mathcal{L}`, the style loss is defined over Gram matrices as:
-
-    :math:`L(n,s) = \\sum_{l \\in \\mathcal{L}}\\frac{(\\phi^l(n)^T\\phi^l(n) - \\phi^l(s)^T\\phi^l(s))^2}{N M}`
-
-
-    Parameters
-    ----------
-
-     noises: :any:`list`
-        A list of tf.Tensor containing all the noises convolved
-
-     gram_style_features: :any:`list`
-        A list of numpy.array containing the Gram matrices of the style features
-
-    """
-
-    style_losses = []
-    for n,s in zip(noises, gram_style_features):
-        style_losses.append((2 * tf.nn.l2_loss(n - s)) / s.size)
-
-    return functools.reduce(tf.add, style_losses)
-
-
-
-def denoising_loss(noise):
-    """
-    Computes the denoising loss as in:
-
-    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
-
-    Parameters
-    ----------
-
-       noise:
-          Input noise
-
-    """
-    def _tensor_size(tensor):
-        from operator import mul
-        return functools.reduce(mul, (d.value for d in tensor.get_shape()), 1)
-
-    shape = noise.get_shape().as_list()
-
-    noise_y_size = _tensor_size(noise[:,1:,:,:])
-    noise_x_size = _tensor_size(noise[:,:,1:,:])
-    denoise_loss = 2 * ( (tf.nn.l2_loss(noise[:,1:,:,:] - noise[:,:shape[1]-1,:,:]) / noise_y_size) +
-                    (tf.nn.l2_loss(noise[:,:,1:,:] - noise[:,:,:shape[2]-1,:]) / noise_x_size))
-
-    return denoise_loss
-
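
The Gram matrices these losses consume come from the style-transfer helpers (renamed at the end of this diff). For reference, one common TF2 formulation of the Gram matrix (a sketch, not the package's `compute_gram`):

```python
import tensorflow as tf


def gram_matrix(features):
    """Gram matrix of a (batch, h, w, c) feature map, normalized by the number of locations."""
    gram = tf.einsum("bijc,bijd->bcd", features, features)
    n_locations = tf.cast(tf.shape(features)[1] * tf.shape(features)[2], features.dtype)
    return gram / n_locations
```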
diff --git a/bob/learn/tensorflow/loss/TripletLoss.py b/bob/learn/tensorflow/loss/TripletLoss.py
deleted file mode 100644
index d2616d6aa0394f1ea024b65b9199292221951ca4..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/TripletLoss.py
+++ /dev/null
@@ -1,180 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import logging
-logger = logging.getLogger(__name__)
-import tensorflow as tf
-
-from bob.learn.tensorflow.utils import compute_euclidean_distance
-
-
-def triplet_loss(anchor_embedding,
-                 positive_embedding,
-                 negative_embedding,
-                 margin=5.0):
-    """
-    Compute the triplet loss as in
-
-    Schroff, Florian, Dmitry Kalenichenko, and James Philbin.
-    "Facenet: A unified embedding for face recognition and clustering."
-    Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015.
-
-    :math:`L = \sum \max(0, \|f_a - f_p\|^2 - \|f_a - f_n\|^2 + \lambda)`
-
-    **Parameters**
-
-    anchor_embedding:
-      Embedding of the anchor sample
-
-    positive_embedding:
-      Embedding of the positive sample (same class as the anchor)
-
-    negative_embedding:
-      Embedding of the negative sample (different class from the anchor)
-
-    margin:
-      Triplet margin
-
-    """
-
-    with tf.name_scope("triplet_loss"):
-        # Normalize
-        anchor_embedding = tf.nn.l2_normalize(
-            anchor_embedding, 1, 1e-10, name="anchor")
-        positive_embedding = tf.nn.l2_normalize(
-            positive_embedding, 1, 1e-10, name="positive")
-        negative_embedding = tf.nn.l2_normalize(
-            negative_embedding, 1, 1e-10, name="negative")
-
-        d_positive = tf.reduce_sum(
-            tf.square(tf.subtract(anchor_embedding, positive_embedding)), 1)
-        d_negative = tf.reduce_sum(
-            tf.square(tf.subtract(anchor_embedding, negative_embedding)), 1)
-
-        basic_loss = tf.add(tf.subtract(d_positive, d_negative), margin)
-
-        with tf.name_scope("TripletLoss"):
-            # Between
-            between_class_loss = tf.reduce_mean(d_negative)
-            tf.summary.scalar('loss_between_class', between_class_loss)
-            tf.add_to_collection(tf.GraphKeys.LOSSES, between_class_loss)
-
-            # Within
-            within_class_loss = tf.reduce_mean(d_positive)
-            tf.summary.scalar('loss_within_class', within_class_loss)
-            tf.add_to_collection(tf.GraphKeys.LOSSES, within_class_loss)
-
-            # Total loss
-            loss = tf.reduce_mean(
-                tf.maximum(basic_loss, 0.0), 0, name="total_loss")
-            tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
-            tf.summary.scalar('loss_triplet', loss)
-
-        return loss
-
-
-def triplet_fisher_loss(anchor_embedding, positive_embedding,
-                        negative_embedding):
-
-    with tf.name_scope("triplet_loss"):
-        # Normalize
-        anchor_embedding = tf.nn.l2_normalize(
-            anchor_embedding, 1, 1e-10, name="anchor")
-        positive_embedding = tf.nn.l2_normalize(
-            positive_embedding, 1, 1e-10, name="positive")
-        negative_embedding = tf.nn.l2_normalize(
-            negative_embedding, 1, 1e-10, name="negative")
-
-        average_class = tf.reduce_mean(anchor_embedding, 0)
-        average_total = tf.div(tf.add(tf.reduce_mean(anchor_embedding, axis=0),\
-                        tf.reduce_mean(negative_embedding, axis=0)), 2)
-
-        length = anchor_embedding.get_shape().as_list()[0]
-        dim = anchor_embedding.get_shape().as_list()[1]
-        split_positive = tf.unstack(positive_embedding, num=length, axis=0)
-        split_negative = tf.unstack(negative_embedding, num=length, axis=0)
-
-        Sw = None
-        Sb = None
-        for s in zip(split_positive, split_negative):
-            positive = s[0]
-            negative = s[1]
-
-            buffer_sw = tf.reshape(
-                tf.subtract(positive, average_class), shape=(dim, 1))
-            buffer_sw = tf.matmul(buffer_sw,
-                                  tf.reshape(buffer_sw, shape=(1, dim)))
-
-            buffer_sb = tf.reshape(
-                tf.subtract(negative, average_total), shape=(dim, 1))
-            buffer_sb = tf.matmul(buffer_sb,
-                                  tf.reshape(buffer_sb, shape=(1, dim)))
-
-            if Sw is None:
-                Sw = buffer_sw
-                Sb = buffer_sb
-            else:
-                Sw = tf.add(Sw, buffer_sw)
-                Sb = tf.add(Sb, buffer_sb)
-
-        # Sw = tf.trace(Sw)
-        # Sb = tf.trace(Sb)
-        #loss = tf.trace(tf.div(Sb, Sw))
-        loss = tf.trace(tf.div(Sw, Sb), name=tf.GraphKeys.LOSSES)
-
-        return loss, tf.trace(Sb), tf.trace(Sw)
-
-
-def triplet_average_loss(anchor_embedding,
-                         positive_embedding,
-                         negative_embedding,
-                         margin=5.0):
-    """
-    Compute the triplet loss as in
-
-    Schroff, Florian, Dmitry Kalenichenko, and James Philbin.
-    "Facenet: A unified embedding for face recognition and clustering."
-    Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2015.
-
-    :math:`L = \sum \max(0, \|\bar{f}_a - f_p\|^2 - \|\bar{f}_a - f_n\|^2 + \lambda)`
-
-    **Parameters**
-
-    anchor_embedding:
-      Embeddings of the anchor samples (averaged over the batch)
-
-    positive_embedding:
-      Embeddings of the positive samples (same class as the anchor)
-
-    negative_embedding:
-      Embeddings of the negative samples (different class from the anchor)
-
-    margin:
-      Triplet margin
-
-    """
-
-    with tf.name_scope("triplet_loss"):
-        # Normalize
-        anchor_embedding = tf.nn.l2_normalize(
-            anchor_embedding, 1, 1e-10, name="anchor")
-        positive_embedding = tf.nn.l2_normalize(
-            positive_embedding, 1, 1e-10, name="positive")
-        negative_embedding = tf.nn.l2_normalize(
-            negative_embedding, 1, 1e-10, name="negative")
-
-        anchor_mean = tf.reduce_mean(anchor_embedding, 0)
-
-        d_positive = tf.reduce_sum(
-            tf.square(tf.subtract(anchor_mean, positive_embedding)), 1)
-        d_negative = tf.reduce_sum(
-            tf.square(tf.subtract(anchor_mean, negative_embedding)), 1)
-
-        basic_loss = tf.add(tf.subtract(d_positive, d_negative), margin)
-        loss = tf.reduce_mean(
-            tf.maximum(basic_loss, 0.0), 0, name=tf.GraphKeys.LOSSES)
-
-        return loss, tf.reduce_mean(d_negative), tf.reduce_mean(d_positive)
-
-
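
A TF2 eager-mode sketch of the basic triplet loss above, without the summary and collection plumbing (an illustration only):

```python
import tensorflow as tf


def triplet_loss(anchor_embedding, positive_embedding, negative_embedding, margin=5.0):
    """L = mean(max(0, |f_a - f_p|^2 - |f_a - f_n|^2 + margin)) (TF2 sketch)."""
    anchor = tf.math.l2_normalize(anchor_embedding, axis=1)
    positive = tf.math.l2_normalize(positive_embedding, axis=1)
    negative = tf.math.l2_normalize(negative_embedding, axis=1)
    d_positive = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    d_negative = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    return tf.reduce_mean(tf.maximum(d_positive - d_negative + margin, 0.0))
```

Where online triplet mining is wanted instead of fixed triplets, `tensorflow_addons` provides `tfa.losses.TripletSemiHardLoss`.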
diff --git a/bob/learn/tensorflow/loss/__init__.py b/bob/learn/tensorflow/loss/__init__.py
deleted file mode 100644
index 7d3937ffcf1364387480e5fdfba054543571f2ed..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/__init__.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from .BaseLoss import mean_cross_entropy_loss, mean_cross_entropy_center_loss
-from .ContrastiveLoss import contrastive_loss
-from .TripletLoss import triplet_loss, triplet_average_loss, triplet_fisher_loss
-from .StyleLoss import linear_gram_style_loss, content_loss, denoising_loss
-from .vat import VATLoss
-from .pixel_wise import PixelWise
-from .center_loss import CenterLoss
-from .mmd import *
-from .pairwise_confusion import total_pairwise_confusion
-from .utils import *
-
-
-# gets sphinx autodoc done right - don't remove it
-def __appropriate__(*args):
-    """Says object was actually declared here, an not on the import module.
-
-  Parameters:
-
-    *args: An iterable of objects to modify
-
-  Resolves `Sphinx referencing issues
-  <https://github.com/sphinx-doc/sphinx/issues/3048>`
-  """
-
-    for obj in args:
-        obj.__module__ = __name__
-
-
-__appropriate__(
-    mean_cross_entropy_loss,
-    mean_cross_entropy_center_loss,
-    contrastive_loss,
-    triplet_loss,
-    triplet_average_loss,
-    triplet_fisher_loss,
-    VATLoss,
-    PixelWise,
-)
-__all__ = [_ for _ in dir() if not _.startswith("_")]
diff --git a/bob/learn/tensorflow/loss/center_loss.py b/bob/learn/tensorflow/loss/center_loss.py
deleted file mode 100644
index 00494387c11fb4c13bacc0b4d43e374f34ae7b01..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/center_loss.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import tensorflow as tf
-
-# TODO(amir): replace parent class with tf.Module in tensorflow 1.14 and above.
-# * pass ``name`` to parent class
-# * replace get_variable with tf.Variable
-# * replace variable_scope with name_scope
-class CenterLoss:
-    """Center loss."""
-
-    def __init__(self, n_classes, n_features, alpha=0.9, name="center_loss", **kwargs):
-        super().__init__(**kwargs)
-        self.n_classes = n_classes
-        self.n_features = n_features
-        self.alpha = alpha
-        self.name = name
-        with tf.variable_scope(self.name):
-            self.centers = tf.get_variable(
-                "centers",
-                [n_classes, n_features],
-                dtype=tf.float32,
-                initializer=tf.constant_initializer(0.),
-                trainable=False,
-            )
-
-    def __call__(self, sparse_labels, prelogits):
-        with tf.name_scope(self.name):
-            centers_batch = tf.gather(self.centers, sparse_labels)
-            diff = (1 - self.alpha) * (centers_batch - prelogits)
-            self.centers_update_op = tf.scatter_sub(self.centers, sparse_labels, diff)
-            center_loss = tf.reduce_mean(tf.square(prelogits - centers_batch))
-        tf.summary.scalar("loss_center", center_loss)
-        # Add histogram for all centers
-        for i in range(self.n_classes):
-            tf.summary.histogram(f"center_{i}", self.centers[i])
-        return center_loss
-
-    @property
-    def update_ops(self):
-        return [self.centers_update_op]
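
The replacement lives in `bob.learn.tensorflow.losses` (see the rename at the end of this diff). A rough sketch of how the TODO above translates to `tf.Module`/`tf.Variable`; this is an illustration, not the actual `bob.learn.tensorflow.losses.CenterLoss`:

```python
import tensorflow as tf


class CenterLoss(tf.Module):
    """Center loss with a tf.Variable instead of get_variable (TF2 sketch)."""

    def __init__(self, n_classes, n_features, alpha=0.9, name="center_loss"):
        super().__init__(name=name)
        self.alpha = alpha
        self.centers = tf.Variable(
            tf.zeros((n_classes, n_features)), trainable=False, name="centers"
        )

    def __call__(self, sparse_labels, prelogits):
        sparse_labels = tf.reshape(tf.cast(sparse_labels, tf.int32), (-1,))
        centers_batch = tf.gather(self.centers, sparse_labels)
        # Moving-average update of the class centers (replaces tf.scatter_sub).
        diff = (1.0 - self.alpha) * (centers_batch - prelogits)
        self.centers.scatter_sub(tf.IndexedSlices(diff, sparse_labels))
        return tf.reduce_mean(tf.square(prelogits - centers_batch))
```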
diff --git a/bob/learn/tensorflow/loss/epsc.py b/bob/learn/tensorflow/loss/epsc.py
deleted file mode 100644
index cfadb012ca54b73d92ffbab3ab63faaf4345db65..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/epsc.py
+++ /dev/null
@@ -1,178 +0,0 @@
-import tensorflow as tf
-import bob.measure
-import numpy
-from tensorflow.python.ops.metrics_impl import metric_variable
-from ..utils import norm, predict_using_tensors
-from .ContrastiveLoss import contrastive_loss
-
-
-def logits_loss(
-    bio_logits, pad_logits, bio_labels, pad_labels, bio_loss, pad_loss, alpha=0.5
-):
-
-    with tf.name_scope("Bio_loss"):
-        bio_loss_ = bio_loss(logits=bio_logits, labels=bio_labels)
-
-    with tf.name_scope("PAD_loss"):
-        pad_loss_ = pad_loss(
-            logits=pad_logits, labels=tf.cast(pad_labels, dtype="int32")
-        )
-
-    with tf.name_scope("EPSC_loss"):
-        total_loss = (1 - alpha) * bio_loss_ + alpha * pad_loss_
-
-    tf.add_to_collection(tf.GraphKeys.LOSSES, bio_loss_)
-    tf.add_to_collection(tf.GraphKeys.LOSSES, pad_loss_)
-    tf.add_to_collection(tf.GraphKeys.LOSSES, total_loss)
-
-    tf.summary.scalar("bio_loss", bio_loss_)
-    tf.summary.scalar("pad_loss", pad_loss_)
-    tf.summary.scalar("epsc_loss", total_loss)
-
-    return total_loss
-
-
-def embedding_norm_loss(prelogits_left, prelogits_right, b, c, margin=10.0):
-    with tf.name_scope("embedding_norm_loss"):
-        prelogits_left = norm(prelogits_left)
-        prelogits_right = norm(prelogits_right)
-
-        loss = tf.add_n(
-            [
-                tf.reduce_mean(b * (tf.maximum(prelogits_left - margin, 0))),
-                tf.reduce_mean((1 - b) * (tf.maximum(2 * margin - prelogits_left, 0))),
-                tf.reduce_mean(c * (tf.maximum(prelogits_right - margin, 0))),
-                tf.reduce_mean((1 - c) * (tf.maximum(2 * margin - prelogits_right, 0))),
-            ],
-            name="embedding_norm_loss",
-        )
-        tf.add_to_collection(tf.GraphKeys.LOSSES, loss)
-        tf.summary.scalar("embedding_norm_loss", loss)
-        # log norm of embeddings for BF and PA separately to see how their norm
-        # evolves over time
-        bf_norm = tf.concat(
-            [
-                tf.gather(prelogits_left, tf.where(b > 0.5)),
-                tf.gather(prelogits_right, tf.where(c > 0.5)),
-            ],
-            axis=0,
-        )
-        pa_norm = tf.concat(
-            [
-                tf.gather(prelogits_left, tf.where(b < 0.5)),
-                tf.gather(prelogits_right, tf.where(c < 0.5)),
-            ],
-            axis=0,
-        )
-        tf.summary.histogram("BF_embeddings_norm", bf_norm)
-        tf.summary.histogram("PA_embeddings_norm", pa_norm)
-    return loss
-
-
-def siamese_loss(bio_logits, pad_logits, bio_labels, pad_labels, alpha=0.1):
-    # prepare a, b, c
-    with tf.name_scope("epsc_labels"):
-        a = tf.to_float(
-            tf.math.equal(bio_labels["left"], bio_labels["right"]), name="a"
-        )
-        b = tf.to_float(tf.math.equal(pad_labels["left"], True), name="b")
-        c = tf.to_float(tf.math.equal(pad_labels["right"], True), name="c")
-        tf.summary.scalar("Mean_a", tf.reduce_mean(a))
-        tf.summary.scalar("Mean_b", tf.reduce_mean(b))
-        tf.summary.scalar("Mean_c", tf.reduce_mean(c))
-
-    prelogits_left = bio_logits["left"]
-    prelogits_right = bio_logits["right"]
-
-    bio_loss = contrastive_loss(prelogits_left, prelogits_right, labels=1 - a)
-
-    pad_loss = alpha * embedding_norm_loss(prelogits_left, prelogits_right, b, c)
-
-    with tf.name_scope("epsc_loss"):
-        epsc_loss = (1 - alpha) * bio_loss + alpha * pad_loss
-        tf.add_to_collection(tf.GraphKeys.LOSSES, epsc_loss)
-
-    tf.summary.scalar("epsc_loss", epsc_loss)
-
-    return epsc_loss
-
-
-def py_eer(negatives, positives):
-    def _eer(neg, pos):
-        if neg.size == 0 or pos.size == 0:
-            return numpy.array(0.0, dtype="float64")
-        return bob.measure.eer(neg, pos)
-
-    negatives = tf.reshape(tf.cast(negatives, "float64"), [-1])
-    positives = tf.reshape(tf.cast(positives, "float64"), [-1])
-
-    eer = tf.py_func(_eer, [negatives, positives], tf.float64, name="py_eer")
-
-    return tf.cast(eer, "float32")
-
-
-def epsc_metric(
-    bio_embeddings,
-    pad_probabilities,
-    bio_labels,
-    pad_labels,
-    batch_size,
-    pad_threshold=numpy.exp(-15),
-):
-    # math.exp(-2.0) = 0.1353352832366127
-    # math.exp(-15.0) = 3.059023205018258e-07
-    with tf.name_scope("epsc_metrics"):
-        bio_predictions_op = predict_using_tensors(
-            bio_embeddings, bio_labels, num=batch_size
-        )
-
-        # find the lowest value of bf and highest value of pa
-        # their mean is the threshold
-        # bf_probabilities = tf.gather(pad_probabilities, tf.where(pad_labels))
-        # pa_probabilities = tf.gather(pad_probabilities, tf.where(tf.logical_not(pad_labels)))
-
-        # eer = py_eer(pa_probabilities, bf_probabilities)
-        # acc = 1 - eer
-
-        # pad_threshold = (tf.reduce_max(pa_probabilities) + tf.reduce_min(bf_probabilities)) / 2
-        # true_positives = tf.reduce_sum(tf.to_int32(bf_probabilities >= pad_threshold))
-        # true_negatives = tf.reduce_sum(tf.to_int32(pa_probabilities < pad_threshold))
-        # # pad_accuracy = metric_variable([], tf.float32, name='pad_accuracy')
-        # acc = (true_positives + true_negatives) / batch_size
-
-        # pad_accuracy, pad_update_ops = tf.metrics.mean(acc)
-
-        # print_ops = [
-        #     tf.print(pad_probabilities),
-        #     tf.print(bf_probabilities, pa_probabilities),
-        #     tf.print(pad_threshold),
-        #     tf.print(true_positives, true_negatives),
-        #     tf.print(pad_probabilities.shape[0]),
-        #     tf.print(acc),
-        # ]
-        # update_op = tf.assign_add(pad_accuracy, tf.cast(acc, tf.float32))
-        # update_op = tf.group([update_op] + print_ops)
-
-        tp = tf.metrics.true_positives_at_thresholds(
-            pad_labels, pad_probabilities, [pad_threshold]
-        )
-        fp = tf.metrics.false_positives_at_thresholds(
-            pad_labels, pad_probabilities, [pad_threshold]
-        )
-        tn = tf.metrics.true_negatives_at_thresholds(
-            pad_labels, pad_probabilities, [pad_threshold]
-        )
-        fn = tf.metrics.false_negatives_at_thresholds(
-            pad_labels, pad_probabilities, [pad_threshold]
-        )
-        pad_accuracy = (tp[0] + tn[0]) / (tp[0] + tn[0] + fp[0] + fn[0])
-        pad_accuracy = tf.reduce_mean(pad_accuracy)
-        pad_update_ops = tf.group([x[1] for x in (tp, tn, fp, fn)])
-
-        eval_metric_ops = {
-            "bio_accuracy": tf.metrics.accuracy(
-                labels=bio_labels, predictions=bio_predictions_op
-            ),
-            "pad_accuracy": (pad_accuracy, pad_update_ops),
-        }
-    return eval_metric_ops
diff --git a/bob/learn/tensorflow/loss/mmd.py b/bob/learn/tensorflow/loss/mmd.py
deleted file mode 100644
index 2933d7b1d3cd32b7533c2fa5213e38eeb7192965..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/mmd.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import tensorflow as tf
-
-
-def compute_kernel(x, y):
-    """Gaussian kernel.
-    """
-    x_size = tf.shape(x)[0]
-    y_size = tf.shape(y)[0]
-    dim = tf.shape(x)[1]
-    tiled_x = tf.tile(
-        tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1])
-    )
-    tiled_y = tf.tile(
-        tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1])
-    )
-    return tf.exp(
-        -tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32)
-    )
-
-
-def mmd(x, y):
-    """Maximum Mean Discrepancy with Gaussian kernel.
-    See: https://stats.stackexchange.com/a/276618/49433
-    """
-    x_kernel = compute_kernel(x, x)
-    y_kernel = compute_kernel(y, y)
-    xy_kernel = compute_kernel(x, y)
-    return (
-        tf.reduce_mean(x_kernel)
-        + tf.reduce_mean(y_kernel)
-        - 2 * tf.reduce_mean(xy_kernel)
-    )
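
Both functions above use only ops that still exist in TF2 and run unchanged in eager mode. A quick sanity check, assuming `mmd` is in scope:

```python
import tensorflow as tf

x = tf.random.normal((128, 16))
y = tf.random.normal((128, 16)) + 1.0  # shifted distribution

# MMD is ~0 when both samples come from the same distribution
# and grows with the shift between them.
print(float(mmd(x, x)))  # 0.0
print(float(mmd(x, y)))  # > 0
```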
diff --git a/bob/learn/tensorflow/loss/pairwise_confusion.py b/bob/learn/tensorflow/loss/pairwise_confusion.py
deleted file mode 100644
index 155b1a299625283ade232af1c84d13e146cceff1..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/pairwise_confusion.py
+++ /dev/null
@@ -1,16 +0,0 @@
-import tensorflow as tf
-from ..utils import pdist_safe, upper_triangle
-
-def total_pairwise_confusion(prelogits, name=None):
-    """Total Pairwise Confusion Loss
-
-        [1]X. Tu et al., “Learning Generalizable and Identity-Discriminative
-        Representations for Face Anti-Spoofing,” arXiv preprint arXiv:1901.05602, 2019.
-    """
-    # compute pairwise distances between all prelogits and average them.
-    with tf.name_scope(name, default_name="total_pairwise_confusion"):
-        prelogits = tf.reshape(prelogits, (tf.shape(prelogits)[0], -1))
-        loss_tpc = tf.reduce_mean(upper_triangle(pdist_safe(prelogits)))
-
-    tf.summary.scalar("loss_tpc", loss_tpc)
-    return loss_tpc
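
A self-contained TF2 sketch of the same quantity, without the `pdist_safe`/`upper_triangle` helpers that leave with the utils module (an illustration only):

```python
import tensorflow as tf


def total_pairwise_confusion(prelogits):
    """Mean pairwise Euclidean distance between prelogits (TF2 sketch)."""
    x = tf.reshape(prelogits, (tf.shape(prelogits)[0], -1))
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, clipped for numerical safety.
    sq = tf.reduce_sum(tf.square(x), axis=1)
    d2 = tf.maximum(sq[:, None] + sq[None, :] - 2.0 * tf.matmul(x, x, transpose_b=True), 0.0)
    d = tf.sqrt(d2 + 1e-12)
    n = tf.shape(x)[0]
    # Strictly upper triangular mask selects each unordered pair once.
    upper = tf.linalg.band_part(tf.ones((n, n)), 0, -1) - tf.eye(n)
    return tf.reduce_sum(d * upper) / tf.reduce_sum(upper)
```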
diff --git a/bob/learn/tensorflow/loss/pixel_wise.py b/bob/learn/tensorflow/loss/pixel_wise.py
deleted file mode 100644
index b34695045c20273bdc0063a928ceb723324eca6d..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/pixel_wise.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from ..dataset import tf_repeat
-from .utils import (
-    balanced_softmax_cross_entropy_loss_weights,
-    balanced_sigmoid_cross_entropy_loss_weights,
-)
-import tensorflow as tf
-
-
-class PixelWise:
-    """A pixel wise loss which is just a cross entropy loss but applied to all pixels"""
-
-    def __init__(
-        self, balance_weights=True, n_one_hot_labels=None, label_smoothing=0.5, **kwargs
-    ):
-        super(PixelWise, self).__init__(**kwargs)
-        self.balance_weights = balance_weights
-        self.n_one_hot_labels = n_one_hot_labels
-        self.label_smoothing = label_smoothing
-
-    def __call__(self, labels, logits):
-        with tf.name_scope("PixelWiseLoss"):
-            flatten = tf.keras.layers.Flatten()
-            logits = flatten(logits)
-            n_pixels = logits.get_shape()[-1]
-            weights = 1.0
-            if self.balance_weights and self.n_one_hot_labels:
-                # use labels to figure out the required loss
-                weights = balanced_softmax_cross_entropy_loss_weights(
-                    labels, dtype=logits.dtype
-                )
-                # repeat weights for all pixels
-                weights = tf_repeat(weights[:, None], [1, n_pixels])
-                weights = tf.reshape(weights, (-1,))
-            elif self.balance_weights and not self.n_one_hot_labels:
-                # use labels to figure out the required loss
-                weights = balanced_sigmoid_cross_entropy_loss_weights(
-                    labels, dtype=logits.dtype
-                )
-                # repeat weights for all pixels
-                weights = tf_repeat(weights[:, None], [1, n_pixels])
-
-            if self.n_one_hot_labels:
-                labels = tf_repeat(labels, [n_pixels, 1])
-                labels = tf.reshape(labels, (-1, self.n_one_hot_labels))
-                # reshape logits too as softmax_cross_entropy is buggy and cannot really
-                # handle higher dimensions
-                logits = tf.reshape(logits, (-1, self.n_one_hot_labels))
-                loss_fn = tf.losses.softmax_cross_entropy
-            else:
-                labels = tf.reshape(labels, (-1, 1))
-                labels = tf_repeat(labels, [n_pixels, 1])
-                labels = tf.reshape(labels, (-1, n_pixels))
-                loss_fn = tf.losses.sigmoid_cross_entropy
-
-            loss_pixel_wise = loss_fn(
-                labels,
-                logits=logits,
-                weights=weights,
-                label_smoothing=self.label_smoothing,
-                reduction=tf.losses.Reduction.MEAN,
-            )
-        tf.summary.scalar("loss_pixel_wise", loss_pixel_wise)
-        return loss_pixel_wise
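
A Keras-era sketch of the binary branch of this loss (no one-hot labels, no balancing); the names are hypothetical and the weighting logic is omitted:

```python
import tensorflow as tf

bce = tf.keras.losses.BinaryCrossentropy(from_logits=True, label_smoothing=0.5)


def pixel_wise_loss(labels, logit_maps):
    # labels: (batch,) binary labels; logit_maps: (batch, h, w, 1) per-pixel logits.
    logits = tf.reshape(logit_maps, (tf.shape(logit_maps)[0], -1))  # (batch, n_pixels)
    # Broadcast each image-level label to all of its pixels.
    labels = tf.cast(tf.reshape(labels, (-1, 1)), logits.dtype) * tf.ones_like(logits)
    return bce(labels, logits)
```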
diff --git a/bob/learn/tensorflow/loss/utils.py b/bob/learn/tensorflow/loss/utils.py
deleted file mode 100644
index 8c0e9eeaa7d1f059216f441017469c2701531426..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/utils.py
+++ /dev/null
@@ -1,144 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Amir Mohammadi <amir.mohammadi@idiap.ch>
-
-import tensorflow as tf
-
-
-def balanced_softmax_cross_entropy_loss_weights(labels, dtype="float32"):
-    """Computes weights that normalizes your loss per class.
-
-    Labels must be a batch of one-hot encoded labels. The function takes labels and
-    computes the weights per batch. Weights will be smaller for classes that have more
-    samples in this batch. This is useful if you have unbalanced classes in your dataset or
-    batch.
-
-    Parameters
-    ----------
-    labels : ``tf.Tensor``
-        Labels of your current input. The shape must be [batch_size, n_classes]. If your
-        labels are not one-hot encoded, you can use ``tf.one_hot`` to convert them first
-        before giving them to this function.
-    dtype : ``tf.dtype``
-        The dtype that weights will have. It should be float. Best is to provide
-        logits.dtype as input.
-
-    Returns
-    -------
-    ``tf.Tensor``
-        Computed weights that will cancel your dataset imbalance per batch.
-
-    Examples
-    --------
-    >>> import numpy
-    >>> import tensorflow as tf
-    >>> from bob.learn.tensorflow.loss import balanced_softmax_cross_entropy_loss_weights
-    >>> labels = numpy.array([[1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [0, 0, 1],
-    ...                 [0, 1, 0],
-    ...                 [0, 0, 1],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [0, 0, 1],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [0, 1, 0],
-    ...                 [1, 0, 0],
-    ...                 [0, 1, 0],
-    ...                 [1, 0, 0],
-    ...                 [0, 0, 1],
-    ...                 [0, 0, 1],
-    ...                 [1, 0, 0],
-    ...                 [0, 0, 1],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [0, 1, 0],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [1, 0, 0],
-    ...                 [0, 1, 0],
-    ...                 [1, 0, 0],
-    ...                 [0, 0, 1],
-    ...                 [1, 0, 0]], dtype="int32")
-    >>> session = tf.Session() # Eager execution is also possible; see https://www.tensorflow.org/guide/eager
-    >>> session.run(tf.reduce_sum(labels, axis=0))
-    array([20,  5,  7], dtype=int32)
-    >>> session.run(balanced_softmax_cross_entropy_loss_weights(labels, dtype='float32'))
-    array([0.53333336, 0.53333336, 1.5238096 , 2.1333334 , 1.5238096 ,
-           0.53333336, 0.53333336, 1.5238096 , 0.53333336, 0.53333336,
-           0.53333336, 0.53333336, 0.53333336, 0.53333336, 2.1333334 ,
-           0.53333336, 2.1333334 , 0.53333336, 1.5238096 , 1.5238096 ,
-           0.53333336, 1.5238096 , 0.53333336, 0.53333336, 2.1333334 ,
-           0.53333336, 0.53333336, 0.53333336, 2.1333334 , 0.53333336,
-           1.5238096 , 0.53333336], dtype=float32)
-
-    You would use it like this:
-
-    >>> #weights = balanced_softmax_cross_entropy_loss_weights(labels, dtype=logits.dtype)
-    >>> #loss = tf.losses.softmax_cross_entropy(logits=logits, labels=labels, weights=weights)
-    """
-    shape = tf.cast(tf.shape(labels), dtype=dtype)
-    batch_size, n_classes = shape[0], shape[1]
-    weights = tf.cast(tf.reduce_sum(labels, axis=0), dtype=dtype)
-    weights = batch_size / weights / n_classes
-    weights = tf.gather(weights, tf.argmax(labels, axis=1))
-    return weights
-
-
-def balanced_sigmoid_cross_entropy_loss_weights(labels, dtype="float32"):
-    """Computes weights that normalizes your loss per class.
-
-    Labels must be a batch of binary labels. The function takes labels and
-    computes the weights per batch. Weights will be smaller for the class that has more
-    samples in this batch. This is useful if you have unbalanced classes in your dataset or
-    batch.
-
-    Parameters
-    ----------
-    labels : ``tf.Tensor``
-        Labels of your current input. The shape must be [batch_size] and values must be
-        either 0 or 1.
-    dtype : ``tf.dtype``
-        The dtype that weights will have. It should be float. Best is to provide
-        logits.dtype as input.
-
-    Returns
-    -------
-    ``tf.Tensor``
-        Computed weights that will cancel your dataset imbalance per batch.
-
-    Examples
-    --------
-    >>> import numpy
-    >>> import tensorflow as tf
-    >>> from bob.learn.tensorflow.loss import balanced_sigmoid_cross_entropy_loss_weights
-    >>> labels = numpy.array([1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
-    ...                 1, 1, 0, 1, 1, 1, 0, 1, 0, 1], dtype="int32")
-    >>> sum(labels), len(labels)
-    (20, 32)
-    >>> session = tf.Session() # Eager execution is also possible; see https://www.tensorflow.org/guide/eager
-    >>> session.run(balanced_sigmoid_cross_entropy_loss_weights(labels, dtype='float32'))
-    array([0.8      , 0.8      , 1.3333334, 1.3333334, 1.3333334, 0.8      ,
-           0.8      , 1.3333334, 0.8      , 0.8      , 0.8      , 0.8      ,
-           0.8      , 0.8      , 1.3333334, 0.8      , 1.3333334, 0.8      ,
-           1.3333334, 1.3333334, 0.8      , 1.3333334, 0.8      , 0.8      ,
-           1.3333334, 0.8      , 0.8      , 0.8      , 1.3333334, 0.8      ,
-           1.3333334, 0.8      ], dtype=float32)
-
-    You would use it like this:
-
-    >>> #weights = balanced_sigmoid_cross_entropy_loss_weights(labels, dtype=logits.dtype)
-    >>> #loss = tf.losses.sigmoid_cross_entropy(logits=logits, labels=labels, weights=weights)
-    """
-    labels = tf.cast(labels, dtype='int32')
-    batch_size = tf.cast(tf.shape(labels)[0], dtype=dtype)
-    weights = tf.cast(tf.reduce_sum(labels), dtype=dtype)
-    weights = tf.convert_to_tensor([batch_size - weights, weights])
-    weights = batch_size / weights / 2
-    weights = tf.gather(weights, labels)
-    return weights
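
Both weight functions use only TF2-compatible ops, so the doctests above merely trade their `tf.Session` for eager execution. A usage sketch, assuming `balanced_softmax_cross_entropy_loss_weights` is in scope:

```python
import tensorflow as tf

labels = tf.one_hot([0, 0, 0, 1, 2], depth=3)
logits = tf.random.normal((5, 3))

weights = balanced_softmax_cross_entropy_loss_weights(labels, dtype=logits.dtype)
per_sample = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
loss = tf.reduce_mean(per_sample * weights)  # rare classes count as much as common ones
```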
diff --git a/bob/learn/tensorflow/loss/vat.py b/bob/learn/tensorflow/loss/vat.py
deleted file mode 100644
index b48f4f8918287a68e71f1647d22cbe1a4a0d2c52..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/loss/vat.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# Adapted from https://github.com/takerum/vat_tf Its license:
-#
-# MIT License
-#
-# Copyright (c) 2017 Takeru Miyato
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-
-import tensorflow as tf
-from functools import partial
-
-
-def get_normalized_vector(d):
-    d /= (1e-12 + tf.reduce_max(tf.abs(d), list(range(1, len(d.get_shape()))), keepdims=True))
-    d /= tf.sqrt(1e-6 + tf.reduce_sum(tf.pow(d, 2.0), list(range(1, len(d.get_shape()))), keepdims=True))
-    return d
-
-
-def logsoftmax(x):
-    xdev = x - tf.reduce_max(x, 1, keepdims=True)
-    lsm = xdev - tf.log(tf.reduce_sum(tf.exp(xdev), 1, keepdims=True))
-    return lsm
-
-
-def kl_divergence_with_logit(q_logit, p_logit):
-    q = tf.nn.softmax(q_logit)
-    qlogq = tf.reduce_mean(tf.reduce_sum(q * logsoftmax(q_logit), 1))
-    qlogp = tf.reduce_mean(tf.reduce_sum(q * logsoftmax(p_logit), 1))
-    return qlogq - qlogp
-
-
-def entropy_y_x(logit):
-    p = tf.nn.softmax(logit)
-    return -tf.reduce_mean(tf.reduce_sum(p * logsoftmax(logit), 1))
-
-
-class VATLoss:
-    """A class to hold parameters for Virtual Adversarial Training (VAT) Loss
-    and perform it.
-
-    Attributes
-    ----------
-    epsilon : float
-        norm length for (virtual) adversarial training
-    method : str
-        The method for calculating the loss: ``vatent`` for VAT loss + entropy
-        and ``vat`` for only VAT loss.
-    num_power_iterations : int
-        the number of power iterations
-    xi : float
-        small constant for finite difference
-    """
-
-    def __init__(self, epsilon=8.0, xi=1e-6, num_power_iterations=1, method='vatent', **kwargs):
-        super(VATLoss, self).__init__(**kwargs)
-        self.epsilon = epsilon
-        self.xi = xi
-        self.num_power_iterations = num_power_iterations
-        self.method = method
-
-    def __call__(self, features, logits, architecture, mode):
-        """Computes the VAT loss for unlabeled features.
-        If you are doing semi-supervised learning, only pass the unlabeled
-        features and their logits here.
-
-        Parameters
-        ----------
-        features : object
-            Tensor representing the (unlabeled) features
-        logits : object
-            Tensor representing the logits of (unlabeled) features.
-        architecture : object
-            A callable that constructs the model. It should accept ``mode`` and
-            ``reuse`` as keyword arguments. The features will be given as the
-            first input.
-        mode : str
-            One of tf.estimator.ModeKeys.{TRAIN,EVAL} strings.
-
-        Returns
-        -------
-        object
-            The loss.
-
-        Raises
-        ------
-        NotImplementedError
-            If self.method is not ``vat`` or ``vatent``.
-        """
-        if mode != tf.estimator.ModeKeys.TRAIN:
-            return 0.
-        architecture = partial(architecture, reuse=True)
-        with tf.variable_scope(tf.get_variable_scope(), reuse=True):
-            vat_loss = self.virtual_adversarial_loss(features, logits, architecture, mode)
-            tf.summary.scalar("loss_VAT", vat_loss)
-            tf.add_to_collection(tf.GraphKeys.LOSSES, vat_loss)
-            if self.method == 'vat':
-                loss = vat_loss
-            elif self.method == 'vatent':
-                ent_loss = entropy_y_x(logits)
-                tf.summary.scalar("loss_entropy", ent_loss)
-                tf.add_to_collection(tf.GraphKeys.LOSSES, ent_loss)
-                loss = vat_loss + ent_loss
-            else:
-                raise ValueError
-            return loss
-
-    def virtual_adversarial_loss(self, features, logits, architecture, mode, name="vat_loss_op"):
-        r_vadv = self.generate_virtual_adversarial_perturbation(features, logits, architecture, mode)
-        logit_p = tf.stop_gradient(logits)
-        adversarial_input = features + r_vadv
-        tf.summary.image("Adversarial_Image", adversarial_input)
-        logit_m = architecture(adversarial_input, mode=mode)[0]
-        loss = kl_divergence_with_logit(logit_p, logit_m)
-        return tf.identity(loss, name=name)
-
-    def generate_virtual_adversarial_perturbation(self, features, logits, architecture, mode):
-        d = tf.random_normal(shape=tf.shape(features))
-
-        for _ in range(self.num_power_iterations):
-            d = self.xi * get_normalized_vector(d)
-            logit_p = logits
-            logit_m = architecture(features + d, mode=mode)[0]
-            dist = kl_divergence_with_logit(logit_p, logit_m)
-            grad = tf.gradients(dist, [d], aggregation_method=2)[0]
-            d = tf.stop_gradient(grad)
-
-        return self.epsilon * get_normalized_vector(d)
diff --git a/bob/learn/tensorflow/style_transfer/__init__.py b/bob/learn/tensorflow/losses/__init__.py
similarity index 65%
rename from bob/learn/tensorflow/style_transfer/__init__.py
rename to bob/learn/tensorflow/losses/__init__.py
index 11f0c6a989d75fcae4804500619f8d980349e54d..65cdcab2e9e6b5ba4090b269eb5fc68e5544797c 100644
--- a/bob/learn/tensorflow/style_transfer/__init__.py
+++ b/bob/learn/tensorflow/losses/__init__.py
@@ -1,4 +1,4 @@
-from .neural_transfer import compute_features, compute_gram, do_style_transfer
+from .center_loss import CenterLoss, CenterLossLayer
 
 # gets sphinx autodoc done right - don't remove it
 def __appropriate__(*args):
@@ -6,7 +6,7 @@ def __appropriate__(*args):
 
     Parameters:
 
-            *args: An iterable of objects to modify
+      *args: An iterable of objects to modify
 
     Resolves `Sphinx referencing issues
     <https://github.com/sphinx-doc/sphinx/issues/3048>`
@@ -17,6 +17,7 @@ def __appropriate__(*args):
 
 
 __appropriate__(
+    CenterLoss,
+    CenterLossLayer
 )
-
-__all__ = [_ for _ in dir() if not _.startswith('_')]
+__all__ = [_ for _ in dir() if not _.startswith("_")]
diff --git a/bob/learn/tensorflow/losses/center_loss.py b/bob/learn/tensorflow/losses/center_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..894a461200639042094c6b9e29e1721eee1478cb
--- /dev/null
+++ b/bob/learn/tensorflow/losses/center_loss.py
@@ -0,0 +1,88 @@
+import tensorflow as tf
+
+
+class CenterLossLayer(tf.keras.layers.Layer):
+    """A layer to be added in the model if you want to use CenterLoss
+
+    Attributes
+    ----------
+    centers : tf.Variable
+        The variable that keeps track of centers.
+    n_classes : int
+        Number of classes of the task.
+    n_features : int
+        The size of prelogits.
+    """
+
+    def __init__(self, n_classes, n_features, **kwargs):
+        super().__init__(**kwargs)
+        self.n_classes = n_classes
+        self.n_features = n_features
+        self.centers = tf.Variable(
+            tf.zeros([n_classes, n_features]),
+            name="centers",
+            trainable=False,
+            # in a distributed strategy, we want updates to this variable to be summed.
+            aggregation=tf.VariableAggregation.SUM,
+        )
+
+    def call(self, x):
+        # pass through layer
+        return tf.identity(x)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({"n_classes": self.n_classes, "n_features": self.n_features})
+        return config
+
+
+class CenterLoss(tf.keras.losses.Loss):
+    """Center loss.
+    Introduced in: A Discriminative Feature Learning Approach for Deep Face Recognition
+    https://ydwen.github.io/papers/WenECCV16.pdf
+
+    .. warning::
+
+        This loss MUST NOT be called during evaluation, as calling it updates the centers!
+        This loss only works with sparse (integer) labels.
+        This loss must be used with a CenterLossLayer embedded in the model.
+
+    Attributes
+    ----------
+    alpha : float
+        The moving average coefficient for updating centers in each batch.
+    centers : tf.Variable
+        The variable that keeps track of centers.
+    centers_layer
+        The layer that keeps track of centers.
+    """
+
+    def __init__(
+        self,
+        centers_layer,
+        alpha=0.9,
+        update_centers=True,
+        name="center_loss",
+        **kwargs
+    ):
+        super().__init__(name=name, **kwargs)
+        self.centers_layer = centers_layer
+        self.centers = self.centers_layer.centers
+        self.alpha = alpha
+        self.update_centers = update_centers
+
+    def call(self, sparse_labels, prelogits):
+        sparse_labels = tf.reshape(sparse_labels, (-1,))
+        centers_batch = tf.gather(self.centers, sparse_labels)
+        # the reduction over the batch dimension is done by the parent class
+        center_loss = tf.keras.losses.mean_squared_error(prelogits, centers_batch)
+
+        # update centers
+        if self.update_centers:
+            diff = (1 - self.alpha) * (centers_batch - prelogits)
+            updates = tf.scatter_nd(sparse_labels[:, None], diff, self.centers.shape)
+            # using assign_sub will make sure updates are added during distributed
+            # training
+            self.centers.assign_sub(updates)
+
+        return center_loss
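
For reviewers, a minimal sketch of how the two pieces above fit together (the
surrounding model, shapes, and optimizer are illustrative assumptions, not part
of this patch): the layer owns the centers variable and passes features through
unchanged, while the loss reads and updates those centers.

import tensorflow as tf

from bob.learn.tensorflow.losses import CenterLoss, CenterLossLayer

N_CLASSES, N_FEATURES = 10, 64  # illustrative sizes

centers_layer = CenterLossLayer(N_CLASSES, N_FEATURES, name="centers")

inputs = tf.keras.Input(shape=(28, 28, 1))
prelogits = tf.keras.layers.Dense(N_FEATURES)(tf.keras.layers.Flatten()(inputs))
prelogits = centers_layer(prelogits)  # identity pass-through; owns the centers
logits = tf.keras.layers.Dense(N_CLASSES, name="logits")(prelogits)
model = tf.keras.Model(inputs, [logits, prelogits])

model.compile(
    optimizer="sgd",
    loss={
        "logits": tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        # per the warning above, calling the loss updates the centers, so pass
        # update_centers=False when building an evaluation-only graph
        "centers": CenterLoss(centers_layer=centers_layer, alpha=0.9),
    },
)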
diff --git a/bob/learn/tensorflow/metrics/__init__.py b/bob/learn/tensorflow/metrics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..72ee7b5fce68569d2359176ed78f076030030d47
--- /dev/null
+++ b/bob/learn/tensorflow/metrics/__init__.py
@@ -0,0 +1,20 @@
+from .embedding_accuracy import EmbeddingAccuracy, predict_using_tensors
+
+# gets sphinx autodoc done right - don't remove it
+def __appropriate__(*args):
+    """Says object was actually declared here, an not on the import module.
+
+    Parameters:
+
+      *args: An iterable of objects to modify
+
+    Resolves `Sphinx referencing issues
+    <https://github.com/sphinx-doc/sphinx/issues/3048>`
+    """
+
+    for obj in args:
+        obj.__module__ = __name__
+
+
+__appropriate__(EmbeddingAccuracy)
+__all__ = [_ for _ in dir() if not _.startswith("_")]
diff --git a/bob/learn/tensorflow/metrics/embedding_accuracy.py b/bob/learn/tensorflow/metrics/embedding_accuracy.py
new file mode 100644
index 0000000000000000000000000000000000000000..20ac7294a1000ed09b688d08bbd4be9f1eb35b60
--- /dev/null
+++ b/bob/learn/tensorflow/metrics/embedding_accuracy.py
@@ -0,0 +1,39 @@
+import numpy as np
+import tensorflow as tf
+import tensorflow.keras.backend as K
+from tensorflow.python.keras.metrics import MeanMetricWrapper
+
+from ..utils import pdist
+
+
+def predict_using_tensors(embedding, labels):
+    """
+    Compute the predictions through exhaustive comparisons between
+    embeddings using tensors
+    """
+
+    # Filling the main diagonal with infs (removing comparisons with the same
+    # sample)
+    inf = tf.cast(tf.ones_like(labels), tf.float32) * np.inf
+
+    distances = pdist(embedding)
+    distances = tf.linalg.set_diag(distances, inf)
+    indexes = tf.argmin(input=distances, axis=1)
+    return tf.gather(labels, indexes)
+
+
+def accuracy_from_embeddings(labels, prelogits):
+    labels = tf.reshape(labels, (-1,))
+    embeddings = tf.nn.l2_normalize(prelogits, 1)
+    predictions = predict_using_tensors(embeddings, labels)
+    return tf.cast(tf.math.equal(labels, predictions), K.floatx())
+
+
+class EmbeddingAccuracy(MeanMetricWrapper):
+    """Calculates accuracy from labels and prelogits.
+    This class relies on the fact that, in each batch, at least two images are
+    available from each class(identity).
+    """
+
+    def __init__(self, name="embedding_accuracy", dtype=None):
+        super().__init__(accuracy_from_embeddings, name, dtype=dtype)
diff --git a/bob/learn/tensorflow/models/__init__.py b/bob/learn/tensorflow/models/__init__.py
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..48804ec2338b14d47fe6e4e5c332b561d56daed9 100644
--- a/bob/learn/tensorflow/models/__init__.py
+++ b/bob/learn/tensorflow/models/__init__.py
@@ -0,0 +1,26 @@
+from .alexnet import AlexNet_simplified
+from .densenet import DenseNet
+from .mine import MineModel
+
+# gets sphinx autodoc done right - don't remove it
+def __appropriate__(*args):
+    """Says object was actually declared here, an not on the import module.
+
+    Parameters:
+
+      *args: An iterable of objects to modify
+
+    Resolves `Sphinx referencing issues
+    <https://github.com/sphinx-doc/sphinx/issues/3048>`
+    """
+
+    for obj in args:
+        obj.__module__ = __name__
+
+
+__appropriate__(
+    AlexNet_simplified,
+    DenseNet,
+    MineModel
+)
+__all__ = [_ for _ in dir() if not _.startswith("_")]
diff --git a/bob/learn/tensorflow/models/alexnet.py b/bob/learn/tensorflow/models/alexnet.py
index 202348034fdbbd299b1fb2f64c09e439e822d682..1ff0907775faa81c6ce5777f0065bf1ad49b3e42 100644
--- a/bob/learn/tensorflow/models/alexnet.py
+++ b/bob/learn/tensorflow/models/alexnet.py
@@ -9,20 +9,50 @@ def AlexNet_simplified(name="AlexNet", **kwargs):
     model = tf.keras.Sequential(
         [
             tf.keras.Input(shape=(227, 227, 3)),
-            tf.keras.layers.Conv2D(filters=96, kernel_size=11, strides=4, name="C1", activation="relu"),
+            tf.keras.layers.Conv2D(
+                filters=96, kernel_size=11, strides=4, name="C1", activation="relu"
+            ),
             tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name="P1"),
-            tf.keras.layers.Conv2D(filters=256, kernel_size=5, strides=1, name="C2", activation="relu", padding="same"),
+            tf.keras.layers.Conv2D(
+                filters=256,
+                kernel_size=5,
+                strides=1,
+                name="C2",
+                activation="relu",
+                padding="same",
+            ),
             tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name="P2"),
-            tf.keras.layers.Conv2D(filters=384, kernel_size=3, strides=1, name="C3", activation="relu", padding="same"),
-            tf.keras.layers.Conv2D(filters=384, kernel_size=3, strides=1, name="C4", activation="relu", padding="same"),
-            tf.keras.layers.Conv2D(filters=256, kernel_size=3, strides=1, name="C5", activation="relu", padding="same"),
+            tf.keras.layers.Conv2D(
+                filters=384,
+                kernel_size=3,
+                strides=1,
+                name="C3",
+                activation="relu",
+                padding="same",
+            ),
+            tf.keras.layers.Conv2D(
+                filters=384,
+                kernel_size=3,
+                strides=1,
+                name="C4",
+                activation="relu",
+                padding="same",
+            ),
+            tf.keras.layers.Conv2D(
+                filters=256,
+                kernel_size=3,
+                strides=1,
+                name="C5",
+                activation="relu",
+                padding="same",
+            ),
             tf.keras.layers.MaxPool2D(pool_size=3, strides=2, name="P5"),
             tf.keras.layers.Flatten(name="FLATTEN"),
             tf.keras.layers.Dropout(rate=0.5, name="D6"),
             tf.keras.layers.Dense(units=4096, activation="relu", name="F6"),
             tf.keras.layers.Dropout(rate=0.5, name="D7"),
             tf.keras.layers.Dense(units=4096, activation="relu", name="F7"),
-            tf.keras.layers.Dense(units=1000, activation="softmax", name="OUTPUT"),
+            tf.keras.layers.Dense(units=1000, name="OUTPUT"),
         ],
         name=name,
         **kwargs
@@ -32,6 +62,7 @@ def AlexNet_simplified(name="AlexNet", **kwargs):
 
 if __name__ == "__main__":
     import pkg_resources
+
     from bob.learn.tensorflow.utils import model_summary
 
     model = AlexNet_simplified()
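
Reviewer note on the activation removals in this file (and the analogous ones
in lenet5.py and inception.py below): the classification heads now emit raw
logits, so losses must be configured with from_logits=True. A minimal sketch
(optimizer and metric choices are illustrative):

import tensorflow as tf

from bob.learn.tensorflow.models import AlexNet_simplified

model = AlexNet_simplified()
model.compile(
    optimizer="sgd",
    # the OUTPUT layer no longer applies a softmax
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)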
diff --git a/bob/learn/tensorflow/models/autoencoder_face.py b/bob/learn/tensorflow/models/autoencoder_face.py
index 7be56d7b056afc423e452963a4bb5278257f4a18..bb92f92c6eb0658e31e6881e38a5794fc9a5a581 100644
--- a/bob/learn/tensorflow/models/autoencoder_face.py
+++ b/bob/learn/tensorflow/models/autoencoder_face.py
@@ -4,6 +4,7 @@ Mohammadi, Amir and Bhattacharjee, Sushil and Marcel, Sebastien, ICASSP 2020
 """
 
 import tensorflow as tf
+
 from bob.learn.tensorflow.models.densenet import densenet161
 
 
@@ -114,6 +115,7 @@ def autoencoder_face(z_dim=256, weight_decay=1e-10, decoder_last_act="tanh"):
 if __name__ == "__main__":
     import pkg_resources
     from tabulate import tabulate
+
     from bob.learn.tensorflow.utils import model_summary
 
     model = ConvDecoder(z_dim=256, weight_decay=1e-9, last_act="tanh", name="Decoder")
diff --git a/bob/learn/tensorflow/models/densenet.py b/bob/learn/tensorflow/models/densenet.py
index 7dfbc8aea0941ab86eb1fc3780d862578d3adf82..f17f5b860b2d3b92f06cd2b8586d5b590e0378e3 100644
--- a/bob/learn/tensorflow/models/densenet.py
+++ b/bob/learn/tensorflow/models/densenet.py
@@ -3,6 +3,7 @@ Reference [Densely Connected Convolutional Networks](https://arxiv.org/abs/1608.
 """
 
 import tensorflow as tf
+
 from bob.extension import rc
 
 l2 = tf.keras.regularizers.l2
@@ -433,21 +434,21 @@ class DeepPixBiS(tf.keras.Model):
             tf.keras.layers.Flatten(
                 data_format=data_format, name="Pixel_Logits_Flatten"
             ),
-            tf.keras.layers.Activation("sigmoid", name="activation"),
         ]
 
     def call(self, x, training=None):
-        for l in self.sequential_layers:
+        for layer in self.sequential_layers:
             try:
-                x = l(x, training=training)
+                x = layer(x, training=training)
             except TypeError:
-                x = l(x)
+                x = layer(x)
         return x
 
 
 if __name__ == "__main__":
     import pkg_resources
     from tabulate import tabulate
+
     from bob.learn.tensorflow.utils import model_summary
 
     def print_model(inputs, outputs):
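
Likewise, with the sigmoid activation removed above, DeepPixBiS now returns a
flat map of pixel-wise logits; training code is assumed to pair it with a
logits-aware binary loss, e.g.:

import tensorflow as tf

pixel_loss = tf.keras.losses.BinaryCrossentropy(from_logits=True)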
diff --git a/bob/learn/tensorflow/models/inception.py b/bob/learn/tensorflow/models/inception.py
index 3e25a59fa4ae9c6bb9d823ea7d6c20c311f37ca4..5a8314aa9ce82c1ddadcab47be0a16a16b5a0739 100644
--- a/bob/learn/tensorflow/models/inception.py
+++ b/bob/learn/tensorflow/models/inception.py
@@ -2,8 +2,7 @@ import tensorflow as tf
 
 
 class LRN(tf.keras.layers.Lambda):
-    """local response normalization with default parameters for GoogLeNet
-    """
+    """local response normalization with default parameters for GoogLeNet"""
 
     def __init__(self, alpha=0.0001, beta=0.75, depth_radius=5, **kwargs):
         self.alpha = alpha
@@ -21,8 +20,8 @@ class LRN(tf.keras.layers.Lambda):
 class InceptionModule(tf.keras.Model):
     """The inception module as it was introduced in:
 
-        C. Szegedy et al., “Going deeper with convolutions,” in Proceedings of the IEEE
-        Conference on Computer Vision and Pattern Recognition, 2015, pp. 1–9.
+    C. Szegedy et al., “Going deeper with convolutions,” in Proceedings of the IEEE
+    Conference on Computer Vision and Pattern Recognition, 2015, pp. 1–9.
     """
 
     def __init__(
@@ -112,7 +111,9 @@ def GoogLeNet(*, num_classes=1000, name="GoogLeNet", **kwargs):
             ),
             tf.keras.layers.MaxPool2D(3, 2, padding="same", name="pool1/3x3_s2"),
             LRN(name="pool1/norm1"),
-            tf.keras.layers.Conv2D(64, 1, padding="same", activation="relu", name="conv2/3x3_reduce"),
+            tf.keras.layers.Conv2D(
+                64, 1, padding="same", activation="relu", name="conv2/3x3_reduce"
+            ),
             tf.keras.layers.Conv2D(
                 192, 3, padding="same", activation="relu", name="conv2/3x3"
             ),
@@ -131,7 +132,7 @@ def GoogLeNet(*, num_classes=1000, name="GoogLeNet", **kwargs):
             InceptionModule(384, 192, 384, 48, 128, 128, name="inception_5b"),
             tf.keras.layers.GlobalAvgPool2D(name="pool5"),
             tf.keras.layers.Dropout(rate=0.4, name="dropout"),
-            tf.keras.layers.Dense(num_classes, name="output", activation="softmax"),
+            tf.keras.layers.Dense(num_classes, name="output"),
         ],
         name=name,
         **kwargs
@@ -143,6 +144,7 @@ def GoogLeNet(*, num_classes=1000, name="GoogLeNet", **kwargs):
 if __name__ == "__main__":
     import pkg_resources
     from tabulate import tabulate
+
     from bob.learn.tensorflow.utils import model_summary
 
     inputs = tf.keras.Input((28, 28, 192), name="input")
diff --git a/bob/learn/tensorflow/models/inception_resnet_v2.py b/bob/learn/tensorflow/models/inception_resnet_v2.py
index 79b1a66d24f59a3788742fe843e08466e3a67bbd..ad5b8629f52b27496aa5c92aeaa2e3793965042a 100644
--- a/bob/learn/tensorflow/models/inception_resnet_v2.py
+++ b/bob/learn/tensorflow/models/inception_resnet_v2.py
@@ -1,29 +1,39 @@
 # -*- coding: utf-8 -*-
 """Inception-ResNet-V2 MultiScale-Inception-ResNet-V2 models for Keras.
 """
-from tensorflow.keras.models import Model
-from tensorflow.keras.layers import (
-    Activation,
-    BatchNormalization,
-    Concatenate,
-    Conv2D,
-    Dense,
-    Dropout,
-    Input,
-    Lambda,
-    MaxPool2D,
-    AvgPool2D,
-    GlobalAvgPool2D,
-    GlobalMaxPool2D,
-)
-from tensorflow.keras import backend as K
-import tensorflow as tf
 import logging
 
+import tensorflow as tf
+from tensorflow.keras import backend as K
+from tensorflow.keras.layers import Activation
+from tensorflow.keras.layers import AvgPool2D
+from tensorflow.keras.layers import BatchNormalization
+from tensorflow.keras.layers import Concatenate
+from tensorflow.keras.layers import Conv2D
+from tensorflow.keras.layers import Dense
+from tensorflow.keras.layers import Dropout
+from tensorflow.keras.layers import GlobalAvgPool2D
+from tensorflow.keras.layers import GlobalMaxPool2D
+from tensorflow.keras.layers import Input
+from tensorflow.keras.layers import MaxPool2D
+from tensorflow.keras.models import Model
+from tensorflow.keras.models import Sequential
+
+from ..utils import SequentialLayer
+
 logger = logging.getLogger(__name__)
 
 
-class Conv2D_BN(tf.keras.Sequential):
+def Conv2D_BN(
+    filters,
+    kernel_size,
+    strides=1,
+    padding="same",
+    activation="relu",
+    use_bias=False,
+    name=None,
+    **kwargs,
+):
     """Utility class to apply conv + BN.
 
     # Arguments
@@ -54,50 +64,30 @@ class Conv2D_BN(tf.keras.Sequential):
         and `name + '/BatchNorm'` for the batch norm layer.
     """
 
-    def __init__(
-        self,
-        filters,
-        kernel_size,
-        strides=1,
-        padding="same",
-        activation="relu",
-        use_bias=False,
-        name=None,
-        **kwargs,
-    ):
-
-        self.filters = filters
-        self.kernel_size = kernel_size
-        self.strides = strides
-        self.padding = padding
-        self.activation = activation
-        self.use_bias = use_bias
-
-        layers = [
-            Conv2D(
-                filters,
-                kernel_size,
-                strides=strides,
-                padding=padding,
-                use_bias=use_bias,
-                name=name,
-            )
-        ]
+    layers = [
+        Conv2D(
+            filters,
+            kernel_size,
+            strides=strides,
+            padding=padding,
+            use_bias=use_bias,
+            name="Conv2D",
+        )
+    ]
 
-        if not use_bias:
-            bn_axis = 1 if K.image_data_format() == "channels_first" else 3
-            bn_name = None if name is None else name + "/BatchNorm"
-            layers += [BatchNormalization(axis=bn_axis, scale=False, name=bn_name)]
+    if not use_bias:
+        bn_axis = 1 if K.image_data_format() == "channels_first" else 3
+        layers += [BatchNormalization(axis=bn_axis, scale=False, name="BatchNorm")]
 
-        if activation is not None:
-            ac_name = None if name is None else name + "/Act"
-            layers += [Activation(activation, name=ac_name)]
+    if activation is not None:
+        layers += [Activation(activation, name="Act")]
 
-        super().__init__(layers, name=name, **kwargs)
+    return SequentialLayer(layers, name=name, **kwargs)
 
 
-class ScaledResidual(tf.keras.Model):
+class ScaledResidual(tf.keras.layers.Layer):
     """A scaled residual connection layer"""
+
     def __init__(self, scale, name="scaled_residual", **kwargs):
         super().__init__(name=name, **kwargs)
         self.scale = scale
@@ -105,8 +95,13 @@ class ScaledResidual(tf.keras.Model):
     def call(self, inputs, training=None):
         return inputs[0] + inputs[1] * self.scale
 
+    def get_config(self):
+        config = super().get_config()
+        config.update({"scale": self.scale, "name": self.name})
+        return config
 
-class InceptionResnetBlock(tf.keras.Model):
+
+class InceptionResnetBlock(tf.keras.layers.Layer):
     """An Inception-ResNet block.
 
     This class builds 3 types of Inception-ResNet blocks mentioned
@@ -164,32 +159,24 @@ class InceptionResnetBlock(tf.keras.Model):
         self.n = n
 
         if block_type == "block35":
-            branch_0 = [Conv2D_BN(32 // n, 1, name="branch0_conv1")]
-            branch_1 = [Conv2D_BN(32 // n, 1, name="branch1_conv1")]
-            branch_1 += [Conv2D_BN(32 // n, 3, name="branch1_conv2")]
-            branch_2 = [Conv2D_BN(32 // n, 1, name="branch2_conv1")]
-            branch_2 += [Conv2D_BN(48 // n, 3, name="branch2_conv2")]
-            branch_2 += [Conv2D_BN(64 // n, 3, name="branch2_conv3")]
+            branch_0 = [Conv2D_BN(32 // n, 1, name="Branch_0/Conv2d_1x1")]
+            branch_1 = [Conv2D_BN(32 // n, 1, name="Branch_1/Conv2d_0a_1x1")]
+            branch_1 += [Conv2D_BN(32 // n, 3, name="Branch_1/Conv2d_0b_3x3")]
+            branch_2 = [Conv2D_BN(32 // n, 1, name="Branch_2/Conv2d_0a_1x1")]
+            branch_2 += [Conv2D_BN(48 // n, 3, name="Branch_2/Conv2d_0b_3x3")]
+            branch_2 += [Conv2D_BN(64 // n, 3, name="Branch_2/Conv2d_0c_3x3")]
             branches = [branch_0, branch_1, branch_2]
         elif block_type == "block17":
-            branch_0 = [Conv2D_BN(192 // n, 1, name="branch0_conv1")]
-            branch_1 = [Conv2D_BN(128 // n, 1, name="branch1_conv1")]
-            branch_1 += [
-                Conv2D_BN(160 // n, (1, 7), name="branch1_conv2")
-            ]
-            branch_1 += [
-                Conv2D_BN(192 // n, (7, 1), name="branch1_conv3")
-            ]
+            branch_0 = [Conv2D_BN(192 // n, 1, name="Branch_0/Conv2d_1x1")]
+            branch_1 = [Conv2D_BN(128 // n, 1, name="Branch_1/Conv2d_0a_1x1")]
+            branch_1 += [Conv2D_BN(160 // n, (1, 7), name="Branch_1/Conv2d_0b_1x7")]
+            branch_1 += [Conv2D_BN(192 // n, (7, 1), name="Branch_1/Conv2d_0c_7x1")]
             branches = [branch_0, branch_1]
         elif block_type == "block8":
-            branch_0 = [Conv2D_BN(192 // n, 1, name="branch0_conv1")]
-            branch_1 = [Conv2D_BN(192 // n, 1, name="branch1_conv1")]
-            branch_1 += [
-                Conv2D_BN(224 // n, (1, 3), name="branch1_conv2")
-            ]
-            branch_1 += [
-                Conv2D_BN(256 // n, (3, 1), name="branch1_conv3")
-            ]
+            branch_0 = [Conv2D_BN(192 // n, 1, name="Branch_0/Conv2d_1x1")]
+            branch_1 = [Conv2D_BN(192 // n, 1, name="Branch_1/Conv2d_0a_1x1")]
+            branch_1 += [Conv2D_BN(224 // n, (1, 3), name="Branch_1/Conv2d_0b_1x3")]
+            branch_1 += [Conv2D_BN(256 // n, (3, 1), name="Branch_1/Conv2d_0c_3x1")]
             branches = [branch_0, branch_1]
         else:
             raise ValueError(
@@ -203,18 +190,9 @@ class InceptionResnetBlock(tf.keras.Model):
         channel_axis = 1 if K.image_data_format() == "channels_first" else 3
         self.concat = Concatenate(axis=channel_axis, name="concatenate")
         self.up_conv = Conv2D_BN(
-            n_channels, 1, activation=None, use_bias=True, name="up_conv"
+            n_channels, 1, activation=None, use_bias=True, name="Conv2d_1x1"
         )
 
-        # output_shape = (None, None, n_channels)
-        # if K.image_data_format() == "channels_first":
-        #     output_shape = (n_channels, None, None)
-        # self.residual = Lambda(
-        #     lambda inputs, scale: inputs[0] + inputs[1] * scale,
-        #     output_shape=output_shape,
-        #     arguments={"scale": scale},
-        #     name="residual_scale",
-        # )
         self.residual = ScaledResidual(scale)
         self.act = lambda x: x
         if activation is not None:
@@ -236,8 +214,26 @@ class InceptionResnetBlock(tf.keras.Model):
 
         return x
 
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                name: getattr(self, name)
+                for name in [
+                    "n_channels",
+                    "scale",
+                    "block_type",
+                    "block_idx",
+                    "activation",
+                    "n",
+                    "name",
+                ]
+            }
+        )
+        return config
+
 
-class ReductionA(tf.keras.Model):
+class ReductionA(tf.keras.layers.Layer):
     """A Reduction A block for InceptionResnetV2"""
 
     def __init__(
@@ -265,19 +261,19 @@ class ReductionA(tf.keras.Model):
                 3,
                 strides=1 if use_atrous else 2,
                 padding=padding,
-                name="branch1_conv1",
+                name="Branch_0/Conv2d_1a_3x3",
             )
         ]
 
         branch_2 = [
-            Conv2D_BN(k, 1, name="branch2_conv1"),
-            Conv2D_BN(kl, 3, name="branch2_conv2"),
+            Conv2D_BN(k, 1, name="Branch_1/Conv2d_0a_1x1"),
+            Conv2D_BN(kl, 3, name="Branch_1/Conv2d_0b_3x3"),
             Conv2D_BN(
                 km,
                 3,
                 strides=1 if use_atrous else 2,
                 padding=padding,
-                name="branch2_conv3",
+                name="Branch_1/Conv2d_1a_3x3",
             ),
         ]
 
@@ -286,7 +282,7 @@ class ReductionA(tf.keras.Model):
                 3,
                 strides=1 if use_atrous else 2,
                 padding=padding,
-                name="branch3_pool1",
+                name="Branch_2/MaxPool_1a_3x3",
             )
         ]
         self.branches = [branch_1, branch_2, branch_pool]
@@ -306,8 +302,18 @@ class ReductionA(tf.keras.Model):
 
         return self.concat(branch_outputs)
 
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                name: getattr(self, name)
+                for name in ["padding", "k", "kl", "km", "n", "use_atrous", "name"]
+            }
+        )
+        return config
+
 
-class ReductionB(tf.keras.Model):
+class ReductionB(tf.keras.layers.Layer):
     """A Reduction B block for InceptionResnetV2"""
 
     def __init__(
@@ -334,31 +340,23 @@ class ReductionB(tf.keras.Model):
         self.pq = pq
 
         branch_1 = [
-            Conv2D_BN(n, 1, name="branch1_conv1"),
-            Conv2D_BN(
-                no, 3, strides=2, padding=padding, name="branch1_conv2"
-            ),
+            Conv2D_BN(n, 1, name="Branch_0/Conv2d_0a_1x1"),
+            Conv2D_BN(no, 3, strides=2, padding=padding, name="Branch_0/Conv2d_1a_3x3"),
         ]
 
         branch_2 = [
-            Conv2D_BN(p, 1, name="branch2_conv1"),
-            Conv2D_BN(
-                pq, 3, strides=2, padding=padding, name="branch2_conv2"
-            ),
+            Conv2D_BN(p, 1, name="Branch_1/Conv2d_0a_1x1"),
+            Conv2D_BN(pq, 3, strides=2, padding=padding, name="Branch_1/Conv2d_1a_3x3"),
         ]
 
         branch_3 = [
-            Conv2D_BN(k, 1, name="branch3_conv1"),
-            Conv2D_BN(kl, 3, name="branch3_conv2"),
-            Conv2D_BN(
-                km, 3, strides=2, padding=padding, name="branch3_conv3"
-            ),
+            Conv2D_BN(k, 1, name="Branch_2/Conv2d_0a_1x1"),
+            Conv2D_BN(kl, 3, name="Branch_2/Conv2d_0b_3x3"),
+            Conv2D_BN(km, 3, strides=2, padding=padding, name="Branch_2/Conv2d_1a_3x3"),
         ]
 
         branch_pool = [
-            MaxPool2D(
-                3, strides=2, padding=padding, name=f"branch4_pool1"
-            )
+            MaxPool2D(3, strides=2, padding=padding, name="Branch_3/MaxPool_1a_3x3")
         ]
         self.branches = [branch_1, branch_2, branch_3, branch_pool]
         channel_axis = 1 if K.image_data_format() == "channels_first" else 3
@@ -377,23 +375,49 @@ class ReductionB(tf.keras.Model):
 
         return self.concat(branch_outputs)
 
+    def get_config(self):
+        config = super().get_config()
+        config.update(
+            {
+                name: getattr(self, name)
+                for name in ["padding", "k", "kl", "km", "n", "no", "p", "pq", "name"]
+            }
+        )
+        return config
+
 
-class InceptionA(tf.keras.Model):
+class InceptionA(tf.keras.layers.Layer):
     def __init__(self, pool_filters, name="inception_a", **kwargs):
         super().__init__(name=name, **kwargs)
         self.pool_filters = pool_filters
 
-        self.branch1x1 = Conv2D_BN(96, kernel_size=1, padding="same", name="branch1_conv1")
+        self.branch1x1 = Conv2D_BN(
+            96, kernel_size=1, padding="same", name="Branch_0/Conv2d_1x1"
+        )
 
-        self.branch3x3dbl_1 = Conv2D_BN(64, kernel_size=1, padding="same", name="branch2_conv1")
-        self.branch3x3dbl_2 = Conv2D_BN(96, kernel_size=3, padding="same", name="branch2_conv2")
-        self.branch3x3dbl_3 = Conv2D_BN(96, kernel_size=3, padding="same", name="branch2_conv3")
+        self.branch3x3dbl_1 = Conv2D_BN(
+            64, kernel_size=1, padding="same", name="Branch_2/Conv2d_0a_1x1"
+        )
+        self.branch3x3dbl_2 = Conv2D_BN(
+            96, kernel_size=3, padding="same", name="Branch_2/Conv2d_0b_3x3"
+        )
+        self.branch3x3dbl_3 = Conv2D_BN(
+            96, kernel_size=3, padding="same", name="Branch_2/Conv2d_0c_3x3"
+        )
 
-        self.branch5x5_1 = Conv2D_BN(48, kernel_size=1, padding="same", name="branch3_conv1")
-        self.branch5x5_2 = Conv2D_BN(64, kernel_size=5, padding="same", name="branch3_conv2")
+        self.branch5x5_1 = Conv2D_BN(
+            48, kernel_size=1, padding="same", name="Branch_1/Conv2d_0a_1x1"
+        )
+        self.branch5x5_2 = Conv2D_BN(
+            64, kernel_size=5, padding="same", name="Branch_1/Conv2d_0b_5x5"
+        )
 
-        self.branch_pool_1 = AvgPool2D(pool_size=3, strides=1, padding="same", name="branch4_pool1")
-        self.branch_pool_2 = Conv2D_BN(pool_filters, kernel_size=1, padding="same", name="branch4_conv1")
+        self.branch_pool_1 = AvgPool2D(
+            pool_size=3, strides=1, padding="same", name="Branch_3/AvgPool_0a_3x3"
+        )
+        self.branch_pool_2 = Conv2D_BN(
+            pool_filters, kernel_size=1, padding="same", name="Branch_3/Conv2d_0b_1x1"
+        )
 
         channel_axis = 1 if K.image_data_format() == "channels_first" else 3
         self.concat = Concatenate(axis=channel_axis)
@@ -414,6 +438,11 @@ class InceptionA(tf.keras.Model):
         outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
         return self.concat(outputs)
 
+    def get_config(self):
+        config = super().get_config()
+        config.update({"pool_filters": self.pool_filters, "name": self.name})
+        return config
+
 
 def InceptionResNetV2(
     include_top=True,
@@ -421,18 +450,17 @@ def InceptionResNetV2(
     input_shape=None,
     pooling=None,
     classes=1000,
+    bottleneck=False,
+    dropout_rate=0.2,
+    name="InceptionResnetV2",
     **kwargs,
 ):
     """Instantiates the Inception-ResNet v2 architecture.
-    Optionally loads weights pre-trained on ImageNet.
     Note that the data format convention used by the model is
     the one specified in your Keras config at `~/.keras/keras.json`.
     # Arguments
         include_top: whether to include the fully-connected
             layer at the top of the network.
-        weights: one of `None` (random initialization),
-              'imagenet' (pre-training on ImageNet),
-              or the path to the weights file to be loaded.
         input_tensor: optional Keras tensor (i.e. output of `tf.keras.Input()`)
             to use as image input for the model.
         input_shape: optional shape tuple, only to be specified
@@ -469,84 +497,118 @@ def InceptionResNetV2(
         else:
             img_input = input_tensor
 
-    # Stem block: 35 x 35 x 192
-    x = Conv2D_BN(32, 3, strides=2, padding="valid")(img_input)
-    x = Conv2D_BN(32, 3, padding="valid")(x)
-    x = Conv2D_BN(64, 3)(x)
-    x = MaxPool2D(3, strides=2)(x)
-    x = Conv2D_BN(80, 1, padding="valid")(x)
-    x = Conv2D_BN(192, 3, padding="valid")(x)
-    x = MaxPool2D(3, strides=2)(x)
-
-    # Mixed 5b (Inception-A block): 35 x 35 x 320
-    # branch_0 = Conv2D_BN(96, 1)(x)
-    # branch_1 = Conv2D_BN(48, 1)(x)
-    # branch_1 = Conv2D_BN(64, 5)(branch_1)
-    # branch_2 = Conv2D_BN(64, 1)(x)
-    # branch_2 = Conv2D_BN(96, 3)(branch_2)
-    # branch_2 = Conv2D_BN(96, 3)(branch_2)
-    # branch_pool = AvgPool2D(3, strides=1, padding="same")(x)
-    # branch_pool = Conv2D_BN(64, 1)(branch_pool)
-    # branches = [branch_0, branch_1, branch_2, branch_pool]
-    # channel_axis = 1 if K.image_data_format() == "channels_first" else 3
-    # x = Concatenate(axis=channel_axis, name="mixed_5b")(branches)
-    x = InceptionA(pool_filters=64)(x)
+    layers = [
+        # Stem block: 35 x 35 x 192
+        Conv2D_BN(32, 3, strides=2, padding="valid", name="Conv2d_1a_3x3"),
+        Conv2D_BN(32, 3, padding="valid", name="Conv2d_2a_3x3"),
+        Conv2D_BN(64, 3, name="Conv2d_2b_3x3"),
+        MaxPool2D(3, strides=2, name="MaxPool_3a_3x3"),
+        Conv2D_BN(80, 1, padding="valid", name="Conv2d_3b_1x1"),
+        Conv2D_BN(192, 3, padding="valid", name="Conv2d_4a_3x3"),
+        MaxPool2D(3, strides=2, name="MaxPool_5a_3x3"),
+        # Mixed 5b (Inception-A block): 35 x 35 x 320
+        InceptionA(pool_filters=64, name="Mixed_5b"),
+    ]
 
     # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320
     for block_idx in range(1, 11):
-        x = InceptionResnetBlock(
-            n_channels=320, scale=0.17, block_type="block35", block_idx=block_idx,
-            name=f"block35_{block_idx}",
-        )(x)
+        layers.append(
+            InceptionResnetBlock(
+                n_channels=320,
+                scale=0.17,
+                block_type="block35",
+                block_idx=block_idx,
+                name=f"block35_{block_idx}",
+            )
+        )
 
     # Mixed 6a (Reduction-A block): 17 x 17 x 1088
-    x = ReductionA(padding="valid", n=384, k=256, kl=256, km=384, use_atrous=False)(x)
+    layers.append(
+        ReductionA(
+            padding="valid",
+            n=384,
+            k=256,
+            kl=256,
+            km=384,
+            use_atrous=False,
+            name="Mixed_6a",
+        )
+    )
 
     # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088
     for block_idx in range(1, 21):
-        x = InceptionResnetBlock(
-            n_channels=1088, scale=0.1, block_type="block17", block_idx=block_idx,
-            name=f"block17_{block_idx}",
-        )(x)
+        layers.append(
+            InceptionResnetBlock(
+                n_channels=1088,
+                scale=0.1,
+                block_type="block17",
+                block_idx=block_idx,
+                name=f"block17_{block_idx}",
+            )
+        )
 
     # Mixed 7a (Reduction-B block): 8 x 8 x 2080
-    x = ReductionB(
-        padding="valid", n=256, no=384, p=256, pq=288, k=256, kl=288, km=320
-    )(x)
+    layers.append(
+        ReductionB(
+            padding="valid",
+            n=256,
+            no=384,
+            p=256,
+            pq=288,
+            k=256,
+            kl=288,
+            km=320,
+            name="Mixed_7a",
+        )
+    )
 
     # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080
     for block_idx in range(1, 10):
-        x = InceptionResnetBlock(
-            n_channels=2080, scale=0.2, block_type="block8", block_idx=block_idx,
-            name=f"block8_{block_idx}",
-        )(x)
-    x = InceptionResnetBlock(
-        n_channels=2080, scale=1.0, activation=None, block_type="block8", block_idx=10,
-        name=f"block8_{block_idx+1}",
-    )(x)
+        layers.append(
+            InceptionResnetBlock(
+                n_channels=2080,
+                scale=0.2,
+                block_type="block8",
+                block_idx=block_idx,
+                name=f"block8_{block_idx}",
+            )
+        )
+    layers.append(
+        InceptionResnetBlock(
+            n_channels=2080,
+            scale=1.0,
+            activation=None,
+            block_type="block8",
+            block_idx=10,
+            name=f"block8_{block_idx+1}",
+        )
+    )
 
     # Final convolution block: 8 x 8 x 1536
-    x = Conv2D_BN(1536, 1, name="conv_7b")(x)
+    layers.append(Conv2D_BN(1536, 1, name="Conv2d_7b_1x1"))
 
-    if include_top:
-        # Classification block
-        x = GlobalAvgPool2D(name="avg_pool")(x)
-        x = Dense(classes, activation="softmax", name="predictions")(x)
-    else:
-        if pooling == "avg":
-            x = GlobalAvgPool2D()(x)
-        elif pooling == "max":
-            x = GlobalMaxPool2D()(x)
+    if (include_top and pooling is None) or (bottleneck):
+        pooling = "avg"
 
-    # Ensure that the model takes into account
-    # any potential predecessors of `input_tensor`.
-    if input_tensor is not None:
-        inputs = tf.keras.utils.get_source_inputs(input_tensor)
-    else:
-        inputs = img_input
+    if pooling == "avg":
+        layers.append(GlobalAvgPool2D())
+    elif pooling == "max":
+        layers.append(GlobalMaxPool2D())
+
+    if bottleneck:
+        layers.append(Dropout(dropout_rate, name="Dropout"))
+        layers.append(Dense(128, use_bias=False, name="Bottleneck"))
+        layers.append(
+            BatchNormalization(axis=-1, scale=False, name="Bottleneck/BatchNorm")
+        )
+
+    # Classification block
+    if include_top:
+        layers.append(Dense(classes, name="logits"))
 
-    # Create model.
-    model = Model(inputs, x, name="inception_resnet_v2")
+    # Create model and call it on input to create its variables.
+    model = Sequential(layers, name=name, **kwargs)
+    model(img_input)
 
     return model
 
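For context, a sketch of the new keyword arguments wired in above (the input
shape is an illustrative assumption): bottleneck=True forces average pooling
and appends the Dropout / 128-unit Bottleneck / BatchNorm head, while
include_top=False drops the logits layer.

from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2

# 128-D embedding model: pooled features -> Dropout -> Bottleneck -> BatchNorm
model = InceptionResNetV2(
    include_top=False,
    bottleneck=True,
    dropout_rate=0.2,
    input_shape=(160, 160, 3),
)
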
@@ -574,7 +636,7 @@ def MultiScaleInceptionResNetV2(
     padding = "SAME" if align_feature_maps else "VALID"
     name = name or "InceptionResnetV2"
 
-    with tf.name_scope(name, "InceptionResnetV2", [img_input]):
+    with tf.compat.v1.name_scope(name, "InceptionResnetV2", [img_input]):
         # convert colors from RGB to a learned color space and batch norm inputs
         # 224, 224, 4
         net = Conv2D_BN(
@@ -683,8 +745,8 @@ def MultiScaleInceptionResNetV2(
 
 if __name__ == "__main__":
     import pkg_resources
-    from tabulate import tabulate
     from bob.learn.tensorflow.utils import model_summary
+    from tabulate import tabulate
 
     def print_model(inputs, outputs, name=None):
         print("")
diff --git a/bob/learn/tensorflow/models/lenet5.py b/bob/learn/tensorflow/models/lenet5.py
index a868515b897fe2c010cf407b1af44d5fbe352649..425b337725b60b6dc471c572c80df1f83eded74e 100644
--- a/bob/learn/tensorflow/models/lenet5.py
+++ b/bob/learn/tensorflow/models/lenet5.py
@@ -22,7 +22,7 @@ def LeNet5_simplified(name="LeNet5", **kwargs):
             ),
             tf.keras.layers.Flatten(name="FLATTEN"),
             tf.keras.layers.Dense(units=84, activation="tanh", name="F6"),
-            tf.keras.layers.Dense(units=10, activation="sigmoid", name="OUTPUT"),
+            tf.keras.layers.Dense(units=10, name="OUTPUT"),
         ],
         name=name,
         **kwargs
@@ -32,6 +32,7 @@ def LeNet5_simplified(name="LeNet5", **kwargs):
 
 if __name__ == "__main__":
     import pkg_resources
+
     from bob.learn.tensorflow.utils import model_summary
 
     model = LeNet5_simplified()
diff --git a/bob/learn/tensorflow/models/mcae.py b/bob/learn/tensorflow/models/mcae.py
index 07cbb529be878551cfd2efcf2e438f802c52557f..214307820df190b1eb6af376ad7af4a1654d9d2e 100644
--- a/bob/learn/tensorflow/models/mcae.py
+++ b/bob/learn/tensorflow/models/mcae.py
@@ -51,8 +51,8 @@ class ConvEncoder(tf.keras.Model):
         self.sequential_layers = layers
 
     def call(self, x, training=None):
-        for l in self.sequential_layers:
-            x = l(x)
+        for layer in self.sequential_layers:
+            x = layer(x)
         return x
 
 
@@ -67,7 +67,7 @@ class ConvDecoder(tf.keras.Model):
         name="Decoder",
         **kwargs,
     ):
-        super().__init__(name=name, ** kwargs)
+        super().__init__(name=name, **kwargs)
         self.data_format = data_format
         l2_kw = get_l2_kw(weight_decay)
         layers = []
@@ -91,8 +91,8 @@ class ConvDecoder(tf.keras.Model):
         self.sequential_layers = layers
 
     def call(self, x, training=None):
-        for l in self.sequential_layers:
-            x = l(x)
+        for layer in self.sequential_layers:
+            x = layer(x)
         return x
 
 
diff --git a/bob/learn/tensorflow/models/mine.py b/bob/learn/tensorflow/models/mine.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f766236f0cf33f6b1d73ffd14a2eafec636b1ad
--- /dev/null
+++ b/bob/learn/tensorflow/models/mine.py
@@ -0,0 +1,68 @@
+"""
+Implements the MINE loss from the paper:
+
+Mutual Information Neural Estimation (https://arxiv.org/pdf/1801.04062.pdf)
+
+"""
+
+import tensorflow as tf
+
+
+class MineModel(tf.keras.Model):
+    """
+
+    Parameters
+    **********
+
+      is_mine_f: bool
+         If true, will implement MINE-F (equation 6), otherwise will implement equation 5
+    """
+
+    def __init__(self, is_mine_f=False, name="MINE", units=10, **kwargs):
+        super().__init__(name=name, **kwargs)
+        self.units = units
+        self.is_mine_f = is_mine_f
+
+        self.transformer_x = tf.keras.layers.Dense(self.units)
+        self.transformer_z = tf.keras.layers.Dense(self.units)
+        self.transformer_xz = tf.keras.layers.Dense(self.units)
+        self.transformer_output = tf.keras.layers.Dense(1)
+
+    def call(self, inputs):
+        def compute(x, z):
+            h1_x = self.transformer_x(x)
+            h1_z = self.transformer_z(z)
+            h1 = tf.keras.layers.ReLU()(h1_x + h1_z)
+            h2 = self.transformer_output(
+                tf.keras.layers.ReLU()(self.transformer_xz(h1))
+            )
+
+            return h2
+
+        def compute_lower_bound(x, z):
+            t_xz = compute(x, z)
+            z_shuffle = tf.random.shuffle(z)
+            t_x_z = compute(x, z_shuffle)
+
+            if self.is_mine_f:
+                lb = -(
+                    tf.reduce_mean(t_xz, axis=0)
+                    - tf.reduce_mean(tf.math.exp(t_x_z - 1))
+                )
+            else:
+                lb = -(
+                    tf.reduce_mean(t_xz, axis=0)
+                    - tf.math.log(tf.reduce_mean(tf.math.exp(t_x_z)))
+                )
+
+            self.add_loss(lb)
+            return -lb
+
+        x = inputs[0]
+        z = inputs[1]
+
+        return compute_lower_bound(x, z)
+
+    def get_config(self):
+        config = super().get_config()
+        config.update({"units": self.units})
+        return config
+
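
A minimal sketch of driving the estimator (toy data and optimizer are
illustrative): the model registers the negative lower bound via add_loss, so
minimizing the Keras loss tightens the bound, and calling the model returns
the current mutual-information estimate.

import numpy as np
import tensorflow as tf

from bob.learn.tensorflow.models import MineModel

# z is a noisy copy of x, so their mutual information is high
x = np.random.normal(size=(1024, 1)).astype("float32")
z = x + 0.1 * np.random.normal(size=(1024, 1)).astype("float32")

mine = MineModel(units=10)
mine.compile(optimizer=tf.keras.optimizers.Adam(0.01))  # loss comes from add_loss
mine.fit([x, z], epochs=10, batch_size=256, verbose=0)

print(float(tf.reduce_mean(mine([x, z]))))  # estimated lower bound, in nats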
diff --git a/bob/learn/tensorflow/models/mlp.py b/bob/learn/tensorflow/models/mlp.py
index 3804c4e3222b2b0616e59f243bf8498f4f8f151f..71076818853e976063e64be108a3f27e3bd1a3d8 100644
--- a/bob/learn/tensorflow/models/mlp.py
+++ b/bob/learn/tensorflow/models/mlp.py
@@ -22,7 +22,9 @@ class MLP(tf.keras.Model):
         for i, n in enumerate(hidden_layers, start=1):
             sequential_layers.extend(
                 [
-                    tf.keras.layers.Dense(n, use_bias=False, name=f"dense_{i}", **dense_kw),
+                    tf.keras.layers.Dense(
+                        n, use_bias=False, name=f"dense_{i}", **dense_kw
+                    ),
                     tf.keras.layers.BatchNormalization(scale=False, name=f"bn_{i}"),
                     tf.keras.layers.Activation("relu", name=f"relu_{i}"),
                 ]
@@ -77,7 +79,9 @@ class MLPDropout(tf.keras.Model):
         for i, n in enumerate(hidden_layers, start=1):
             sequential_layers.extend(
                 [
-                    tf.keras.layers.Dense(n, use_bias=False, name=f"dense_{i}", **dense_kw),
+                    tf.keras.layers.Dense(
+                        n, use_bias=False, name=f"dense_{i}", **dense_kw
+                    ),
                     tf.keras.layers.Activation("relu", name=f"relu_{i}"),
                     tf.keras.layers.Dropout(rate=drop_rate, name=f"drop_{i}"),
                 ]
diff --git a/bob/learn/tensorflow/models/msu_patch.py b/bob/learn/tensorflow/models/msu_patch.py
index af1ad1da9a287d3cd55b2e616385a0145acc50d5..21ad7bf98ca0adc7bf2efab14177ff82b2c3531f 100644
--- a/bob/learn/tensorflow/models/msu_patch.py
+++ b/bob/learn/tensorflow/models/msu_patch.py
@@ -13,24 +13,39 @@ def MSUPatch(name="MSUPatch", **kwargs):
     return tf.keras.Sequential(
         [
             tf.keras.layers.Conv2D(
-                50, (5, 5), padding="same", use_bias=False, name="Conv-1", input_shape=(96, 96, 3)
+                50,
+                (5, 5),
+                padding="same",
+                use_bias=False,
+                name="Conv-1",
+                input_shape=(96, 96, 3),
             ),
             tf.keras.layers.BatchNormalization(scale=False, name="BN-1"),
             tf.keras.layers.Activation("relu", name="ReLU-1"),
             tf.keras.layers.MaxPool2D(padding="same", name="MaxPool-1"),
-            tf.keras.layers.Conv2D(100, (3, 3), padding="same", use_bias=False, name="Conv-2"),
+            tf.keras.layers.Conv2D(
+                100, (3, 3), padding="same", use_bias=False, name="Conv-2"
+            ),
             tf.keras.layers.BatchNormalization(scale=False, name="BN-2"),
             tf.keras.layers.Activation("relu", name="ReLU-2"),
             tf.keras.layers.MaxPool2D(padding="same", name="MaxPool-2"),
-            tf.keras.layers.Conv2D(150, (3, 3), padding="same", use_bias=False, name="Conv-3"),
+            tf.keras.layers.Conv2D(
+                150, (3, 3), padding="same", use_bias=False, name="Conv-3"
+            ),
             tf.keras.layers.BatchNormalization(scale=False, name="BN-3"),
             tf.keras.layers.Activation("relu", name="ReLU-3"),
-            tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding="same", name="MaxPool-3"),
-            tf.keras.layers.Conv2D(200, (3, 3), padding="same", use_bias=False, name="Conv-4"),
+            tf.keras.layers.MaxPool2D(
+                pool_size=3, strides=2, padding="same", name="MaxPool-3"
+            ),
+            tf.keras.layers.Conv2D(
+                200, (3, 3), padding="same", use_bias=False, name="Conv-4"
+            ),
             tf.keras.layers.BatchNormalization(scale=False, name="BN-4"),
             tf.keras.layers.Activation("relu", name="ReLU-4"),
             tf.keras.layers.MaxPool2D(padding="same", name="MaxPool-4"),
-            tf.keras.layers.Conv2D(250, (3, 3), padding="same", use_bias=False, name="Conv-5"),
+            tf.keras.layers.Conv2D(
+                250, (3, 3), padding="same", use_bias=False, name="Conv-5"
+            ),
             tf.keras.layers.BatchNormalization(scale=False, name="BN-5"),
             tf.keras.layers.Activation("relu", name="ReLU-5"),
             tf.keras.layers.MaxPool2D(padding="same", name="MaxPool-5"),
@@ -52,6 +67,7 @@ def MSUPatch(name="MSUPatch", **kwargs):
 if __name__ == "__main__":
     import pkg_resources
     from tabulate import tabulate
+
     from bob.learn.tensorflow.utils import model_summary
 
     model = MSUPatch()
diff --git a/bob/learn/tensorflow/models/simple_cnn.py b/bob/learn/tensorflow/models/simple_cnn.py
index 82ae65a68e12325d727a6ae9e2e024d12ca7eba7..f4c4ec56f71dbaecec83b0ddf23a0010fb62539a 100644
--- a/bob/learn/tensorflow/models/simple_cnn.py
+++ b/bob/learn/tensorflow/models/simple_cnn.py
@@ -1,15 +1,14 @@
 """A small CNN used for patch-based Face PAD"""
 
-from tensorflow.python.keras import Sequential, Input
-from tensorflow.python.keras.layers import (
-    Conv2D,
-    BatchNormalization,
-    Activation,
-    MaxPool2D,
-    Flatten,
-    Dense,
-    Dropout,
-)
+from tensorflow.python.keras import Input
+from tensorflow.python.keras import Sequential
+from tensorflow.python.keras.layers import Activation
+from tensorflow.python.keras.layers import BatchNormalization
+from tensorflow.python.keras.layers import Conv2D
+from tensorflow.python.keras.layers import Dense
+from tensorflow.python.keras.layers import Dropout
+from tensorflow.python.keras.layers import Flatten
+from tensorflow.python.keras.layers import MaxPool2D
 
 
 def SimpleCNN(input_shape=(28, 28, 3), inputs=None, name="SimpleCNN", **kwargs):
@@ -44,6 +43,7 @@ def SimpleCNN(input_shape=(28, 28, 3), inputs=None, name="SimpleCNN", **kwargs):
 if __name__ == "__main__":
     import pkg_resources
     from tabulate import tabulate
+
     from bob.learn.tensorflow.utils import model_summary
 
     model = SimpleCNN()
diff --git a/bob/learn/tensorflow/network/Chopra.py b/bob/learn/tensorflow/network/Chopra.py
deleted file mode 100644
index a3242e40b3a925cdd5a4a4a228beb1c4726003d6..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/Chopra.py
+++ /dev/null
@@ -1,109 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-
-
-def chopra(
-        inputs,
-        conv1_kernel_size=[7, 7],
-        conv1_output=15,
-        pooling1_size=[2, 2],
-        conv2_kernel_size=[6, 6],
-        conv2_output=45,
-        pooling2_size=[4, 3],
-        fc1_output=250,
-        seed=10,
-        reuse=False,
-):
-    """Class that creates the architecture presented in the paper:
-
-    Chopra, Sumit, Raia Hadsell, and Yann LeCun. "Learning a similarity metric discriminatively, with application to
-    face verification." 2005 IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR'05). Vol. 1. IEEE, 2005.
-
-    This is modifield version of the original architecture.
-    It is inspired on https://gitlab.idiap.ch/bob/xfacereclib.cnn/blob/master/lua/network.lua
-
-    -- C1 : Convolutional, kernel = 7x7 pixels, 15 feature maps
-
-    -- M2 : MaxPooling, 2x2
-
-    -- HT : Hard Hyperbolic Tangent
-
-    -- C3 : Convolutional, kernel = 6x6 pixels, 45 feature maps
-
-    -- M4 : MaxPooling, 4x3
-
-    -- HT : Hard Hyperbolic Tangent
-
-    -- R  : Reshaping layer HT 5x5 => 25 (45 times; once for each feature map)
-
-    -- L5 : Linear 25 => 250
-
-
-    **Parameters**
-
-        conv1_kernel_size:
-
-        conv1_output:
-
-        pooling1_size:
-
-        conv2_kernel_size:
-
-        conv2_output:
-
-        pooling2_size
-
-        fc1_output:
-        
-        seed:
-    """
-    slim = tf.contrib.slim
-
-    end_points = dict()
-
-    initializer = tf.contrib.layers.xavier_initializer(
-        uniform=False, dtype=tf.float32, seed=seed)
-
-    graph = slim.conv2d(
-        inputs,
-        conv1_output,
-        conv1_kernel_size,
-        activation_fn=tf.nn.relu,
-        stride=1,
-        weights_initializer=initializer,
-        scope='conv1',
-        reuse=reuse)
-    end_points['conv1'] = graph
-
-    graph = slim.max_pool2d(graph, pooling1_size, scope='pool1')
-    end_points['pool1'] = graph
-
-    graph = slim.conv2d(
-        graph,
-        conv2_output,
-        conv2_kernel_size,
-        activation_fn=tf.nn.relu,
-        stride=1,
-        weights_initializer=initializer,
-        scope='conv2',
-        reuse=reuse)
-    end_points['conv2'] = graph
-    graph = slim.max_pool2d(graph, pooling2_size, scope='pool2')
-    end_points['pool2'] = graph
-
-    graph = slim.flatten(graph, scope='flatten1')
-    end_points['flatten1'] = graph
-
-    graph = slim.fully_connected(
-        graph,
-        fc1_output,
-        weights_initializer=initializer,
-        activation_fn=None,
-        scope='fc1',
-        reuse=reuse)
-    end_points['fc1'] = graph
-
-    return graph, end_points
diff --git a/bob/learn/tensorflow/network/Dummy.py b/bob/learn/tensorflow/network/Dummy.py
deleted file mode 100644
index 917f30863a6fffafe6b590b44bdc8d4a785da587..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/Dummy.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from .utils import is_trainable
-
-
-def dummy(inputs,
-          reuse=False,
-          mode=tf.estimator.ModeKeys.TRAIN,
-          trainable_variables=None,
-          **kwargs):
-    """
-    Create all the necessary variables for this CNN
-
-    Parameters
-    ----------
-        inputs:
-        
-        reuse:
-
-        mode:
-
-        trainable_variables:
-
-    """
-
-    slim = tf.contrib.slim
-    end_points = dict()
-
-    # Here is my choice to shutdown the whole scope
-    trainable = is_trainable("Dummy", trainable_variables)
-    with tf.variable_scope('Dummy', reuse=reuse):
-
-        initializer = tf.contrib.layers.xavier_initializer()
-        name = 'conv1'
-        graph = slim.conv2d(
-            inputs,
-            10, [3, 3],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            scope=name,
-            weights_initializer=initializer,
-            trainable=trainable)
-        end_points[name] = graph
-
-        graph = slim.max_pool2d(graph, [4, 4], scope='pool1')
-        end_points['pool1'] = graph
-
-        graph = slim.flatten(graph, scope='flatten1')
-        end_points['flatten1'] = graph
-
-        name = 'fc1'
-        graph = slim.fully_connected(
-            graph,
-            50,
-            weights_initializer=initializer,
-            activation_fn=None,
-            scope=name,
-            trainable=trainable)
-        end_points[name] = graph
-
-    return graph, end_points
diff --git a/bob/learn/tensorflow/network/InceptionResnetV1.py b/bob/learn/tensorflow/network/InceptionResnetV1.py
deleted file mode 100644
index 4d5cfda1492c858d6e8948d6ede7e2b7994378f9..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/InceptionResnetV1.py
+++ /dev/null
@@ -1,674 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Contains the definition of the Inception Resnet V1 architecture.
-As described in http://arxiv.org/abs/1602.07261.
-  Inception-v4, Inception-ResNet and the Impact of Residual Connections
-    on Learning
-  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-import tensorflow.contrib.slim as slim
-from .utils import is_trainable
-
-
-# Inception-Resnet-A
-def block35(net,
-            scale=1.0,
-            activation_fn=tf.nn.relu,
-            scope=None,
-            reuse=None,
-            trainable_variables=True):
-    """Builds the 35x35 resnet block."""
-    with tf.variable_scope(scope, 'Block35', [net]):
-        with tf.variable_scope('Branch_0'):
-            tower_conv = slim.conv2d(
-                net, 32, 1, scope='Conv2d_1x1',
-                reuse=reuse,
-                trainable=trainable_variables)
-        with tf.variable_scope('Branch_1'):
-            tower_conv1_0 = slim.conv2d(
-                net,
-                32,
-                1,
-                scope='Conv2d_0a_1x1',
-                reuse=reuse,
-                trainable=trainable_variables)
-            tower_conv1_1 = slim.conv2d(
-                tower_conv1_0,
-                32,
-                3,
-                scope='Conv2d_0b_3x3',
-                reuse=reuse,
-                trainable=trainable_variables)
-        with tf.variable_scope('Branch_2'):
-            tower_conv2_0 = slim.conv2d(
-                net,
-                32,
-                1,
-                scope='Conv2d_0a_1x1',
-                reuse=reuse,
-                trainable=trainable_variables)
-            tower_conv2_1 = slim.conv2d(
-                tower_conv2_0,
-                32,
-                3,
-                scope='Conv2d_0b_3x3',
-                reuse=reuse,
-                trainable=trainable_variables)
-            tower_conv2_2 = slim.conv2d(
-                tower_conv2_1,
-                32,
-                3,
-                scope='Conv2d_0c_3x3',
-                reuse=reuse,
-                trainable=trainable_variables)
-        mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3)
-        up = slim.conv2d(
-            mixed,
-            net.get_shape()[3],
-            1,
-            normalizer_fn=None,
-            activation_fn=None,
-            scope='Conv2d_1x1',
-            reuse=reuse,
-            trainable=trainable_variables)
-        net += scale * up
-        if activation_fn:
-            net = activation_fn(net)
-    return net
-
-
-# Inception-Resnet-B
-def block17(net,
-            scale=1.0,
-            activation_fn=tf.nn.relu,
-            scope=None,
-            reuse=None,
-            trainable_variables=True):
-    """Builds the 17x17 resnet block."""
-    with tf.variable_scope(scope, 'Block17', [net]):
-        with tf.variable_scope('Branch_0'):
-            tower_conv = slim.conv2d(
-                net, 128, 1, scope='Conv2d_1x1', trainable=trainable_variables, reuse=reuse)
-        with tf.variable_scope('Branch_1'):
-            tower_conv1_0 = slim.conv2d(
-                net,
-                128,
-                1,
-                scope='Conv2d_0a_1x1',
-                reuse=reuse,
-                trainable=trainable_variables)
-            tower_conv1_1 = slim.conv2d(
-                tower_conv1_0,
-                128, [1, 7],
-                scope='Conv2d_0b_1x7',
-                reuse=reuse,
-                trainable=trainable_variables)
-            tower_conv1_2 = slim.conv2d(
-                tower_conv1_1,
-                128, [7, 1],
-                scope='Conv2d_0c_7x1',
-                reuse=reuse,
-                trainable=trainable_variables)
-        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
-        up = slim.conv2d(
-            mixed,
-            net.get_shape()[3],
-            1,
-            normalizer_fn=None,
-            activation_fn=None,
-            scope='Conv2d_1x1',
-            reuse=reuse,
-            trainable=trainable_variables)
-        net += scale * up
-        if activation_fn:
-            net = activation_fn(net)
-    return net
-
-
-# Inception-Resnet-C
-def block8(net,
-           scale=1.0,
-           activation_fn=tf.nn.relu,
-           scope=None,
-           reuse=None,
-           trainable_variables=True):
-    """Builds the 8x8 resnet block."""
-    with tf.variable_scope(scope, 'Block8', [net]):
-        with tf.variable_scope('Branch_0'):
-            tower_conv = slim.conv2d(
-                net, 192, 1, scope='Conv2d_1x1', trainable=trainable_variables, reuse=reuse)
-        with tf.variable_scope('Branch_1'):
-            tower_conv1_0 = slim.conv2d(
-                net,
-                192,
-                1,
-                scope='Conv2d_0a_1x1',
-                reuse=reuse,
-                trainable=trainable_variables)
-            tower_conv1_1 = slim.conv2d(
-                tower_conv1_0,
-                192, [1, 3],
-                scope='Conv2d_0b_1x3',
-                reuse=reuse,
-                trainable=trainable_variables)
-            tower_conv1_2 = slim.conv2d(
-                tower_conv1_1,
-                192, [3, 1],
-                scope='Conv2d_0c_3x1',
-                reuse=reuse,
-                trainable=trainable_variables)
-        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
-        up = slim.conv2d(
-            mixed,
-            net.get_shape()[3],
-            1,
-            normalizer_fn=None,
-            activation_fn=None,
-            scope='Conv2d_1x1',
-            reuse=reuse,
-            trainable=trainable_variables)
-        net += scale * up
-        if activation_fn:
-            net = activation_fn(net)
-    return net
-
-
-def reduction_a(net, k, l, m, n, trainable_variables=True, reuse=None):
-    with tf.variable_scope('Branch_0'):
-        tower_conv = slim.conv2d(
-            net,
-            n,
-            3,
-            stride=2,
-            padding='VALID',
-            scope='Conv2d_1a_3x3',
-            reuse=reuse,
-            trainable=trainable_variables)
-    with tf.variable_scope('Branch_1'):
-        tower_conv1_0 = slim.conv2d(
-            net, k, 1, scope='Conv2d_0a_1x1', trainable=trainable_variables, reuse=reuse)
-        tower_conv1_1 = slim.conv2d(
-            tower_conv1_0,
-            l,
-            3,
-            scope='Conv2d_0b_3x3',
-            reuse=reuse,
-            trainable=trainable_variables)
-        tower_conv1_2 = slim.conv2d(
-            tower_conv1_1,
-            m,
-            3,
-            stride=2,
-            padding='VALID',
-            reuse=reuse,
-            scope='Conv2d_1a_3x3',
-            trainable=trainable_variables)
-    with tf.variable_scope('Branch_2'):
-        tower_pool = slim.max_pool2d(
-            net, 3, stride=2, padding='VALID', scope='MaxPool_1a_3x3')
-    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
-    return net
-
-
-def reduction_b(net, trainable_variables=True, reuse=None):
-    with tf.variable_scope('Branch_0'):
-        tower_conv = slim.conv2d(
-            net, 256, 1, scope='Conv2d_0a_1x1', trainable=trainable_variables, reuse=reuse)
-        tower_conv_1 = slim.conv2d(
-            tower_conv,
-            384,
-            3,
-            stride=2,
-            padding='VALID',
-            reuse=reuse,
-            scope='Conv2d_1a_3x3',
-            trainable=trainable_variables)
-    with tf.variable_scope('Branch_1'):
-        tower_conv1 = slim.conv2d(
-            net, 256, 1, scope='Conv2d_0a_1x1', trainable=trainable_variables, reuse=reuse)
-        tower_conv1_1 = slim.conv2d(
-            tower_conv1,
-            256,
-            3,
-            stride=2,
-            padding='VALID',
-            scope='Conv2d_1a_3x3',
-            reuse=reuse,
-            trainable=trainable_variables)
-    with tf.variable_scope('Branch_2'):
-        tower_conv2 = slim.conv2d(
-            net, 256, 1, scope='Conv2d_0a_1x1', trainable=trainable_variables, reuse=reuse)
-        tower_conv2_1 = slim.conv2d(
-            tower_conv2,
-            256,
-            3,
-            scope='Conv2d_0b_3x3',
-            reuse=reuse,
-            trainable=trainable_variables)
-        tower_conv2_2 = slim.conv2d(
-            tower_conv2_1,
-            256,
-            3,
-            stride=2,
-            padding='VALID',
-            scope='Conv2d_1a_3x3',
-            reuse=reuse,
-            trainable=trainable_variables)
-    with tf.variable_scope('Branch_3'):
-        tower_pool = slim.max_pool2d(
-            net, 3, stride=2, padding='VALID', scope='MaxPool_1a_3x3')
-    net = tf.concat([tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool],
-                    3)
-    return net
-
-
-def inception_resnet_v1_batch_norm(inputs,
-                                   dropout_keep_prob=0.8,
-                                   bottleneck_layer_size=128,
-                                   reuse=None,
-                                   scope='InceptionResnetV1',
-                                   mode=tf.estimator.ModeKeys.TRAIN,
-                                   trainable_variables=None,
-                                   weight_decay=1e-5,
-                                   **kwargs):
-    """
-    Creates the Inception Resnet V1 model, applying batch norm to each
-    convolutional and fully connected layer.
-
-    Parameters
-    ----------
-
-      inputs:
-        4-D tensor of size [batch_size, height, width, 3].
-
-      mode:
-        estimator mode key (TRAIN, EVAL or PREDICT).
-
-      weight_decay: float
-        L2 weight-decay factor for the convolutional and fully
-        connected weights.
-
-      dropout_keep_prob: float
-        the fraction to keep before final layer.
-
-      reuse:
-        whether or not the network and its variables should be reused. To be
-        able to reuse 'scope' must be given.
-
-      scope:
-        Optional variable_scope.
-
-      trainable_variables: :any:`list`
-        List of variables to be trainable=True
-
-    Returns
-    -------
-      net:
-        the output of the bottleneck (embedding) layer.
-
-      end_points:
-        the set of end_points from the inception model.
-
-    """
-
-    batch_norm_params = {
-        # Decay for the moving averages.
-        'decay': 0.995,
-        # epsilon to prevent 0s in variance.
-        'epsilon': 0.001,
-        # force in-place updates of mean and variance estimates
-        'updates_collections': None,
-    }
-
-    with slim.arg_scope(
-        [slim.conv2d, slim.fully_connected],
-            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
-            weights_regularizer=slim.l2_regularizer(weight_decay),
-            normalizer_fn=slim.batch_norm,
-            normalizer_params=batch_norm_params):
-        return inception_resnet_v1(
-            inputs,
-            dropout_keep_prob=dropout_keep_prob,
-            bottleneck_layer_size=bottleneck_layer_size,
-            reuse=reuse,
-            scope=scope,
-            mode=mode,
-            trainable_variables=trainable_variables,
-        )
-
-
-def inception_resnet_v1(inputs,
-                        dropout_keep_prob=0.8,
-                        bottleneck_layer_size=128,
-                        reuse=None,
-                        scope='InceptionResnetV1',
-                        mode=tf.estimator.ModeKeys.TRAIN,
-                        trainable_variables=None,
-                        **kwargs):
-    """
-    Creates the Inception Resnet V1 model.
-
-    Parameters
-    ----------
-
-      inputs:
-        4-D tensor of size [batch_size, height, width, 3].
-
-      mode:
-        estimator mode key (TRAIN, EVAL or PREDICT); controls whether
-        dropout and batch normalization run in training mode.
-
-      dropout_keep_prob: float
-        the fraction to keep before final layer.
-
-      reuse:
-        whether or not the network and its variables should be reused. To be
-        able to reuse 'scope' must be given.
-
-      scope:
-        Optional variable_scope.
-
-      trainable_variables: :any:`list`
-        List of variables to be trainable=True
-
-    Returns
-    -------
-      net:
-        the output of the bottleneck (embedding) layer.
-
-      end_points:
-        the set of end_points from the inception model.
-
-    """
-    end_points = {}
-
-    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
-        with slim.arg_scope(
-            [slim.dropout],
-                is_training=(mode == tf.estimator.ModeKeys.TRAIN)):
-
-            with slim.arg_scope(
-                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
-                    stride=1,
-                    padding='SAME'):
-
-                # 149 x 149 x 32
-                name = "Conv2d_1a_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Conv2d_1a_3x3"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        inputs,
-                        32,
-                        3,
-                        stride=2,
-                        padding='VALID',
-                        reuse=reuse,
-                        scope=name,
-                        trainable=trainable)
-                    end_points[name] = net
-
-                # 147 x 147 x 32
-                name = "Conv2d_2a_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-                    name = "Conv2d_2a_3x3"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net,
-                        32,
-                        3,
-                        padding='VALID',
-                        scope=name,
-                        reuse=reuse,
-                        trainable=trainable)
-                    end_points[name] = net
-
-                # 147 x 147 x 64
-                name = "Conv2d_2b_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Conv2d_2b_3x3"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net, 64, 3, scope=name, trainable=trainable, reuse=reuse)
-                    end_points[name] = net
-
-                # 73 x 73 x 64
-                net = slim.max_pool2d(
-                    net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3')
-                end_points['MaxPool_3a_3x3'] = net
-
-                # 73 x 73 x 80
-                name = "Conv2d_3b_1x1_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Conv2d_3b_1x1"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net,
-                        80,
-                        1,
-                        padding='VALID',
-                        scope=name,
-                        reuse=reuse,
-                        trainable=trainable)
-                    end_points[name] = net
-
-                # 71 x 71 x 192
-                name = "Conv2d_4a_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Conv2d_4a_3x3"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net,
-                        192,
-                        3,
-                        padding='VALID',
-                        scope=name,
-                        reuse=reuse,
-                        trainable=trainable)
-                    end_points[name] = net
-
-                # 35 x 35 x 256
-                name = "Conv2d_4b_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Conv2d_4b_3x3"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net,
-                        256,
-                        3,
-                        stride=2,
-                        padding='VALID',
-                        scope=name,
-                        reuse=reuse,
-                        trainable=trainable)
-                    end_points[name] = net
-
-                # 5 x Inception-resnet-A
-                name = "block35_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "block35"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.repeat(
-                        net,
-                        5,
-                        block35,
-                        scale=0.17,
-                        reuse=reuse,
-                        trainable_variables=trainable)
-                    end_points[name] = net
-
-                # Reduction-A
-                name = "Mixed_6a_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Mixed_6a"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    with tf.variable_scope(name):
-                        net = reduction_a(
-                            net,
-                            192,
-                            192,
-                            256,
-                            384,
-                            trainable_variables=trainable)
-                    end_points[name] = net
-
-                # 10 x Inception-Resnet-B
-                name = "block17_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "block17"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.repeat(
-                        net,
-                        10,
-                        block17,
-                        scale=0.10,
-                        trainable_variables=trainable)
-                    end_points[name] = net
-
-                # Reduction-B
-                name = "Mixed_7a_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Mixed_7a"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-
-                    with tf.variable_scope(name):
-                        net = reduction_b(
-                            net, trainable_variables=trainable, reuse=reuse)
-                    end_points[name] = net
-
-                # 5 x Inception-Resnet-C
-                name = "block8_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "block8"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.repeat(
-                        net,
-                        5,
-                        block8,
-                        scale=0.20,
-                        trainable_variables=trainable)
-                    end_points[name] = net
-
-                name = "Mixed_8b_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Mixed_8b"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = block8(
-                        net,
-                        activation_fn=None,
-                        trainable_variables=trainable)
-                    end_points[name] = net
-
-                with tf.variable_scope('Logits'):
-                    end_points['PrePool'] = net
-                    #pylint: disable=no-member
-                    net = slim.avg_pool2d(
-                        net,
-                        net.get_shape()[1:3],
-                        padding='VALID',
-                        scope='AvgPool_1a_8x8')
-                    net = slim.flatten(net)
-
-                    net = slim.dropout(
-                        net,
-                        dropout_keep_prob,
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        scope='Dropout')
-
-                    end_points['PreLogitsFlatten'] = net
-
-                name = "Bottleneck_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                        trainable=trainable):
-
-                    name = "Bottleneck"
-                    trainable = is_trainable(name, trainable_variables, mode=mode)
-                    net = slim.fully_connected(
-                        net,
-                        bottleneck_layer_size,
-                        activation_fn=None,
-                        scope=name,
-                        trainable=trainable)
-                end_points[name] = net
-
-    return net, end_points
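
All building blocks of this deleted file (block35, block17, block8) share one
pattern: parallel convolution towers, a 1x1 projection back to the input
depth, and a scaled residual sum (net += scale * up). A minimal tf.keras
sketch of that pattern, with two towers instead of three and illustrative
channel widths:

    import tensorflow as tf

    def scaled_residual_block(net, scale=0.17):
        branch0 = tf.keras.layers.Conv2D(32, 1, padding="same",
                                         activation="relu")(net)
        branch1 = tf.keras.layers.Conv2D(32, 1, padding="same",
                                         activation="relu")(net)
        branch1 = tf.keras.layers.Conv2D(32, 3, padding="same",
                                         activation="relu")(branch1)
        mixed = tf.keras.layers.Concatenate(axis=-1)([branch0, branch1])
        # 1x1 projection back to the input depth; no activation, no norm.
        up = tf.keras.layers.Conv2D(net.shape[-1], 1, padding="same",
                                    activation=None)(mixed)
        # Scaled residual connection followed by the block nonlinearity.
        return tf.keras.layers.ReLU()(net + scale * up)

    inputs = tf.keras.Input(shape=(35, 35, 256))
    outputs = scaled_residual_block(inputs)
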
diff --git a/bob/learn/tensorflow/network/InceptionResnetV2.py b/bob/learn/tensorflow/network/InceptionResnetV2.py
deleted file mode 100644
index 3daf573feee3d6c210e642a92c5461acc69e6c32..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/InceptionResnetV2.py
+++ /dev/null
@@ -1,763 +0,0 @@
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Contains the definition of the Inception Resnet V2 architecture.
-As described in http://arxiv.org/abs/1602.07261.
-  Inception-v4, Inception-ResNet and the Impact of Residual Connections
-    on Learning
-  Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-import tensorflow.contrib.slim as slim
-from .utils import is_trainable
-
-
-# Inception-Resnet-A
-def block35(net,
-            scale=1.0,
-            activation_fn=tf.nn.relu,
-            scope=None,
-            reuse=None,
-            trainable_variables=True):
-    """Builds the 35x35 resnet block."""
-    with tf.variable_scope(scope, 'Block35', [net]):
-        with tf.variable_scope('Branch_0'):
-            tower_conv = slim.conv2d(
-                net,
-                32,
-                1,
-                scope='Conv2d_1x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-        with tf.variable_scope('Branch_1'):
-            tower_conv1_0 = slim.conv2d(
-                net,
-                32,
-                1,
-                scope='Conv2d_0a_1x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-            tower_conv1_1 = slim.conv2d(
-                tower_conv1_0,
-                32,
-                3,
-                scope='Conv2d_0b_3x3',
-                trainable=trainable_variables,
-                reuse=reuse)
-        with tf.variable_scope('Branch_2'):
-            tower_conv2_0 = slim.conv2d(
-                net,
-                32,
-                1,
-                scope='Conv2d_0a_1x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-            tower_conv2_1 = slim.conv2d(
-                tower_conv2_0,
-                48,
-                3,
-                scope='Conv2d_0b_3x3',
-                trainable=trainable_variables,
-                reuse=reuse)
-            tower_conv2_2 = slim.conv2d(
-                tower_conv2_1,
-                64,
-                3,
-                scope='Conv2d_0c_3x3',
-                trainable=trainable_variables,
-                reuse=reuse)
-        mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3)
-        up = slim.conv2d(
-            mixed,
-            net.get_shape()[3],
-            1,
-            normalizer_fn=None,
-            activation_fn=None,
-            scope='Conv2d_1x1',
-            trainable=trainable_variables,
-            reuse=reuse)
-        net += scale * up
-        if activation_fn:
-            net = activation_fn(net)
-    return net
-
-
-# Inception-Resnet-B
-def block17(net,
-            scale=1.0,
-            activation_fn=tf.nn.relu,
-            scope=None,
-            reuse=None,
-            trainable_variables=True):
-    """Builds the 17x17 resnet block."""
-    with tf.variable_scope(scope, 'Block17', [net]):
-        with tf.variable_scope('Branch_0'):
-            tower_conv = slim.conv2d(
-                net,
-                192,
-                1,
-                scope='Conv2d_1x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-        with tf.variable_scope('Branch_1'):
-            tower_conv1_0 = slim.conv2d(
-                net,
-                128,
-                1,
-                scope='Conv2d_0a_1x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-            tower_conv1_1 = slim.conv2d(
-                tower_conv1_0,
-                160, [1, 7],
-                scope='Conv2d_0b_1x7',
-                trainable=trainable_variables,
-                reuse=reuse)
-            tower_conv1_2 = slim.conv2d(
-                tower_conv1_1,
-                192, [7, 1],
-                scope='Conv2d_0c_7x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
-        up = slim.conv2d(
-            mixed,
-            net.get_shape()[3],
-            1,
-            normalizer_fn=None,
-            activation_fn=None,
-            scope='Conv2d_1x1',
-            trainable=trainable_variables,
-            reuse=reuse)
-        net += scale * up
-        if activation_fn:
-            net = activation_fn(net)
-    return net
-
-
-# Inception-Resnet-C
-def block8(net,
-           scale=1.0,
-           activation_fn=tf.nn.relu,
-           scope=None,
-           reuse=None,
-           trainable_variables=True):
-    """Builds the 8x8 resnet block."""
-    with tf.variable_scope(scope, 'Block8', [net]):
-        with tf.variable_scope('Branch_0'):
-            tower_conv = slim.conv2d(
-                net,
-                192,
-                1,
-                scope='Conv2d_1x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-        with tf.variable_scope('Branch_1'):
-            tower_conv1_0 = slim.conv2d(
-                net,
-                192,
-                1,
-                scope='Conv2d_0a_1x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-            tower_conv1_1 = slim.conv2d(
-                tower_conv1_0,
-                224, [1, 3],
-                scope='Conv2d_0b_1x3',
-                trainable=trainable_variables,
-                reuse=reuse)
-            tower_conv1_2 = slim.conv2d(
-                tower_conv1_1,
-                256, [3, 1],
-                scope='Conv2d_0c_3x1',
-                trainable=trainable_variables,
-                reuse=reuse)
-        mixed = tf.concat([tower_conv, tower_conv1_2], 3)
-        up = slim.conv2d(
-            mixed,
-            net.get_shape()[3],
-            1,
-            normalizer_fn=None,
-            activation_fn=None,
-            scope='Conv2d_1x1',
-            trainable=trainable_variables,
-            reuse=reuse)
-        net += scale * up
-        if activation_fn:
-            net = activation_fn(net)
-    return net
-
-
-def inception_resnet_v2_batch_norm(inputs,
-                                   dropout_keep_prob=0.8,
-                                   bottleneck_layer_size=128,
-                                   reuse=None,
-                                   scope='InceptionResnetV2',
-                                   mode=tf.estimator.ModeKeys.TRAIN,
-                                   trainable_variables=None,
-                                   weight_decay=5e-5,
-                                   **kwargs):
-    """
-    Creates the Inception Resnet V2 model, applying batch norm to each
-    convolutional and fully connected layer.
-
-    Parameters
-    ----------
-
-      inputs:
-        4-D tensor of size [batch_size, height, width, 3].
-
-      mode:
-        estimator mode key (TRAIN, EVAL or PREDICT).
-
-      weight_decay: float
-        L2 weight-decay factor for the convolutional and fully
-        connected weights.
-
-      dropout_keep_prob: float
-        the fraction to keep before final layer.
-
-      reuse:
-        whether or not the network and its variables should be reused. To be
-        able to reuse 'scope' must be given.
-
-      scope:
-        Optional variable_scope.
-
-      trainable_variables: :any:`list`
-        List of variables to be trainable=True
-
-    Returns
-    -------
-
-      net:
-        the output of the bottleneck (embedding) layer.
-
-      end_points:
-        the set of end_points from the inception model.
-    """
-
-    batch_norm_params = {
-        # Decay for the moving averages.
-        'decay': 0.995,
-        # epsilon to prevent 0s in variance.
-        'epsilon': 0.001,
-        # force in-place updates of mean and variance estimates
-        'updates_collections': None,
-    }
-
-    with slim.arg_scope(
-        [slim.conv2d, slim.fully_connected],
-            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
-            weights_regularizer=slim.l2_regularizer(weight_decay),
-            normalizer_fn=slim.batch_norm,
-            normalizer_params=batch_norm_params):
-        return inception_resnet_v2(
-            inputs,
-            dropout_keep_prob=dropout_keep_prob,
-            bottleneck_layer_size=bottleneck_layer_size,
-            reuse=reuse,
-            scope=scope,
-            mode=mode,
-            trainable_variables=trainable_variables,
-            **kwargs)
-
-
-def inception_resnet_v2(inputs,
-                        dropout_keep_prob=0.8,
-                        bottleneck_layer_size=128,
-                        reuse=None,
-                        scope='InceptionResnetV2',
-                        mode=tf.estimator.ModeKeys.TRAIN,
-                        trainable_variables=None,
-                        **kwargs):
-    """Creates the Inception Resnet V2 model.
-
-    Parameters
-    ----------
-
-      inputs:
-        4-D tensor of size [batch_size, height, width, 3].
-
-      mode:
-        estimator mode key (TRAIN, EVAL or PREDICT); controls whether
-        dropout and batch normalization run in training mode.
-
-      dropout_keep_prob: float
-        the fraction to keep before final layer.
-
-      reuse:
-        whether or not the network and its variables should be reused. To be
-        able to reuse 'scope' must be given.
-
-      scope:
-        Optional variable_scope.
-
-      trainable_variables: :any:`list`
-        List of variables to be trainable=True
-
-    Returns
-    -------
-      net:
-        the output of the bottleneck (embedding) layer.
-
-      end_points:
-        the set of end_points from the inception model.
-    """
-    end_points = {}
-
-    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
-        with slim.arg_scope([slim.dropout],
-                            is_training=(mode == tf.estimator.ModeKeys.TRAIN)):
-
-            with slim.arg_scope(
-                [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
-                    stride=1,
-                    padding='SAME'):
-                # 149 x 149 x 32
-                name = "Conv2d_1a_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-                    name = "Conv2d_1a_3x3"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        inputs,
-                        32,
-                        3,
-                        stride=2,
-                        padding='VALID',
-                        scope=name,
-                        trainable=trainable,
-                        reuse=reuse)
-                    end_points[name] = net
-
-                # 147 x 147 x 32
-                name = "Conv2d_2a_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-                    name = "Conv2d_2a_3x3"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net,
-                        32,
-                        3,
-                        padding='VALID',
-                        scope=name,
-                        trainable=trainable,
-                        reuse=reuse)
-                    end_points[name] = net
-
-                # 147 x 147 x 64
-                name = "Conv2d_2b_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-                    name = "Conv2d_2b_3x3"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net, 64, 3, scope=name, trainable=trainable, reuse=reuse)
-                    end_points[name] = net
-
-                # 73 x 73 x 64
-                net = slim.max_pool2d(
-                    net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3')
-                end_points['MaxPool_3a_3x3'] = net
-
-                # 73 x 73 x 80
-                name = "Conv2d_3b_1x1_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-                    name = "Conv2d_3b_1x1"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net,
-                        80,
-                        1,
-                        padding='VALID',
-                        scope=name,
-                        trainable=trainable,
-                        reuse=reuse)
-                    end_points[name] = net
-
-                # 71 x 71 x 192
-                name = "Conv2d_4a_3x3_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-                    name = "Conv2d_4a_3x3"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net,
-                        192,
-                        3,
-                        padding='VALID',
-                        scope=name,
-                        trainable=trainable,
-                        reuse=reuse)
-                    end_points[name] = net
-
-                # 35 x 35 x 192
-                net = slim.max_pool2d(
-                    net, 3, stride=2, padding='VALID', scope='MaxPool_5a_3x3')
-                end_points['MaxPool_5a_3x3'] = net
-
-                # 35 x 35 x 320
-                name = "Mixed_5b_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-
-                    name = "Mixed_5b"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    with tf.variable_scope(name):
-                        with tf.variable_scope('Branch_0'):
-                            tower_conv = slim.conv2d(
-                                net,
-                                96,
-                                1,
-                                scope='Conv2d_1x1',
-                                trainable=trainable,
-                                reuse=reuse)
-                        with tf.variable_scope('Branch_1'):
-                            tower_conv1_0 = slim.conv2d(
-                                net,
-                                48,
-                                1,
-                                scope='Conv2d_0a_1x1',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv1_1 = slim.conv2d(
-                                tower_conv1_0,
-                                64,
-                                5,
-                                scope='Conv2d_0b_5x5',
-                                trainable=trainable,
-                                reuse=reuse)
-                        with tf.variable_scope('Branch_2'):
-                            tower_conv2_0 = slim.conv2d(
-                                net,
-                                64,
-                                1,
-                                scope='Conv2d_0a_1x1',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv2_1 = slim.conv2d(
-                                tower_conv2_0,
-                                96,
-                                3,
-                                scope='Conv2d_0b_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv2_2 = slim.conv2d(
-                                tower_conv2_1,
-                                96,
-                                3,
-                                scope='Conv2d_0c_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                        with tf.variable_scope('Branch_3'):
-                            tower_pool = slim.avg_pool2d(
-                                net,
-                                3,
-                                stride=1,
-                                padding='SAME',
-                                scope='AvgPool_0a_3x3')
-                            tower_pool_1 = slim.conv2d(
-                                tower_pool,
-                                64,
-                                1,
-                                scope='Conv2d_0b_1x1',
-                                trainable=trainable,
-                                reuse=reuse)
-                        net = tf.concat([
-                            tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1
-                        ], 3)
-                    end_points[name] = net
-
-                # BLOCK 35
-                name = "Block35_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-
-                    name = "Block35"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.repeat(
-                        net,
-                        10,
-                        block35,
-                        scale=0.17,
-                        trainable_variables=trainable,
-                        reuse=reuse)
-                end_points[name] = net
-
-                # 17 x 17 x 1024
-                name = "Mixed_6a_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-
-                    name = "Mixed_6a"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    with tf.variable_scope(name):
-                        with tf.variable_scope('Branch_0'):
-                            tower_conv = slim.conv2d(
-                                net,
-                                384,
-                                3,
-                                stride=2,
-                                padding='VALID',
-                                scope='Conv2d_1a_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                        with tf.variable_scope('Branch_1'):
-                            tower_conv1_0 = slim.conv2d(
-                                net,
-                                256,
-                                1,
-                                scope='Conv2d_0a_1x1',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv1_1 = slim.conv2d(
-                                tower_conv1_0,
-                                256,
-                                3,
-                                scope='Conv2d_0b_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv1_2 = slim.conv2d(
-                                tower_conv1_1,
-                                384,
-                                3,
-                                stride=2,
-                                padding='VALID',
-                                scope='Conv2d_1a_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                        with tf.variable_scope('Branch_2'):
-                            tower_pool = slim.max_pool2d(
-                                net,
-                                3,
-                                stride=2,
-                                padding='VALID',
-                                scope='MaxPool_1a_3x3')
-                        net = tf.concat(
-                            [tower_conv, tower_conv1_2, tower_pool], 3)
-                        end_points[name] = net
-
-                # BLOCK 17
-                name = "Block17_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-
-                    name = "Block17"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.repeat(
-                        net,
-                        20,
-                        block17,
-                        scale=0.10,
-                        trainable_variables=trainable,
-                        reuse=reuse)
-                end_points[name] = net
-
-                name = "Mixed_7a_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-
-                    name = "Mixed_7a"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    with tf.variable_scope(name):
-                        with tf.variable_scope('Branch_0'):
-                            tower_conv = slim.conv2d(
-                                net,
-                                256,
-                                1,
-                                scope='Conv2d_0a_1x1',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv_1 = slim.conv2d(
-                                tower_conv,
-                                384,
-                                3,
-                                stride=2,
-                                padding='VALID',
-                                scope='Conv2d_1a_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                        with tf.variable_scope('Branch_1'):
-                            tower_conv1 = slim.conv2d(
-                                net,
-                                256,
-                                1,
-                                scope='Conv2d_0a_1x1',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv1_1 = slim.conv2d(
-                                tower_conv1,
-                                288,
-                                3,
-                                stride=2,
-                                padding='VALID',
-                                scope='Conv2d_1a_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                        with tf.variable_scope('Branch_2'):
-                            tower_conv2 = slim.conv2d(
-                                net,
-                                256,
-                                1,
-                                scope='Conv2d_0a_1x1',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv2_1 = slim.conv2d(
-                                tower_conv2,
-                                288,
-                                3,
-                                scope='Conv2d_0b_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                            tower_conv2_2 = slim.conv2d(
-                                tower_conv2_1,
-                                320,
-                                3,
-                                stride=2,
-                                padding='VALID',
-                                scope='Conv2d_1a_3x3',
-                                trainable=trainable,
-                                reuse=reuse)
-                        with tf.variable_scope('Branch_3'):
-                            tower_pool = slim.max_pool2d(
-                                net,
-                                3,
-                                stride=2,
-                                padding='VALID',
-                                scope='MaxPool_1a_3x3')
-                        net = tf.concat([
-                            tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool
-                        ], 3)
-                    end_points[name] = net
-
-                # Block 8
-                name = "Block8_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-
-                    name = "Block8"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.repeat(
-                        net,
-                        9,
-                        block8,
-                        scale=0.20,
-                        trainable_variables=trainable,
-                        reuse=reuse)
-                    net = block8(
-                        net,
-                        activation_fn=None,
-                        trainable_variables=trainable,
-                        reuse=reuse)
-                end_points[name] = net
-
-                name = "Conv2d_7b_1x1_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-
-                    name = "Conv2d_7b_1x1"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.conv2d(
-                        net, 1536, 1, scope=name, trainable=trainable, reuse=reuse)
-                    end_points[name] = net
-
-                with tf.variable_scope('Logits'):
-                    end_points['PrePool'] = net
-                    # pylint: disable=no-member
-                    net = slim.avg_pool2d(
-                        net,
-                        net.get_shape()[1:3],
-                        padding='VALID',
-                        scope='AvgPool_1a_8x8')
-                    net = slim.flatten(net)
-
-                    net = slim.dropout(net, dropout_keep_prob, scope='Dropout')
-
-                    end_points['PreLogitsFlatten'] = net
-
-                name = "Bottleneck_BN"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                with slim.arg_scope(
-                    [slim.batch_norm],
-                        is_training=trainable,
-                        trainable=trainable):
-
-                    name = "Bottleneck"
-                    trainable = is_trainable(
-                        name, trainable_variables, mode=mode)
-                    net = slim.fully_connected(
-                        net,
-                        bottleneck_layer_size,
-                        activation_fn=None,
-                        scope=name,
-                        reuse=reuse,
-                        trainable=trainable)
-                    end_points[name] = net
-
-    return net, end_points
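
The batch_norm_params dictionaries in both deleted Inception files translate
to tf.keras.layers.BatchNormalization arguments as decay -> momentum and
epsilon -> epsilon; 'updates_collections': None has no Keras counterpart,
because Keras applies the moving-statistics updates in place during training.
A sketch of the conv + batch-norm + ReLU unit that the slim arg_scope
produced:

    import tensorflow as tf

    def conv_bn_relu(net, filters, kernel_size, weight_decay=5e-5, **kwargs):
        # slim omits the conv bias when a normalizer_fn is set.
        net = tf.keras.layers.Conv2D(
            filters, kernel_size, use_bias=False,
            kernel_initializer=tf.keras.initializers.TruncatedNormal(
                stddev=0.1),
            kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
            **kwargs)(net)
        net = tf.keras.layers.BatchNormalization(momentum=0.995,
                                                 epsilon=0.001)(net)
        return tf.keras.layers.ReLU()(net)
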
diff --git a/bob/learn/tensorflow/network/JointIncResV2Simple.py b/bob/learn/tensorflow/network/JointIncResV2Simple.py
deleted file mode 100644
index fcbd6da85d7903b9d42a931a7d82545f90052c32..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/JointIncResV2Simple.py
+++ /dev/null
@@ -1,171 +0,0 @@
-from .InceptionResnetV2 import inception_resnet_v2_batch_norm
-from .InceptionResnetV1 import inception_resnet_v1_batch_norm
-from .SimpleCNN import base_architecture as simplecnn_arch
-import numpy as np
-import tensorflow as tf
-
-
-def architecture(faces, mode, face_arch='InceptionResnetV2', **kwargs):
-    # construct patches inside the model
-    ksizes = strides = [1, 28, 28, 1]
-    rates = [1, 1, 1, 1]
-    patches = tf.extract_image_patches(faces, ksizes, strides, rates, 'VALID')
-    n_blocks = int(np.prod(patches.shape[1:3]))
-    # n_blocks should be 25 for 160x160 faces
-    patches = tf.reshape(patches, [-1, n_blocks, 28, 28, 3])
-
-    simplecnn_kwargs = {
-        'kernerl_size': (3, 3),
-        'data_format': 'channels_last',
-        'add_batch_norm': True,
-        'use_bias_with_batch_norm': False,
-    }
-    simplecnn_kwargs.update(kwargs)
-    endpoints = {}
-    # construct simplecnn from patches
-    for i in range(n_blocks):
-        if i == 0:
-            reuse = False
-        else:
-            reuse = True
-        with tf.variable_scope('SimpleCNN', reuse=reuse):
-            net, temp = simplecnn_arch(patches[:, i], mode, **simplecnn_kwargs)
-        if i == 0:
-            simplecnn_embeddings = net
-            endpoints.update(temp)
-        else:
-            simplecnn_embeddings += net
-    # average the embeddings of patches
-    simplecnn_embeddings /= n_blocks
-
-    # construct inception_resnet_v1 or 2 from faces
-    if face_arch == 'InceptionResnetV2':
-        face_embeddings, temp = inception_resnet_v2_batch_norm(
-            faces, mode=mode, **kwargs)
-    elif face_arch == 'InceptionResnetV1':
-        face_embeddings, temp = inception_resnet_v1_batch_norm(
-            faces, mode=mode, **kwargs)
-    else:
-        raise ValueError("Unknown face_arch: {}".format(face_arch))
-    endpoints.update(temp)
-
-    embeddings = tf.concat([simplecnn_embeddings, face_embeddings], 1)
-
-    endpoints['final_embeddings'] = embeddings
-
-    return embeddings, endpoints
-
-
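
The patch construction above carries over to TensorFlow 2 as
tf.image.extract_patches; the snippet below (run on a dummy batch)
reproduces the 25-block layout for 160x160 faces:

    import numpy as np
    import tensorflow as tf

    faces = tf.zeros([8, 160, 160, 3])  # dummy batch of 160x160 faces
    patches = tf.image.extract_patches(
        faces, sizes=[1, 28, 28, 1], strides=[1, 28, 28, 1],
        rates=[1, 1, 1, 1], padding="VALID")
    n_blocks = int(np.prod(patches.shape[1:3]))  # floor(160/28)**2 == 25
    patches = tf.reshape(patches, [-1, n_blocks, 28, 28, 3])
    assert patches.shape == (8, 25, 28, 28, 3)
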
-def model_fn(features, labels, mode, params, config):
-    """The model function for join face and patch PAD. The input to the model
-    is 160x160 faces."""
-
-    faces = features['data']
-    key = features['key']
-
-    # organize the parameters
-    params = params or {}
-    learning_rate = params.get('learning_rate', 1e-4)
-    apply_moving_averages = params.get('apply_moving_averages', True)
-    n_classes = params.get('n_classes', 2)
-    add_histograms = params.get('add_histograms')
-    face_arch = params.get('face_arch', 'InceptionResnetV2')
-
-    embeddings, _ = architecture(faces, mode, face_arch=face_arch)
-
-    # Logits layer
-    logits = tf.layers.dense(inputs=embeddings, units=n_classes, name='logits')
-
-    # # restore the model from an extra_checkpoint
-    # if extra_checkpoint is not None and mode == tf.estimator.ModeKeys.TRAIN:
-    #     tf.train.init_from_checkpoint(
-    #         ckpt_dir_or_file=extra_checkpoint["checkpoint_path"],
-    #         assignment_map=extra_checkpoint["scopes"],
-    #     )
-
-    predictions = {
-        # Generate predictions (for PREDICT and EVAL mode)
-        "classes": tf.argmax(input=logits, axis=1),
-        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
-        # `logging_hook`.
-        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
-        'key': key,
-    }
-    if mode == tf.estimator.ModeKeys.PREDICT:
-        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-    accuracy = tf.metrics.accuracy(
-        labels=labels, predictions=predictions["classes"])
-    metrics = {'accuracy': accuracy}
-
-    global_step = tf.train.get_or_create_global_step()
-
-    # Compute the moving average of all individual losses and the total loss.
-    if apply_moving_averages and mode == tf.estimator.ModeKeys.TRAIN:
-        variable_averages = tf.train.ExponentialMovingAverage(
-            0.9999, global_step)
-        variable_averages_op = variable_averages.apply(
-            tf.trainable_variables())
-    else:
-        variable_averages_op = tf.no_op(name='noop')
-
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        # for batch normalization to be updated as well:
-        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-    else:
-        update_ops = []
-
-    with tf.control_dependencies([variable_averages_op] + update_ops):
-
-        # Calculate Loss (for both TRAIN and EVAL modes)
-        cross_loss = tf.losses.sparse_softmax_cross_entropy(
-            logits=logits, labels=labels)
-
-        regularization_losses = tf.get_collection(
-            tf.GraphKeys.REGULARIZATION_LOSSES)
-
-        loss = tf.add_n(
-            [cross_loss] + regularization_losses, name="total_loss")
-
-        if apply_moving_averages and mode == tf.estimator.ModeKeys.TRAIN:
-            # Compute the moving average of all individual losses and the total
-            # loss.
-            loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
-            loss_averages_op = loss_averages.apply(
-                tf.get_collection(tf.GraphKeys.LOSSES))
-        else:
-            loss_averages_op = tf.no_op(name='noop')
-
-        if mode == tf.estimator.ModeKeys.TRAIN:
-
-            optimizer = tf.train.GradientDescentOptimizer(
-                learning_rate=learning_rate)
-            train_op = tf.group(
-                optimizer.minimize(loss, global_step=global_step),
-                variable_averages_op, loss_averages_op)
-
-            # Log accuracy and loss
-            with tf.name_scope('train_metrics'):
-                tf.summary.scalar('accuracy', accuracy[1])
-                tf.summary.scalar('cross_entropy_loss', cross_loss)
-                tf.summary.scalar('loss', loss)
-                if apply_moving_averages:
-                    for l in tf.get_collection(tf.GraphKeys.LOSSES):
-                        tf.summary.scalar(l.op.name + "_averaged",
-                                          loss_averages.average(l))
-
-            # add histograms summaries
-            if add_histograms == 'all':
-                for v in tf.global_variables():
-                    tf.summary.histogram(v.name, v)
-            elif add_histograms == 'train':
-                for v in tf.trainable_variables():
-                    tf.summary.histogram(v.name, v)
-
-        else:
-            train_op = None
-
-    return tf.estimator.EstimatorSpec(
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metric_ops=metrics)
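
For 160x160 faces, the 28x28 ``VALID`` patching in ``architecture`` gives floor((160 - 28)/28) + 1 = 5 positions per axis, hence the 25 blocks the comment promises. A standalone check in TF 2.x, where the op was renamed to ``tf.image.extract_patches`` (assuming it behaves like the ``tf.extract_image_patches`` call above):

```
import numpy as np
import tensorflow as tf

faces = tf.zeros([1, 160, 160, 3])
patches = tf.image.extract_patches(
    faces,
    sizes=[1, 28, 28, 1],
    strides=[1, 28, 28, 1],
    rates=[1, 1, 1, 1],
    padding="VALID",
)
# a 5 x 5 grid of patches, each flattened to 28 * 28 * 3 = 2352 values
print(patches.shape)                      # (1, 5, 5, 2352)
print(int(np.prod(patches.shape[1:3])))   # 25
```
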
diff --git a/bob/learn/tensorflow/network/LightCNN9.py b/bob/learn/tensorflow/network/LightCNN9.py
deleted file mode 100644
index 6d102ca22129e4703aeec66b03dedea086de0e28..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/LightCNN9.py
+++ /dev/null
@@ -1,189 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from bob.learn.tensorflow.layers import maxout
-from .utils import is_trainable
-
-
-def light_cnn9(inputs,
-               seed=10,
-               reuse=False,
-               trainable_variables=None,
-               **kwargs):
-    """Creates the graph for the Light CNN-9 in 
-
-       Wu, Xiang, et al. "A light CNN for deep face representation with noisy labels." arXiv preprint arXiv:1511.02683 (2015).
-    """
-    slim = tf.contrib.slim
-
-    with tf.variable_scope('LightCNN9', reuse=reuse):
-        initializer = tf.contrib.layers.xavier_initializer(
-            uniform=False, dtype=tf.float32, seed=seed)
-        end_points = dict()
-        name = "Conv1"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            inputs,
-            96, [5, 5],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-        end_points[name] = graph
-
-        graph = maxout(graph, num_units=48, name='Maxout1')
-
-        graph = slim.max_pool2d(
-            graph, [2, 2], stride=2, padding="SAME", scope='Pool1')
-
-        ####
-        name = "Conv2a"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            graph,
-            96, [1, 1],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-
-        graph = maxout(graph, num_units=48, name='Maxout2a')
-
-        name = "Conv2"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            graph,
-            192, [3, 3],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-        end_points[name] = graph
-
-        graph = maxout(graph, num_units=96, name='Maxout2')
-
-        graph = slim.max_pool2d(
-            graph, [2, 2], stride=2, padding="SAME", scope='Pool2')
-
-        #####
-        name = "Conv3a"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            graph,
-            192, [1, 1],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-
-        graph = maxout(graph, num_units=96, name='Maxout3a')
-
-        name = "Conv3"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            graph,
-            384, [3, 3],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-        end_points[name] = graph
-
-        graph = maxout(graph, num_units=192, name='Maxout3')
-
-        graph = slim.max_pool2d(
-            graph, [2, 2], stride=2, padding="SAME", scope='Pool3')
-
-        #####
-        name = "Conv4a"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            graph,
-            384, [1, 1],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-
-        graph = maxout(graph, num_units=192, name='Maxout4a')
-
-        name = "Conv4"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            graph,
-            256, [3, 3],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-        end_points[name] = graph
-
-        graph = maxout(graph, num_units=128, name='Maxout4')
-
-        #####
-        name = "Conv5a"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            graph,
-            256, [1, 1],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-
-        graph = maxout(graph, num_units=128, name='Maxout5a')
-
-        name = "Conv5"
-        trainable = is_trainable(name, trainable_variables)
-        graph = slim.conv2d(
-            graph,
-            256, [3, 3],
-            activation_fn=tf.nn.relu,
-            stride=1,
-            weights_initializer=initializer,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-        end_points[name] = graph
-
-        graph = maxout(graph, num_units=128, name='Maxout5')
-
-        graph = slim.max_pool2d(
-            graph, [2, 2], stride=2, padding="SAME", scope='Pool4')
-
-        graph = slim.flatten(graph, scope='flatten1')
-        end_points['flatten1'] = graph
-
-        graph = slim.dropout(graph, keep_prob=0.5, scope='dropout1')
-
-        name = "fc1"
-        trainable = is_trainable(name, trainable_variables)
-        prelogits = slim.fully_connected(
-            graph,
-            512,
-            weights_initializer=initializer,
-            activation_fn=tf.nn.relu,
-            scope=name,
-            trainable=trainable,
-            reuse=reuse)
-        end_points['fc1'] = prelogits
-
-    return prelogits, end_points
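
The repeated ``maxout`` calls implement the Max-Feature-Map activation of Light CNN: the channel axis is split into ``num_units`` groups and an element-wise maximum is taken within each group, so 96 channels with ``num_units=48`` compete in pairs. A NumPy sketch of that reduction, assuming channels-last layout:

```
import numpy as np


def maxout(x, num_units):
    """Max-Feature-Map: element-wise max over groups of channels."""
    n_channels = x.shape[-1]
    assert n_channels % num_units == 0, "channels must split evenly"
    x = x.reshape(*x.shape[:-1], num_units, n_channels // num_units)
    return x.max(axis=-1)


activations = np.random.randn(2, 64, 64, 96)  # a batch of feature maps
print(maxout(activations, num_units=48).shape)  # (2, 64, 64, 48)
```
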
diff --git a/bob/learn/tensorflow/network/MLP.py b/bob/learn/tensorflow/network/MLP.py
deleted file mode 100644
index 8225724984d0d688b6c348c0b957d3f228e6521b..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/MLP.py
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-from bob.learn.tensorflow.network.utils import is_trainable
-
-slim = tf.contrib.slim
-
-
-def mlp(
-    inputs,
-    output_shape,
-    hidden_layers=[10],
-    hidden_activation=tf.nn.tanh,
-    output_activation=None,
-    seed=10,
-    **kwargs
-):
-    """An MLP is a representation of a Multi-Layer Perceptron.
-
-    This implementation is feed-forward and fully-connected.
-    The implementation allows setting a global and the output activation functions.
-    References to fully-connected feed-forward networks: Bishop's Pattern Recognition and Machine Learning, Chapter 5. Figure 5.1 shows what is programmed.
-
-    MLPs normally are multi-layered systems, with 1 or more hidden layers.
-
-    **Parameters**
-
-        output_shape: number of neurons in the output.
-
-        hidden_layers: :py:class:`list` that contains the amount of hidden layers, where each element is the number of neurons
-
-        hidden_activation: Activation function of the hidden layers. Possible values can be seen
-                          `here <https://www.tensorflow.org/versions/r0.11/api_docs/python/nn.html#activation-functions>`_.
-                           If you set to ``None``, the activation will be linear.
-
-        output_activation: Activation of the output layer.  If you set to `None`, the activation will be linear
-
-        seed:
-    """
-
-    initializer = tf.contrib.layers.xavier_initializer(
-        uniform=False, dtype=tf.float32, seed=seed
-    )
-
-    graph = inputs
-    for i in range(len(hidden_layers)):
-
-        weights = hidden_layers[i]
-        graph = slim.fully_connected(
-            graph,
-            weights,
-            weights_initializer=initializer,
-            activation_fn=hidden_activation,
-            scope="fc_{0}".format(i),
-        )
-
-    graph = slim.fully_connected(
-        graph,
-        output_shape,
-        weights_initializer=initializer,
-        activation_fn=output_activation,
-        scope="fc_output",
-    )
-
-    return graph
-
-
-def mlp_with_batchnorm_and_dropout(
-    inputs,
-    fully_connected_layers,
-    mode=tf.estimator.ModeKeys.TRAIN,
-    trainable_variables=None,
-    **kwargs
-):
-
-    if trainable_variables is not None:
-        raise ValueError(
-            "The batch_norm layers selectable training is not implemented!"
-        )
-
-    end_points = {}
-    net = slim.flatten(inputs)
-
-    weight_decay = 1e-5
-    dropout_keep_prob = 0.5
-    batch_norm_params = {
-        # Decay for the moving averages.
-        "decay": 0.995,
-        # epsilon to prevent 0s in variance.
-        "epsilon": 0.001,
-        # force in-place updates of mean and variance estimates
-        "updates_collections": None,
-        "is_training": (mode == tf.estimator.ModeKeys.TRAIN),
-    }
-
-    with slim.arg_scope(
-        [slim.fully_connected],
-        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
-        weights_regularizer=slim.l2_regularizer(weight_decay),
-        normalizer_fn=slim.batch_norm,
-        normalizer_params=batch_norm_params,
-    ), tf.name_scope("MLP"):
-
-        # hidden layers
-        for i, n in enumerate(fully_connected_layers):
-            name = "fc_{:0d}".format(i)
-            trainable = is_trainable(name, trainable_variables, mode=mode)
-            with slim.arg_scope(
-                [slim.batch_norm], is_training=trainable, trainable=trainable
-            ):
-
-                net = slim.fully_connected(net, n, scope=name, trainable=trainable)
-                end_points[name] = net
-
-            name = "dropout_{:0d}".format(i)
-            net = slim.dropout(
-                net,
-                dropout_keep_prob,
-                is_training=(mode == tf.estimator.ModeKeys.TRAIN),
-                scope=name,
-            )
-            end_points[name] = net
-
-    return net, end_points
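
With this file removed, the plain ``mlp`` can be reproduced directly in ``tf.keras``. A hedged sketch (``glorot_normal`` stands in for the seeded Xavier initializer above; ``make_mlp`` and its ``input_dim`` argument are illustrative, not an API from this repository):

```
import tensorflow as tf
from tensorflow.keras import Sequential, layers


def make_mlp(input_dim, output_shape, hidden_layers=[10],
             hidden_activation="tanh", output_activation=None):
    """Keras sketch of the removed slim-based mlp()."""
    model = Sequential()
    model.add(layers.InputLayer(input_shape=(input_dim,)))
    for i, n_units in enumerate(hidden_layers):
        model.add(layers.Dense(n_units, activation=hidden_activation,
                               kernel_initializer="glorot_normal",
                               name="fc_{0}".format(i)))
    model.add(layers.Dense(output_shape, activation=output_activation,
                           kernel_initializer="glorot_normal",
                           name="fc_output"))
    return model


model = make_mlp(input_dim=64, output_shape=2, hidden_layers=[10, 10])
model.summary()
```
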
diff --git a/bob/learn/tensorflow/network/PatchCNN.py b/bob/learn/tensorflow/network/PatchCNN.py
deleted file mode 100644
index 98db7e52ccffbf1addd9d3fb139a5bb5afdc4541..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/PatchCNN.py
+++ /dev/null
@@ -1,434 +0,0 @@
-"""Path-based CNN Estimator for "Face Anti-spoofing Using Patch and Depth-based
-CNNs".
-
-The architecture is:
-
-+--------------+---------------+---------------+
-| Layer        | Filter/Stride | Output Size   |
-+--------------+---------------+---------------+
-| Conv-1       | 5 x 5/1       | 96 x 96 x 50  |
-| BN-1         |               | 96 x 96 x 50  |
-| MaxPooling-1 | 2 x 2/2       | 48 x 48 x 50  |
-+--------------+---------------+---------------+
-| Conv-2       | 3 x 3/1       | 48 x 48 x 100 |
-| BN-2         |               | 48 x 48 x 100 |
-| MaxPooling-2 | 2 x 2/2       | 24 x 24 x 100 |
-+--------------+---------------+---------------+
-| Conv-3       | 3 x 3/1       | 24 x 24 x 150 |
-| BN-3         |               | 24 x 24 x 150 |
-| MaxPooling-3 | 3 x 3/2       | 12 x 12 x 150 |
-+--------------+---------------+---------------+
-| Conv-4       | 3 x 3/1       | 12 x 12 x 200 |
-| BN-4         |               | 12 x 12 x 200 |
-| MaxPooling-4 | 2 x 2/2       | 6 x 6 x 200   |
-+--------------+---------------+---------------+
-| Conv-5       | 3 x 3/1       | 6 x 6 x 250   |
-| BN-5         |               | 6 x 6 x 250   |
-| MaxPooling-5 | 2 x 2/2       | 3 x 3 x 250   |
-+--------------+---------------+---------------+
-| FC-1         | 3 x 3/1       | 1 x 1 x 1000  |
-| BN-6         |               | 1 x 1 x 1000  |
-| Dropout      | 0.5           | 1 x 1 x 1000  |
-+--------------+---------------+---------------+
-| FC-2         | 1 x 1/1       | 1 x 1 x 400   |
-| BN-7         |               | 1 x 1 x 400   |
-| FC-3         | 1 x 1/1       | 1 x 1 x 2     |
-+--------------+---------------+---------------+
-
-```
-from tensorflow.python.keras import *
-from tensorflow.python.keras.layers import *
-patch = Sequential([
-    Conv2D(50, (5, 5), padding='same', use_bias=False, input_shape=(96,96,3)),
-    BatchNormalization(scale=False),
-    Activation('relu'),
-    MaxPool2D(padding='same'),
-
-    Conv2D(100, (3, 3), padding='same', use_bias=False),
-    BatchNormalization(scale=False),
-    Activation('relu'),
-    MaxPool2D(padding='same'),
-
-    Conv2D(150, (3, 3), padding='same', use_bias=False),
-    BatchNormalization(scale=False),
-    Activation('relu'),
-    MaxPool2D(pool_size=3, strides=2, padding='same'),
-
-    Conv2D(200, (3, 3), padding='same', use_bias=False),
-    BatchNormalization(scale=False),
-    Activation('relu'),
-    MaxPool2D(padding='same'),
-
-    Conv2D(250, (3, 3), padding='same', use_bias=False),
-    BatchNormalization(scale=False),
-    Activation('relu'),
-    MaxPool2D(padding='same'),
-
-    Flatten(),
-    Dense(1000, use_bias=False),
-    BatchNormalization(scale=False),
-    Activation('relu'),
-    Dropout(rate=0.5),
-
-    Dense(400, use_bias=False),
-    BatchNormalization(scale=False),
-    Activation('relu'),
-
-    Dense(2),
-])
-patch.summary()
-```
-_________________________________________________________________
-Layer (type)                 Output Shape              Param #
-=================================================================
-conv2d_1 (Conv2D)            (None, 96, 96, 50)        3750
-_________________________________________________________________
-batch_normalization_1 (Batch (None, 96, 96, 50)        150
-_________________________________________________________________
-activation_1 (Activation)    (None, 96, 96, 50)        0
-_________________________________________________________________
-max_pooling2d_1 (MaxPooling2 (None, 48, 48, 50)        0
-_________________________________________________________________
-conv2d_2 (Conv2D)            (None, 48, 48, 100)       45000
-_________________________________________________________________
-batch_normalization_2 (Batch (None, 48, 48, 100)       300
-_________________________________________________________________
-activation_2 (Activation)    (None, 48, 48, 100)       0
-_________________________________________________________________
-max_pooling2d_2 (MaxPooling2 (None, 24, 24, 100)       0
-_________________________________________________________________
-conv2d_3 (Conv2D)            (None, 24, 24, 150)       135000
-_________________________________________________________________
-batch_normalization_3 (Batch (None, 24, 24, 150)       450
-_________________________________________________________________
-activation_3 (Activation)    (None, 24, 24, 150)       0
-_________________________________________________________________
-max_pooling2d_3 (MaxPooling2 (None, 12, 12, 150)       0
-_________________________________________________________________
-conv2d_4 (Conv2D)            (None, 12, 12, 200)       270000
-_________________________________________________________________
-batch_normalization_4 (Batch (None, 12, 12, 200)       600
-_________________________________________________________________
-activation_4 (Activation)    (None, 12, 12, 200)       0
-_________________________________________________________________
-max_pooling2d_4 (MaxPooling2 (None, 6, 6, 200)         0
-_________________________________________________________________
-conv2d_5 (Conv2D)            (None, 6, 6, 250)         450000
-_________________________________________________________________
-batch_normalization_5 (Batch (None, 6, 6, 250)         750
-_________________________________________________________________
-activation_5 (Activation)    (None, 6, 6, 250)         0
-_________________________________________________________________
-max_pooling2d_5 (MaxPooling2 (None, 3, 3, 250)         0
-_________________________________________________________________
-flatten_1 (Flatten)          (None, 2250)              0
-_________________________________________________________________
-dense_1 (Dense)              (None, 1000)              2250000
-_________________________________________________________________
-batch_normalization_6 (Batch (None, 1000)              3000
-_________________________________________________________________
-activation_6 (Activation)    (None, 1000)              0
-_________________________________________________________________
-dropout_1 (Dropout)          (None, 1000)              0
-_________________________________________________________________
-dense_2 (Dense)              (None, 400)               400000
-_________________________________________________________________
-batch_normalization_7 (Batch (None, 400)               1200
-_________________________________________________________________
-activation_7 (Activation)    (None, 400)               0
-_________________________________________________________________
-dense_3 (Dense)              (None, 2)                 802
-=================================================================
-Total params: 3,561,002
-Trainable params: 3,556,702
-Non-trainable params: 4,300
-_________________________________________________________________
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-
-def create_conv_layer(inputs,
-                      mode,
-                      data_format,
-                      endpoints,
-                      number,
-                      filters,
-                      kernel_size,
-                      pool_size,
-                      pool_strides,
-                      skip_pool=False):
-    bn_axis = 1 if data_format.lower() == 'channels_first' else 3
-    training = mode == tf.estimator.ModeKeys.TRAIN
-
-    name = 'Conv-{}'.format(number)
-    conv = tf.layers.conv2d(
-        inputs=inputs,
-        filters=filters,
-        kernel_size=kernel_size,
-        padding="same",
-        activation=None,
-        data_format=data_format,
-        name=name)
-    endpoints[name] = conv
-
-    name = 'BN-{}'.format(number)
-    bn = tf.layers.batch_normalization(
-        conv, axis=bn_axis, training=training, name=name)
-    endpoints[name] = bn
-
-    name = 'Activation-{}'.format(number)
-    bn_act = tf.nn.relu(bn, name=name)
-    endpoints[name] = bn_act
-
-    name = 'MaxPooling-{}'.format(number)
-    if skip_pool:
-        pool = bn_act
-    else:
-        pool = tf.layers.max_pooling2d(
-            inputs=bn_act,
-            pool_size=pool_size,
-            strides=pool_strides,
-            padding='same',
-            data_format=data_format,
-            name=name)
-    endpoints[name] = pool
-
-    return pool
-
-
-def create_dense_layer(inputs, mode, endpoints, number, units):
-    training = mode == tf.estimator.ModeKeys.TRAIN
-
-    name = 'FC-{}'.format(number)
-    fc = tf.layers.dense(
-        inputs=inputs, units=units, activation=None, name=name)
-    endpoints[name] = fc
-
-    name = 'BN-{}'.format(number + 5)
-    bn = tf.layers.batch_normalization(
-        fc, axis=1, training=training, name=name)
-    endpoints[name] = bn
-
-    name = 'Activation-{}'.format(number + 5)
-    bn_act = tf.nn.relu(bn, name=name)
-    endpoints[name] = bn_act
-
-    return bn_act
-
-
-def base_architecture(input_layer,
-                      mode,
-                      data_format,
-                      skip_first_two_pool=False,
-                      **kwargs):
-    training = mode == tf.estimator.ModeKeys.TRAIN
-    # Keep track of all the endpoints
-    endpoints = {}
-
-    # ======================
-    # Convolutional Layer #1
-    pool1 = create_conv_layer(
-        inputs=input_layer,
-        mode=mode,
-        data_format=data_format,
-        endpoints=endpoints,
-        number=1,
-        filters=50,
-        kernel_size=(5, 5),
-        pool_size=(2, 2),
-        pool_strides=2,
-        skip_pool=skip_first_two_pool)
-
-    # ======================
-    # Convolutional Layer #2
-    pool2 = create_conv_layer(
-        inputs=pool1,
-        mode=mode,
-        data_format=data_format,
-        endpoints=endpoints,
-        number=2,
-        filters=100,
-        kernel_size=(3, 3),
-        pool_size=(2, 2),
-        pool_strides=2,
-        skip_pool=skip_first_two_pool)
-
-    # ======================
-    # Convolutional Layer #3
-    pool3 = create_conv_layer(
-        inputs=pool2,
-        mode=mode,
-        data_format=data_format,
-        endpoints=endpoints,
-        number=3,
-        filters=150,
-        kernel_size=(3, 3),
-        pool_size=(3, 3),
-        pool_strides=2)
-
-    # ======================
-    # Convolutional Layer #4
-    pool4 = create_conv_layer(
-        inputs=pool3,
-        mode=mode,
-        data_format=data_format,
-        endpoints=endpoints,
-        number=4,
-        filters=200,
-        kernel_size=(3, 3),
-        pool_size=(2, 2),
-        pool_strides=2)
-
-    # ======================
-    # Convolutional Layer #5
-    pool5 = create_conv_layer(
-        inputs=pool4,
-        mode=mode,
-        data_format=data_format,
-        endpoints=endpoints,
-        number=5,
-        filters=250,
-        kernel_size=(3, 3),
-        pool_size=(2, 2),
-        pool_strides=2)
-
-    # ========================
-    # Flatten tensor into a batch of vectors
-    name = 'MaxPooling-5-Flat'
-    pool5_flat = tf.layers.flatten(pool5, name=name)
-    endpoints[name] = pool5_flat
-
-    # ========================
-    # Fully Connected Layer #1
-    fc1 = create_dense_layer(
-        inputs=pool5_flat,
-        mode=mode,
-        endpoints=endpoints,
-        number=1,
-        units=1000)
-
-    # ========================
-    # Dropout
-    name = 'dropout'
-    dropout = tf.layers.dropout(
-        inputs=fc1, rate=0.5, training=training, name=name)
-    endpoints[name] = dropout
-
-    # ========================
-    # Fully Connected Layer #2
-    fc2 = create_dense_layer(
-        inputs=dropout, mode=mode, endpoints=endpoints, number=2, units=400)
-
-    return fc2, endpoints
-
-
-def architecture(input_layer,
-                 mode=tf.estimator.ModeKeys.TRAIN,
-                 skip_first_two_pool=False,
-                 n_classes=2,
-                 data_format='channels_last',
-                 reuse=False,
-                 regularizer=None,
-                 **kwargs):
-
-    with tf.variable_scope('PatchCNN', reuse=reuse, regularizer=regularizer):
-
-        fc2, endpoints = base_architecture(
-            input_layer=input_layer,
-            mode=mode,
-            data_format=data_format,
-            skip_first_two_pool=skip_first_two_pool)
-        # Logits layer
-        logits = tf.layers.dense(inputs=fc2, units=n_classes)
-        endpoints['FC-3'] = logits
-        endpoints['logits'] = logits
-
-    return logits, endpoints
-
-
-def model_fn(features, labels, mode, params=None, config=None):
-    """Model function for CNN."""
-    data = features['data']
-    key = features['key']
-
-    params = params or {}
-    params = {k: v for k, v in params.items() if v is not None}
-
-    initial_learning_rate = params.get('learning_rate', 1e-3)
-    momentum = params.get('momentum', 0.99)
-    decay_steps = params.get('decay_steps', 1e5)
-    decay_rate = params.get('decay_rate', 1e-4)
-    staircase = params.get('staircase', True)
-    regularization_rate = params.get('regularization_rate', 0)
-
-    arch_kwargs = {
-        'skip_first_two_pool': params.get('skip_first_two_pool'),
-        'n_classes': params.get('n_classes'),
-        'data_format': params.get('data_format'),
-        'regularizer': params.get('regularizer')
-    }
-    arch_kwargs = {k: v for k, v in arch_kwargs.items() if v is not None}
-
-    logits, _ = architecture(data, mode=mode, **arch_kwargs)
-
-    predictions = {
-        # Generate predictions (for PREDICT and EVAL mode)
-        "classes": tf.argmax(input=logits, axis=1),
-        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
-        # `logging_hook`.
-        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
-        'key': key,
-    }
-    if mode == tf.estimator.ModeKeys.PREDICT:
-        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-    # Calculate Loss (for both TRAIN and EVAL modes)
-    labels = tf.cast(labels, dtype="int32")
-    loss = tf.losses.sparse_softmax_cross_entropy(logits=logits, labels=labels)
-    # Add the regularization terms to the loss
-    if regularization_rate:
-        loss += regularization_rate * \
-            tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-
-    accuracy = tf.metrics.accuracy(
-        labels=labels, predictions=predictions["classes"])
-    metrics = {'accuracy': accuracy}
-
-    # Configure the training op
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        global_step = tf.train.get_or_create_global_step()
-
-        learning_rate = tf.train.exponential_decay(
-            learning_rate=initial_learning_rate,
-            global_step=global_step,
-            decay_steps=decay_steps,
-            decay_rate=decay_rate,
-            staircase=staircase)
-
-        optimizer = tf.train.MomentumOptimizer(
-            learning_rate=learning_rate, momentum=momentum)
-
-        # for batch normalization to be updated as well:
-        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-        with tf.control_dependencies(update_ops):
-            train_op = optimizer.minimize(loss=loss, global_step=global_step)
-
-        # Log accuracy and loss
-        with tf.name_scope('train_metrics'):
-            tf.summary.scalar('accuracy', accuracy[1])
-            tf.summary.scalar('loss', loss)
-            tf.summary.scalar('learning_rate', learning_rate)
-    else:
-        train_op = None
-
-    return tf.estimator.EstimatorSpec(
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metric_ops=metrics)
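
The parameter counts in the summary above follow directly from the layer shapes: a bias-free k x k convolution with c_in input and c_out output channels has k*k*c_in*c_out weights, and each ``BatchNormalization(scale=False)`` contributes three parameters per channel (a trainable beta plus the two non-trainable moving statistics). A quick check of a few rows:

```
def conv_params(k, c_in, c_out):
    # use_bias=False everywhere, so only the kernel weights count
    return k * k * c_in * c_out


def bn_params(channels):
    # beta + moving mean + moving variance; scale=False drops gamma
    return 3 * channels


print(conv_params(5, 3, 50))    # 3750    -> conv2d_1
print(bn_params(50))            # 150     -> batch_normalization_1
print(conv_params(3, 50, 100))  # 45000   -> conv2d_2
print(2250 * 1000)              # 2250000 -> dense_1 (bias-free)
print(400 * 2 + 2)              # 802     -> dense_3 (with bias)
```
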
diff --git a/bob/learn/tensorflow/network/SimpleCNN.py b/bob/learn/tensorflow/network/SimpleCNN.py
deleted file mode 100644
index 1a3b2a084720f39c8fc1f80db092ea3098081ab0..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/SimpleCNN.py
+++ /dev/null
@@ -1,483 +0,0 @@
-"""
-The network in Keras (same as the ``new_architecture`` function below)::
-
-    from tensorflow.python.keras import *
-    from tensorflow.python.keras.layers import *
-    simplecnn = Sequential([
-        Conv2D(32,(3,3),padding='same',use_bias=False, input_shape=(28,28,3)),
-        BatchNormalization(scale=False),
-        Activation('relu'),
-        MaxPool2D(padding='same'),
-        Conv2D(64,(3,3),padding='same',use_bias=False),
-        BatchNormalization(scale=False),
-        Activation('relu'),
-        MaxPool2D(padding='same'),
-        Flatten(),
-        Dense(1024, use_bias=False),
-        BatchNormalization(scale=False),
-        Activation('relu'),
-        Dropout(rate=0.4),
-        Dense(2),
-    ])
-    simplecnn.summary()
-    _________________________________________________________________
-    Layer (type)                 Output Shape              Param #
-    =================================================================
-    conv2d_1 (Conv2D)            (None, 28, 28, 32)        864
-    _________________________________________________________________
-    batch_normalization_1 (Batch (None, 28, 28, 32)        96
-    _________________________________________________________________
-    activation_1 (Activation)    (None, 28, 28, 32)        0
-    _________________________________________________________________
-    max_pooling2d_1 (MaxPooling2 (None, 14, 14, 32)        0
-    _________________________________________________________________
-    conv2d_2 (Conv2D)            (None, 14, 14, 64)        18432
-    _________________________________________________________________
-    batch_normalization_2 (Batch (None, 14, 14, 64)        192
-    _________________________________________________________________
-    activation_2 (Activation)    (None, 14, 14, 64)        0
-    _________________________________________________________________
-    max_pooling2d_2 (MaxPooling2 (None, 7, 7, 64)          0
-    _________________________________________________________________
-    flatten_1 (Flatten)          (None, 3136)              0
-    _________________________________________________________________
-    dense_1 (Dense)              (None, 1024)              3211264
-    _________________________________________________________________
-    batch_normalization_3 (Batch (None, 1024)              3072
-    _________________________________________________________________
-    activation_3 (Activation)    (None, 1024)              0
-    _________________________________________________________________
-    dropout_1 (Dropout)          (None, 1024)              0
-    _________________________________________________________________
-    dense_2 (Dense)              (None, 2)                 2050
-    =================================================================
-    Total params: 3,235,970
-    Trainable params: 3,233,730
-    Non-trainable params: 2,240
-    _________________________________________________________________
-"""
-
-
-import collections.abc
-import tensorflow as tf
-from .utils import is_trainable
-from ..estimators import get_trainable_variables
-
-
-def create_conv_layer(inputs,
-                      mode,
-                      data_format,
-                      endpoints,
-                      number,
-                      filters,
-                      kernel_size,
-                      pool_size,
-                      pool_strides,
-                      add_batch_norm=False,
-                      trainable_variables=None,
-                      use_bias_with_batch_norm=True):
-    bn_axis = 1 if data_format.lower() == 'channels_first' else 3
-    training = mode == tf.estimator.ModeKeys.TRAIN
-
-    if add_batch_norm:
-        activation = None
-    else:
-        activation = tf.nn.relu
-
-    name = 'conv{}'.format(number)
-    trainable = is_trainable(name, trainable_variables)
-    conv = tf.layers.conv2d(
-        inputs=inputs,
-        filters=filters,
-        kernel_size=kernel_size,
-        padding="same",
-        activation=activation,
-        data_format=data_format,
-        trainable=trainable,
-        use_bias=((not add_batch_norm) or use_bias_with_batch_norm),
-    )
-    endpoints[name] = conv
-
-    if add_batch_norm:
-        name = 'bn{}'.format(number)
-        trainable = is_trainable(name, trainable_variables)
-        bn = tf.layers.batch_normalization(
-            conv, axis=bn_axis, training=training, trainable=trainable,
-            scale=use_bias_with_batch_norm)
-        endpoints[name] = bn
-
-        name = 'activation{}'.format(number)
-        bn_act = tf.nn.relu(bn)
-        endpoints[name] = bn_act
-    else:
-        bn_act = conv
-
-    name = 'pool{}'.format(number)
-    pool = tf.layers.max_pooling2d(
-        inputs=bn_act,
-        pool_size=pool_size,
-        strides=pool_strides,
-        padding='same',
-        data_format=data_format)
-    endpoints[name] = pool
-
-    return pool
-
-
-def base_architecture(input_layer,
-                      mode=tf.estimator.ModeKeys.TRAIN,
-                      kernerl_size=(3, 3),
-                      data_format='channels_last',
-                      add_batch_norm=False,
-                      trainable_variables=None,
-                      use_bias_with_batch_norm=True,
-                      **kwargs):
-    training = mode == tf.estimator.ModeKeys.TRAIN
-    # Keep track of all the endpoints
-    endpoints = {}
-
-    # Convolutional Layer #1
-    # Computes 32 features using a kernerl_size filter with ReLU
-    # activation.
-    # Padding is added to preserve width and height.
-    pool1 = create_conv_layer(
-        inputs=input_layer,
-        mode=mode,
-        data_format=data_format,
-        endpoints=endpoints,
-        number=1,
-        filters=32,
-        kernel_size=kernerl_size,
-        pool_size=(2, 2),
-        pool_strides=2,
-        add_batch_norm=add_batch_norm,
-        trainable_variables=trainable_variables,
-        use_bias_with_batch_norm=use_bias_with_batch_norm,
-    )
-
-    # Convolutional Layer #2
-    # Computes 64 features using a kernerl_size filter.
-    # Padding is added to preserve width and height.
-    pool2 = create_conv_layer(
-        inputs=pool1,
-        mode=mode,
-        data_format=data_format,
-        endpoints=endpoints,
-        number=2,
-        filters=64,
-        kernel_size=kernerl_size,
-        pool_size=(2, 2),
-        pool_strides=2,
-        add_batch_norm=add_batch_norm,
-        trainable_variables=trainable_variables,
-        use_bias_with_batch_norm=use_bias_with_batch_norm,
-    )
-
-    # Flatten tensor into a batch of vectors
-    pool2_flat = tf.layers.flatten(pool2)
-    endpoints['pool2_flat'] = pool2_flat
-
-    # Dense Layer
-    # Densely connected layer with 1024 neurons
-    if add_batch_norm:
-        activation = None
-    else:
-        activation = tf.nn.relu
-
-    name = 'dense'
-    trainable = is_trainable(name, trainable_variables)
-    dense = tf.layers.dense(
-        inputs=pool2_flat,
-        units=1024,
-        activation=activation,
-        trainable=trainable,
-        use_bias=((not add_batch_norm) or use_bias_with_batch_norm),
-    )
-    endpoints[name] = dense
-
-    if add_batch_norm:
-        name = 'bn{}'.format(3)
-        trainable = is_trainable(name, trainable_variables)
-        bn = tf.layers.batch_normalization(
-            dense, axis=1, training=training, trainable=trainable,
-            scale=use_bias_with_batch_norm)
-        endpoints[name] = bn
-
-        name = 'activation{}'.format(3)
-        bn_act = tf.nn.relu(bn)
-        endpoints[name] = bn_act
-    else:
-        bn_act = dense
-
-    # Add dropout operation; 0.6 probability that element will be kept
-    dropout = tf.layers.dropout(
-        inputs=bn_act, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
-    endpoints['dropout'] = dropout
-
-    return dropout, endpoints
-
-
-def new_architecture(
-        input_layer,
-        mode=tf.estimator.ModeKeys.TRAIN,
-        kernerl_size=(3, 3),
-        data_format='channels_last',
-        add_batch_norm=True,
-        trainable_variables=None,
-        use_bias_with_batch_norm=False,
-        reuse=False,
-        **kwargs):
-    with tf.variable_scope('SimpleCNN', reuse=reuse):
-        return base_architecture(
-            input_layer=input_layer,
-            mode=mode,
-            kernerl_size=kernerl_size,
-            data_format=data_format,
-            add_batch_norm=add_batch_norm,
-            trainable_variables=trainable_variables,
-            use_bias_with_batch_norm=use_bias_with_batch_norm,
-            **kwargs)
-
-
-def slim_architecture(
-        input_layer,
-        mode=tf.estimator.ModeKeys.TRAIN,
-        kernerl_size=(3, 3),
-        data_format='channels_last',
-        add_batch_norm=True,
-        trainable_variables=None,
-        use_bias_with_batch_norm=False,
-        reuse=False,
-        **kwargs):
-    if data_format != 'channels_last':
-        raise ValueError("Only channels_last data_format is implemented!")
-    if (not add_batch_norm) or use_bias_with_batch_norm:
-        raise NotImplementedError()
-    slim = tf.contrib.slim
-    batch_norm_params = {
-        # Decay for the moving averages.
-        'decay': 0.995,
-        # epsilon to prevent 0s in variance.
-        'epsilon': 0.001,
-        # force in-place updates of mean and variance estimates
-        'updates_collections': None,
-    }
-
-    weight_decay = 5e-5
-    end_points = {}
-    with slim.arg_scope(
-        [slim.conv2d, slim.fully_connected],
-            weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
-            weights_regularizer=slim.l2_regularizer(weight_decay),
-            normalizer_fn=slim.batch_norm,
-            normalizer_params=batch_norm_params):
-        with tf.variable_scope('SimpleCNN', reuse=reuse), slim.arg_scope(
-            [slim.batch_norm, slim.dropout],
-                is_training=mode == tf.estimator.ModeKeys.TRAIN):
-
-            initializer = tf.contrib.layers.xavier_initializer()
-            name = 'conv1'
-            trainable = is_trainable(name, trainable_variables)
-            graph = slim.conv2d(
-                input_layer,
-                32, kernerl_size,
-                activation_fn=tf.nn.relu,
-                stride=1,
-                scope=name,
-                weights_initializer=initializer,
-                trainable=trainable)
-            end_points[name] = graph
-
-            graph = slim.max_pool2d(graph, [2, 2], scope='pool1')
-            end_points['pool1'] = graph
-
-            name = 'conv2'
-            trainable = is_trainable(name, trainable_variables)
-            graph = slim.conv2d(
-                graph,
-                64, kernerl_size,
-                activation_fn=tf.nn.relu,
-                stride=1,
-                scope=name,
-                weights_initializer=initializer,
-                trainable=trainable)
-            end_points[name] = graph
-
-            graph = slim.max_pool2d(graph, [2, 2], scope='pool2')
-            end_points['pool2'] = graph
-
-            graph = slim.flatten(graph, scope='flatten')
-            end_points['flatten'] = graph
-
-            name = 'dense'
-            trainable = is_trainable(name, trainable_variables)
-            graph = slim.fully_connected(
-                graph,
-                1024,
-                weights_initializer=initializer,
-                activation_fn=tf.nn.relu,
-                scope=name,
-                trainable=trainable)
-            end_points[name] = graph
-
-            name = 'dropout'
-            graph = slim.dropout(graph, 0.6, scope=name)
-            end_points[name] = graph
-
-    return graph, end_points
-
-
-def architecture(input_layer,
-                 mode=tf.estimator.ModeKeys.TRAIN,
-                 kernerl_size=(3, 3),
-                 n_classes=2,
-                 data_format='channels_last',
-                 reuse=False,
-                 add_batch_norm=False,
-                 trainable_variables=None,
-                 **kwargs):
-
-    with tf.variable_scope('SimpleCNN', reuse=reuse):
-
-        dropout, endpoints = base_architecture(
-            input_layer,
-            mode,
-            kernerl_size,
-            data_format,
-            add_batch_norm=add_batch_norm,
-            trainable_variables=trainable_variables)
-        # Logits layer
-        # Input Tensor Shape: [batch_size, 1024]
-        # Output Tensor Shape: [batch_size, n_classes]
-        name = 'logits'
-        trainable = is_trainable(name, trainable_variables)
-        logits = tf.layers.dense(
-            inputs=dropout, units=n_classes, trainable=trainable)
-        endpoints[name] = logits
-
-    return logits, endpoints
-
-
-def model_fn(features, labels, mode, params=None, config=None):
-    """Model function for CNN."""
-    data = features['data']
-    key = features['key']
-
-    params = params or {}
-    learning_rate = params.get('learning_rate', 1e-5)
-    apply_moving_averages = params.get('apply_moving_averages', False)
-    extra_checkpoint = params.get('extra_checkpoint')
-    trainable_variables = get_trainable_variables(extra_checkpoint)
-    loss_weights = params.get('loss_weights', 1.0)
-    add_histograms = params.get('add_histograms')
-    nnet_optimizer = params.get('nnet_optimizer') or 'sgd'
-
-    arch_kwargs = {
-        'kernerl_size': params.get('kernerl_size', None),
-        'n_classes': params.get('n_classes', None),
-        'data_format': params.get('data_format', None),
-        'add_batch_norm': params.get('add_batch_norm', None),
-        'trainable_variables': trainable_variables,
-    }
-    arch_kwargs = {k: v for k, v in arch_kwargs.items() if v is not None}
-
-    logits, _ = architecture(data, mode, **arch_kwargs)
-
-    # restore the model from an extra_checkpoint
-    if extra_checkpoint is not None and mode == tf.estimator.ModeKeys.TRAIN:
-        tf.train.init_from_checkpoint(
-            ckpt_dir_or_file=extra_checkpoint["checkpoint_path"],
-            assignment_map=extra_checkpoint["scopes"],
-        )
-
-    predictions = {
-        # Generate predictions (for PREDICT and EVAL mode)
-        "classes": tf.argmax(input=logits, axis=1),
-        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
-        # `logging_hook`.
-        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
-        'key': key,
-    }
-    if mode == tf.estimator.ModeKeys.PREDICT:
-        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-    # convert labels to the expected int32 format
-    labels = tf.cast(labels, dtype="int32")
-
-    accuracy = tf.metrics.accuracy(
-        labels=labels, predictions=predictions["classes"])
-    metrics = {'accuracy': accuracy}
-
-    global_step = tf.train.get_or_create_global_step()
-
-    # Compute the moving average of all individual losses and the total loss.
-    if apply_moving_averages and mode == tf.estimator.ModeKeys.TRAIN:
-        variable_averages = tf.train.ExponentialMovingAverage(
-            0.9999, global_step)
-        variable_averages_op = variable_averages.apply(
-            tf.trainable_variables())
-    else:
-        variable_averages_op = tf.no_op(name='noop')
-
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        # for batch normalization to be updated as well:
-        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-    else:
-        update_ops = []
-
-    with tf.control_dependencies([variable_averages_op] + update_ops):
-
-        # convert weights of per sample to weights per class
-        if isinstance(loss_weights, collections.abc.Iterable):
-            loss_weights = tf.gather(loss_weights, labels)
-
-        # Calculate Loss (for both TRAIN and EVAL modes)
-        loss = tf.losses.sparse_softmax_cross_entropy(
-            logits=logits, labels=labels, weights=loss_weights)
-
-        if apply_moving_averages and mode == tf.estimator.ModeKeys.TRAIN:
-            # Compute the moving average of all individual losses and the total
-            # loss.
-            loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
-            loss_averages_op = loss_averages.apply(
-                tf.get_collection(tf.GraphKeys.LOSSES))
-        else:
-            loss_averages_op = tf.no_op(name='noop')
-
-        if mode == tf.estimator.ModeKeys.TRAIN:
-
-            if nnet_optimizer == 'sgd':
-                optimizer = tf.train.GradientDescentOptimizer(
-                    learning_rate=learning_rate)
-            else:
-                optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
-            train_op = tf.group(
-                optimizer.minimize(loss, global_step=global_step),
-                variable_averages_op, loss_averages_op)
-
-            # Log accuracy and loss
-            with tf.name_scope('train_metrics'):
-                tf.summary.scalar('accuracy', accuracy[1])
-                tf.summary.scalar('loss', loss)
-                if apply_moving_averages:
-                    for l in tf.get_collection(tf.GraphKeys.LOSSES):
-                        tf.summary.scalar(l.op.name + "_averaged",
-                                          loss_averages.average(l))
-
-            # add histograms summaries
-            if add_histograms == 'all':
-                for v in tf.global_variables():
-                    tf.summary.histogram(v.name, v)
-            elif add_histograms == 'train':
-                for v in tf.trainable_variables():
-                    tf.summary.histogram(v.name, v)
-
-        else:
-            train_op = None
-
-    return tf.estimator.EstimatorSpec(
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metric_ops=metrics)
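
The ``loss_weights`` handling above converts a per-class weight vector into per-sample weights with ``tf.gather`` before it reaches the cross-entropy loss. A standalone illustration in eager TF 2.x (the weights and labels are made up):

```
import tensorflow as tf

# one weight per class: class 0 is common, class 1 is rare
class_weights = tf.constant([1.0, 5.0])
labels = tf.constant([0, 1, 1, 0])

# per-sample weights, looked up by label
sample_weights = tf.gather(class_weights, labels)
print(sample_weights.numpy())  # [1. 5. 5. 1.]
```
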
diff --git a/bob/learn/tensorflow/network/Vgg.py b/bob/learn/tensorflow/network/Vgg.py
deleted file mode 100644
index 732c6a42de2764e9c818e7e3c13efb9f71a8f371..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/Vgg.py
+++ /dev/null
@@ -1,133 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-
-"""
-VGG16 and VGG19 wrappers
-"""
-
-import tensorflow as tf
-from tensorflow.contrib.slim.python.slim.nets import vgg
-from tensorflow.contrib.layers.python.layers import layers as layers_lib
-from tensorflow.contrib import layers
-import tensorflow.contrib.slim as slim
-from tensorflow.python.ops import variable_scope
-from .utils import is_trainable
-
-
-def vgg_19(inputs,
-           reuse=None,                      
-           mode=tf.estimator.ModeKeys.TRAIN, **kwargs):
-    """
-    Oxford Net VGG 19-Layers version E Example from tf-slim
-
-    https://raw.githubusercontent.com/tensorflow/models/master/research/slim/nets/vgg.py
-
-    **Parameters**:
-
-        inputs: a 4-D tensor of size [batch_size, height, width, 3].
-
-        reuse: whether or not the network and its variables should be reused. To be
-               able to reuse, 'scope' must be given.
-
-        mode:
-           Estimator mode keys
-    """
-
-    with slim.arg_scope(
-        [slim.conv2d],
-            trainable=mode==tf.estimator.ModeKeys.TRAIN):
-
-        return vgg.vgg_19(inputs, spatial_squeeze=False)
-
-
-def vgg_16(inputs,
-           reuse=None,                      
-           mode=tf.estimator.ModeKeys.TRAIN,
-           trainable_variables=None,
-           scope="vgg_16",
-           **kwargs):
-    """
-    Oxford Net VGG 16-Layers version D Example from tf-slim
-
-    https://raw.githubusercontent.com/tensorflow/models/master/research/slim/nets/vgg.py
-
-    **Parameters**:
-
-        inputs: a 4-D tensor of size [batch_size, height, width, 3].
-
-        reuse: whether or not the network and its variables should be reused. To be
-               able to reuse, 'scope' must be given.
-
-        mode:
-           Estimator mode keys
-    """
-
-    dropout_keep_prob = 0.5
-    end_points = {}
-    
-
-    with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
-        end_points_collection = sc.original_name_scope + '_end_points'
-        # Collect outputs for conv2d, fully_connected and max_pool2d.
-
-        with slim.arg_scope(
-            [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d], outputs_collections=end_points_collection):
-    
-            with slim.arg_scope(
-                [slim.conv2d],
-                    trainable=mode==tf.estimator.ModeKeys.TRAIN):
-
-                name = "conv1"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                net = layers_lib.repeat(
-                    inputs, 2, layers.conv2d, 64, [3, 3], scope=name, trainable=trainable)
-                net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
-                end_points[name] = net
-
-                name = "conv2"        
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope=name, trainable=trainable)
-                net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
-                end_points[name] = net        
-                
-                name = "conv3"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope=name, trainable=trainable)
-                net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
-                end_points[name] = net        
-
-                name = "conv4"
-                trainable = is_trainable(name, trainable_variables, mode=mode)        
-                net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope=name, trainable=trainable)
-                net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
-                end_points[name] = net        
-
-                name = "conv5"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope=name, trainable=trainable)
-                net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
-                end_points[name] = net        
-                
-                net = layers.flatten(net)
-                
-                # Fully connected layers (the reference slim implementation uses conv2d here instead).
-                name = "fc6"
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                net = layers.fully_connected(net, 4096, scope=name, trainable=trainable)
-                net = layers_lib.dropout(
-                    net, dropout_keep_prob, is_training=mode==tf.estimator.ModeKeys.TRAIN, scope='dropout6')
-                end_points[name] = net            
-
-                name = "fc7"            
-                trainable = is_trainable(name, trainable_variables, mode=mode)
-                net = layers.fully_connected(net, 4096, scope=name, trainable=trainable)
-                net = layers_lib.dropout(
-                    net, dropout_keep_prob, is_training=mode==tf.estimator.ModeKeys.TRAIN, scope='dropout7')
-
-                end_points[name] = net            
-  
-    # Return the last layer and the manually collected end_points dict.
-    return net, end_points
-
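
The recurring ``is_trainable(name, trainable_variables, mode=mode)`` pattern in this file (and in SimpleCNN above) implements selective fine-tuning: a scope only receives gradient updates if it is listed. The real helper lives in ``bob.learn.tensorflow.utils.network``; the sketch below is an assumption about its behavior inferred from the call sites, not its actual implementation:

```
import tensorflow as tf


def is_trainable(name, trainable_variables, mode=tf.estimator.ModeKeys.TRAIN):
    """Assumed semantics: trainable only in TRAIN mode, and only if the
    scope is listed (None meaning no restriction)."""
    if mode != tf.estimator.ModeKeys.TRAIN:
        return False
    if trainable_variables is None:
        return True
    return name in trainable_variables


print(is_trainable("conv1", None))            # True: everything trains
print(is_trainable("conv1", ["fc6", "fc7"]))  # False: frozen backbone scope
print(is_trainable("fc6", ["fc6", "fc7"]))    # True: fine-tuned head scope
```
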
diff --git a/bob/learn/tensorflow/network/__init__.py b/bob/learn/tensorflow/network/__init__.py
deleted file mode 100644
index 9497048c267c0c4f16cc244525217482b04c013b..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/__init__.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from .Chopra import chopra
-from .LightCNN9 import light_cnn9
-from .Dummy import dummy
-from .MLP import mlp, mlp_with_batchnorm_and_dropout
-from .InceptionResnetV2 import inception_resnet_v2, inception_resnet_v2_batch_norm
-from .InceptionResnetV1 import inception_resnet_v1, inception_resnet_v1_batch_norm
-from . import SimpleCNN
-from .Vgg import vgg_19, vgg_16
-
-
-# gets sphinx autodoc done right - don't remove it
-def __appropriate__(*args):
-    """Says object was actually declared here, an not on the import module.
-
-    Parameters:
-
-            *args: An iterable of objects to modify
-
-    Resolves `Sphinx referencing issues
-    <https://github.com/sphinx-doc/sphinx/issues/3048>`
-    """
-
-    for obj in args:
-        obj.__module__ = __name__
-
-
-__appropriate__(
-    chopra,
-    light_cnn9,
-    dummy,
-    mlp,
-    inception_resnet_v2, inception_resnet_v2_batch_norm,
-    inception_resnet_v1, inception_resnet_v1_batch_norm,
-    SimpleCNN, vgg_19, vgg_16
-)
-
-__all__ = [_ for _ in dir() if not _.startswith('_')]
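
The ``__appropriate__`` trick works because Sphinx autodoc attributes an object to its ``__module__``; rewriting that attribute makes re-exported names document as if they were defined in the package itself. A tiny self-contained demonstration:

```
def helper():
    """A function defined in some submodule."""


print(helper.__module__)  # the defining module, e.g. '__main__'

helper.__module__ = "bob.learn.tensorflow.network"
print(helper.__module__)  # now reported as the package
```
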
diff --git a/bob/learn/tensorflow/network/utils.py b/bob/learn/tensorflow/network/utils.py
deleted file mode 100644
index 0c5c855f023be38dcc3b4d130f1117cb0ccfa307..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/network/utils.py
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-# functions were moved
-from ..utils.network import append_logits, is_trainable
diff --git a/bob/learn/tensorflow/script/__init__.py b/bob/learn/tensorflow/script/__init__.py
deleted file mode 100644
index b156cdd2398fd5af2b88db8279fd84c85b767b36..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-# gets sphinx autodoc done right - don't remove it
-__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/learn/tensorflow/script/cache_dataset.py b/bob/learn/tensorflow/script/cache_dataset.py
deleted file mode 100644
index 7f8c04f39dceaf9a312ed655d1a17882431058e2..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/cache_dataset.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-"""Trains networks using Tensorflow estimators.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import logging
-import click
-import tensorflow as tf
-from bob.extension.scripts.click_helper import (
-    verbosity_option, ConfigCommand, ResourceOption, log_parameters)
-from bob.bio.base import is_argument_available
-
-logger = logging.getLogger(__name__)
-
-
-@click.command(
-    entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand)
-@click.option(
-    '--input-fn',
-    '-i',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.input_fn',
-    help='The ``input_fn`` that will return the features and labels. '
-         'You should call the dataset.cache(...) yourself in the input '
-         'function. If the ``input_fn`` accepts a ``cache_only`` argument, '
-         'it will be given as True.')
-@click.option(
-    '--mode',
-    cls=ResourceOption,
-    default=tf.estimator.ModeKeys.TRAIN,
-    show_default=True,
-    type=click.Choice((tf.estimator.ModeKeys.TRAIN,
-                       tf.estimator.ModeKeys.EVAL,
-                       tf.estimator.ModeKeys.PREDICT)),
-    help='mode value to be given to the input_fn.')
-@verbosity_option(cls=ResourceOption)
-def cache_dataset(input_fn, mode, **kwargs):
-    """Trains networks using Tensorflow estimators."""
-    log_parameters(logger)
-
-    kwargs = {}
-    if is_argument_available('cache_only', input_fn):
-        kwargs['cache_only'] = True
-        logger.info("cache_only as True will be passed to input_fn.")
-
-    # call the input function manually
-    with tf.Session() as sess:
-        data = input_fn(mode, **kwargs)
-        if isinstance(data, tf.data.Dataset):
-            iterator = data.make_initializable_iterator()
-            data = iterator.get_next()
-            sess.run(iterator.initializer)
-        sess.run(tf.initializers.global_variables())
-        try:
-            while True:
-                sess.run(data)
-        except tf.errors.OutOfRangeError:
-            click.echo("Finished reading the dataset.")
-            return
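In TF2's eager mode, the same effect — one full pass over the data so that a ``dataset.cache(...)`` inside the input function is materialized on disk — no longer needs a session; a minimal sketch, with an illustrative dataset and cache path::

    import tensorflow as tf

    dataset = tf.data.Dataset.range(10)       # stand-in for input_fn(mode)
    dataset = dataset.cache("/tmp/my_cache")  # hypothetical cache location
    for _ in dataset:                         # a full pass writes the cache
        pass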
diff --git a/bob/learn/tensorflow/script/compute_statistics.py b/bob/learn/tensorflow/script/compute_statistics.py
deleted file mode 100644
index b876c5e4ae5be9f3df6f4f18c34aaaf50ef2395e..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/compute_statistics.py
+++ /dev/null
@@ -1,82 +0,0 @@
-#!/usr/bin/env python
-"""Computes statistics on a BioGenerator.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import logging
-import click
-import numpy as np
-from bob.extension.scripts.click_helper import (
-    verbosity_option, ConfigCommand, ResourceOption, log_parameters)
-from bob.learn.tensorflow.dataset.bio import BioGenerator
-
-logger = logging.getLogger(__name__)
-
-
-@click.command(
-    entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand,
-    epilog="""\b
-An example configuration could be::
-    # define the database:
-    from bob.bio.base.test.dummy.database import database
-    groups = ['dev']
-    biofiles = database.all_files(groups)
-"""
-)
-@click.option(
-    '--database',
-    '-d',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.bio.database',
-    help='A bio database. Its original_directory must point to the correct '
-    'path.')
-@click.option(
-    '--biofiles',
-    required=True,
-    cls=ResourceOption,
-    help='The list of the bio files. You can only provide this through '
-    'config files.')
-@click.option(
-    '--load-data',
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.load_data',
-    help='A callable with the signature of '
-    '``data = load_data(database, biofile)``. '
-    ':any:`bob.bio.base.read_original_data` is used by default.')
-@click.option(
-    '--multiple-samples',
-    is_flag=True,
-    cls=ResourceOption,
-    help='If provided, it means that the data provided by reader contains '
-    'multiple samples with same label and path.')
-@verbosity_option(cls=ResourceOption)
-def compute_statistics(database, biofiles, load_data, multiple_samples,
-                       **kwargs):
-    """Computes statistics on a BioGenerator.
-
-    This script works with bob.bio.base databases. It will load all the samples
-    and print their mean.
-    """
-    log_parameters(logger, ignore=('biofiles', ))
-    logger.debug("len(biofiles): %d", len(biofiles))
-
-    assert len(biofiles), "biofiles are empty!"
-    logger.info('Calculating the mean for %d files', len(biofiles))
-
-    generator = BioGenerator(
-        database,
-        biofiles,
-        load_data=load_data,
-        multiple_samples=multiple_samples)
-
-    for i, (data, _, _) in enumerate(generator()):
-        if i == 0:
-            mean = np.cast['float'](data)
-        else:
-            mean += data
-
-    mean = mean.reshape(mean.shape[0], -1)
-    mean = np.mean(mean, axis=1)
-    click.echo(mean / (i + 1.))
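Summing all samples before dividing, as above, can overflow for long file lists; a numerically safer alternative (a sketch with stand-in data, not part of the deleted script) updates the mean incrementally::

    import numpy as np

    samples = [np.ones((2, 2)), 3 * np.ones((2, 2))]  # stand-in generator
    mean = None
    for i, data in enumerate(samples):
        data = np.asarray(data, dtype="float64")
        # mean_i = mean_{i-1} + (x_i - mean_{i-1}) / (i + 1)
        mean = data if mean is None else mean + (data - mean) / (i + 1)
    print(mean)  # [[2. 2.] [2. 2.]]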
diff --git a/bob/learn/tensorflow/script/db_to_tfrecords.py b/bob/learn/tensorflow/script/db_to_tfrecords.py
deleted file mode 100644
index 7460f7c664ec2726bdeae2bec5ca4bcfc0e11d4a..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/db_to_tfrecords.py
+++ /dev/null
@@ -1,337 +0,0 @@
-#!/usr/bin/env python
-"""Converts Bio and PAD datasets to TFRecords file formats.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import logging
-import os
-import random
-import tempfile
-import click
-import tensorflow as tf
-from bob.io.base import create_directories_safe, HDF5File
-from bob.extension.scripts.click_helper import (
-    verbosity_option,
-    ConfigCommand,
-    ResourceOption,
-    log_parameters,
-)
-from bob.learn.tensorflow.dataset.tfrecords import (
-    describe_tf_record,
-    write_a_sample,
-    normalize_tfrecords_path,
-    tfrecord_name_and_json_name,
-    dataset_to_tfrecord,
-)
-from bob.learn.tensorflow.utils import bytes2human
-
-
-logger = logging.getLogger(__name__)
-
-
-@click.command(entry_point_group="bob.learn.tensorflow.config", cls=ConfigCommand)
-@click.option(
-    "--samples",
-    required=True,
-    cls=ResourceOption,
-    help="A list of all samples that you want to write in the "
-    "tfrecords file. Whatever is inside this list is passed to "
-    "the reader.",
-)
-@click.option(
-    "--reader",
-    required=True,
-    cls=ResourceOption,
-    help="a function with the signature of ``data, label, key = "
-    "reader(sample)`` which takes a sample and returns the "
-    "loaded data, the label of the data, and a key which is "
-    "unique for every sample.",
-)
-@click.option(
-    "--output", "-o", required=True, cls=ResourceOption, help="Name of the output file."
-)
-@click.option(
-    "--shuffle",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, it will shuffle the samples.",
-)
-@click.option(
-    "--allow-failures",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, the samples which fail to load are ignored.",
-)
-@click.option(
-    "--multiple-samples",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, it means that the data provided by reader contains "
-    "multiple samples with same label and path.",
-)
-@click.option(
-    "--size-estimate",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If given, will print the estimated file size instead of creating "
-    "the final tfrecord file.",
-)
-@verbosity_option(cls=ResourceOption)
-def db_to_tfrecords(
-    samples,
-    reader,
-    output,
-    shuffle,
-    allow_failures,
-    multiple_samples,
-    size_estimate,
-    **kwargs,
-):
-    """Converts Bio and PAD datasets to TFRecords file formats.
-
-    The best way to use this script is to send it to the io-big queue if you
-    are at Idiap::
-
-        $ jman submit -i -q q1d -- %(prog)s <config_files>...
-
-    An example for mnist would be::
-
-        from bob.db.mnist import Database
-        db = Database()
-        data, labels = db.data(groups='train')
-
-        samples = zip(data, labels, (str(i) for i in range(len(data))))
-
-        def reader(sample):
-            return sample
-
-        allow_failures = True
-        output = '/tmp/mnist_train.tfrecords'
-        shuffle = True
-
-    An example for bob.bio.base would be::
-
-        from bob.bio.base.test.dummy.database import database
-        from bob.bio.base.utils import read_original_data
-
-        groups = 'dev'
-
-        samples = database.all_files(groups=groups)
-
-        CLIENT_IDS = (str(f.client_id) for f
-                      in database.all_files(groups=groups))
-        CLIENT_IDS = list(set(CLIENT_IDS))
-        CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))
-
-
-        def file_to_label(f):
-            return CLIENT_IDS[str(f.client_id)]
-
-
-        def reader(biofile):
-            data = read_original_data(
-                biofile, database.original_directory,
-                database.original_extension)
-            label = file_to_label(biofile)
-            key = biofile.path
-            return (data, label, key)
-    """
-    log_parameters(logger, ignore=("samples",))
-    logger.debug("len(samples): %d", len(samples))
-
-    if size_estimate:
-        output = tempfile.NamedTemporaryFile(suffix=".tfrecords").name
-
-    output = normalize_tfrecords_path(output)
-
-    if not size_estimate:
-        logger.info("Writing samples to `{}'".format(output))
-
-    total_size = 0
-
-    create_directories_safe(os.path.dirname(output))
-
-    n_samples = len(samples)
-    sample_count = 0
-    with tf.python_io.TFRecordWriter(output) as writer:
-        if shuffle:
-            logger.info("Shuffling the samples before writing ...")
-            random.shuffle(samples)
-        for i, sample in enumerate(samples):
-            logger.info("Processing file %d out of %d", i + 1, n_samples)
-
-            data, label, key = reader(sample)
-
-            if data is None:
-                if allow_failures:
-                    logger.debug("... Skipping `{0}`.".format(sample))
-                    continue
-                else:
-                    raise RuntimeError("Reading failed for `{0}`".format(sample))
-
-            if multiple_samples:
-                for sample in data:
-                    total_size += write_a_sample(
-                        writer, sample, label, key, size_estimate=size_estimate
-                    )
-                    sample_count += 1
-            else:
-                total_size += write_a_sample(
-                    writer, data, label, key, size_estimate=size_estimate
-                )
-                sample_count += 1
-
-    if not size_estimate:
-        click.echo("Wrote {} samples into the tfrecords file.".format(sample_count))
-    else:
-        # delete the empty tfrecords file
-        try:
-            os.remove(output)
-        except Exception:
-            pass
-    click.echo(
-        "The total size of the tfrecords file will be roughly "
-        "{} bytes".format(bytes2human(total_size))
-    )
-
-
-@click.command()
-@click.argument("tf-record-path", nargs=1)
-@click.argument("shape", type=int, nargs=-1)
-@click.option(
-    "--batch-size", help="Batch size", show_default=True, required=True, default=1000
-)
-@verbosity_option(cls=ResourceOption)
-def describe_tfrecord(tf_record_path, shape, batch_size, **kwargs):
-    """
-    Often you have a tfrecord file, or a set of them, without knowing how
-    many samples or how many classes they contain.
-
-    This command reports both, given the sample shape::
-
-        $ %(prog)s <tf-record-path> 182 182 3
-
-    """
-    n_samples, n_labels = describe_tf_record(tf_record_path, shape, batch_size)
-    click.echo("#############################################")
-    click.echo("Number of samples {0}".format(n_samples))
-    click.echo("Number of labels {0}".format(n_labels))
-    click.echo("#############################################")
-
-
-@click.command(entry_point_group="bob.learn.tensorflow.config", cls=ConfigCommand)
-@click.option(
-    "--dataset",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.learn.tensorflow.dataset",
-    help="A tf.data.Dataset to be used.",
-)
-@click.option(
-    "--output", "-o", required=True, cls=ResourceOption, help="Name of the output file."
-)
-@click.option(
-    "--force",
-    "-f",
-    is_flag=True,
-    cls=ResourceOption,
-    help="Whether to overwrite existing files.",
-)
-@verbosity_option(cls=ResourceOption)
-def datasets_to_tfrecords(dataset, output, force, **kwargs):
-    """Converts tensorflow datasets into TFRecords.
-    Takes a dataset and an output path and writes the dataset into that output.
-    You can convert the written TFRecord files back to datasets using
-    :any:`bob.learn.tensorflow.dataset.tfrecords.dataset_from_tfrecord`.
-
-    To use this script with SGE, change your dataset and output based on the SGE_TASK_ID
-    environment variable in your config file.
-    """
-    log_parameters(logger)
-
-    output, json_output = tfrecord_name_and_json_name(output)
-    if not force and os.path.isfile(output):
-        click.echo("Output file already exists: {}".format(output))
-        return
-
-    click.echo("Writing tfrecod to: {}".format(output))
-    with tf.Session() as sess:
-        os.makedirs(os.path.dirname(output), exist_ok=True)
-        try:
-            sess.run(dataset_to_tfrecord(dataset, output))
-        except Exception:
-            click.echo("Something failed. Deleting unfinished files.")
-            os.remove(output)
-            os.remove(json_output)
-            raise
-    click.echo("Successfully wrote all files.")
-
-
-@click.command(entry_point_group="bob.learn.tensorflow.config", cls=ConfigCommand)
-@click.option(
-    "--dataset",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.learn.tensorflow.dataset",
-    help="A tf.data.Dataset to be used.",
-)
-@click.option(
-    "--output", "-o", required=True, cls=ResourceOption, help="Name of the output file."
-)
-@click.option(
-    "--mean",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, the mean of data and labels will be saved in the hdf5 "
-    'file as well. You can access them in the "mean" group.',
-)
-@verbosity_option(cls=ResourceOption)
-def dataset_to_hdf5(dataset, output, mean, **kwargs):
-    """Saves a tensorflow dataset into an HDF5 file
-
-    It is assumed that the dataset returns a tuple of (data, label, key) and
-    the dataset is not batched.
-    """
-    log_parameters(logger)
-
-    data, label, key = dataset.make_one_shot_iterator().get_next()
-
-    sess = tf.Session()
-
-    extension = ".hdf5"
-
-    if not output.endswith(extension):
-        output += extension
-
-    create_directories_safe(os.path.dirname(output))
-
-    sample_count = 0
-    data_mean = 0.0
-    label_mean = 0.0
-
-    with HDF5File(output, "w") as f:
-        while True:
-            try:
-                d, l, k = sess.run([data, label, key])
-                group = "/{}".format(sample_count)
-                f.create_group(group)
-                f.cd(group)
-                f["data"] = d
-                f["label"] = l
-                f["key"] = k
-                sample_count += 1
-                if mean:
-                    data_mean += (d - data_mean) / sample_count
-                    label_mean += (l - label_mean) / sample_count
-            except tf.errors.OutOfRangeError:
-                break
-        if mean:
-            f.create_group("/mean")
-            f.cd("/mean")
-            f["data_mean"] = data_mean
-            f["label_mean"] = label_mean
-
-    click.echo(f"Wrote {sample_count} samples into the hdf5 file.")
diff --git a/bob/learn/tensorflow/script/eval.py b/bob/learn/tensorflow/script/eval.py
deleted file mode 100644
index 0ff48972f05cf70aa80c849ac7f6e85f0c9508b1..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/eval.py
+++ /dev/null
@@ -1,295 +0,0 @@
-#!/usr/bin/env python
-"""Evaluates networks using Tensorflow estimators.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import click
-import logging
-import os
-import shutil
-import sys
-import tensorflow as tf
-import time
-from glob import glob
-from collections import defaultdict, OrderedDict
-from ..utils.eval import get_global_step
-from bob.extension.scripts.click_helper import (
-    verbosity_option, ConfigCommand, ResourceOption, log_parameters)
-from bob.io.base import create_directories_safe
-
-logger = logging.getLogger(__name__)
-
-
-def copy_one_step(train_dir, global_step, save_dir, fail_on_error=False):
-    for path in glob('{}/model.ckpt-{}.*'.format(train_dir, global_step)):
-        dst = os.path.join(save_dir, os.path.basename(path))
-        if os.path.isfile(dst):
-            continue
-        try:
-            shutil.copy(path, dst)
-            logger.info("Copied `%s' over to `%s'", path, dst)
-        except OSError:
-            logger.warning(
-                "Failed to copy `%s' over to `%s'", path, dst,
-                exc_info=True)
-            if fail_on_error:
-                raise
-
-
-def save_n_best_models(train_dir, save_dir, evaluated_file,
-                       keep_n_best_models, sort_by, exceptions=tuple()):
-    logger.debug(
-        "save_n_best_models was called with %s, %s, %s, %s, %s, %s",
-        train_dir, save_dir, evaluated_file, keep_n_best_models, sort_by,
-        exceptions)
-
-    create_directories_safe(save_dir)
-    evaluated = read_evaluated_file(evaluated_file)
-
-    def _key(x):
-        x = x[1][sort_by]
-        if 'loss' in sort_by:
-            return x
-        else:
-            return -x
-
-    best_models = OrderedDict(
-        sorted(evaluated.items(), key=_key)[:keep_n_best_models])
-    logger.info("Best models: %s", best_models)
-
-    # delete the old saved models that are not in top N best anymore
-    saved_models = defaultdict(list)
-    for path in glob('{}/model.ckpt-*'.format(save_dir)):
-        global_step = path.split('model.ckpt-')[1].split('.')[0]
-        saved_models[global_step].append(path)
-
-    for global_step, paths in saved_models.items():
-        if global_step not in best_models and global_step not in exceptions:
-            for path in paths:
-                logger.info("Deleting `%s'", path)
-                os.remove(path)
-
-    # copy over the best models if not already there
-    for global_step in best_models:
-        copy_one_step(train_dir, global_step, save_dir)
-
-    # create a checkpoint file indicating to the best existing model:
-    # 1. filter non-existing models first
-    def _filter(x):
-        return len(glob('{}/model.ckpt-{}.*'.format(save_dir, x[0]))) > 0
-
-    best_models = OrderedDict(filter(_filter, best_models.items()))
-
-    # 2. create the checkpoint file
-    with open(os.path.join(save_dir, 'checkpoint'), 'wt') as f:
-        if not len(best_models):
-            return
-        the_best_global_step = list(best_models)[0]
-        f.write('model_checkpoint_path: "model.ckpt-{}"\n'.format(
-            the_best_global_step))
-        # reverse the models before saving since the last ones in checkpoints
-        # are usually more important. This aligns with the bob tf trim script.
-        for i, global_step in enumerate(reversed(best_models)):
-            f.write('all_model_checkpoint_paths: "model.ckpt-{}"\n'.format(
-                global_step))
-
-
-def read_evaluated_file(path):
-    evaluated = {}
-    with open(path) as f:
-        for line in f:
-            global_step, line = line.split(' ', 1)
-            temp = {}
-            for k_v in line.strip().split(', '):
-                k, v = k_v.split(' = ')
-                try:
-                    v = float(v)
-                except ValueError:  # not all values could be floats
-                    pass
-                if 'global_step' in k:
-                    v = int(v)
-                temp[k] = v
-            evaluated[global_step] = temp
-    return evaluated
-
-
-def append_evaluated_file(path, evaluations):
-    str_evaluations = ', '.join(
-        '%s = %s' % (k, v) for k, v in sorted(evaluations.items()))
-    with open(path, 'a') as f:
-        f.write('{} {}\n'.format(evaluations['global_step'], str_evaluations))
-    return str_evaluations
-
-
-@click.command(
-    entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand)
-@click.option(
-    '--estimator',
-    '-e',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.estimator',
-    help='The estimator that will be evaluated.')
-@click.option(
-    '--eval-input-fn',
-    '-i',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.input_fn',
-    help='The ``input_fn`` that will be given to '
-    ':any:`tf.estimator.Estimator.eval`.')
-@click.option(
-    '--hooks',
-    cls=ResourceOption,
-    multiple=True,
-    entry_point_group='bob.learn.tensorflow.hook',
-    help='List of SessionRunHook subclass instances. Used for callbacks '
-    'inside the evaluation loop.')
-@click.option(
-    '--run-once',
-    cls=ResourceOption,
-    default=False,
-    show_default=True,
-    is_flag=True,
-    help='If given, the model will be evaluated only once.')
-@click.option(
-    '--eval-interval-secs',
-    cls=ResourceOption,
-    type=click.INT,
-    default=60,
-    show_default=True,
-    help='The seconds to wait for the next evaluation.')
-@click.option('--name', cls=ResourceOption, help='Name of the evaluation')
-@click.option(
-    '--keep-n-best-models',
-    '-K',
-    type=click.INT,
-    cls=ResourceOption,
-    default=1,
-    show_default=True,
-    help='If more than 0, will keep the best N models in the evaluation folder'
-)
-@click.option(
-    '--sort-by',
-    cls=ResourceOption,
-    default="loss",
-    show_default=True,
-    help='The metric for sorting the N best models.')
-@click.option(
-    '--max-wait-intervals',
-    cls=ResourceOption,
-    type=click.INT,
-    default=-1,
-    show_default=True,
-    help='If given, the maximum number of intervals to wait for a new training checkpoint.')
-@click.option(
-    '--force-re-run',
-    is_flag=True,
-    default=False,
-    help='A debugging flag. Do not use!')
-@verbosity_option(cls=ResourceOption)
-def eval(estimator, eval_input_fn, hooks, run_once, eval_interval_secs, name,
-         keep_n_best_models, sort_by, max_wait_intervals, force_re_run,
-         **kwargs):
-    """Evaluates networks using Tensorflow estimators."""
-    log_parameters(logger)
-
-    real_name = 'eval_' + name if name else 'eval'
-    eval_dir = os.path.join(estimator.model_dir, real_name)
-    os.makedirs(eval_dir, exist_ok=True)
-    evaluated_file = os.path.join(eval_dir, 'evaluated')
-    wait_interval_count = 0
-    evaluated_steps_count = 0
-    while True:
-        evaluated_steps = {}
-        ckpt = tf.train.get_checkpoint_state(estimator.model_dir)
-
-        if os.path.exists(evaluated_file):
-            evaluated_steps = read_evaluated_file(evaluated_file)
-
-            # create exceptions so we don't delete them
-            exceptions = []
-            if ckpt and ckpt.model_checkpoint_path:
-                for checkpoint_path in ckpt.all_model_checkpoint_paths:
-                    try:
-                        global_step = str(get_global_step(checkpoint_path))
-                    except Exception:
-                        logger.warning("Failed to find global_step.", exc_info=True)
-                        continue
-                    if global_step not in evaluated_steps:
-                        exceptions.append(global_step)
-
-            if max_wait_intervals > 0:
-                new_evaluated_count = len(evaluated_steps.keys())
-                if new_evaluated_count > 0:
-                    if new_evaluated_count == evaluated_steps_count:
-                        wait_interval_count += 1
-                        if wait_interval_count > max_wait_intervals:
-                            click.echo("Reached maximum wait interval!")
-                            break
-                    else:
-                        evaluated_steps_count = new_evaluated_count
-                        wait_interval_count = 0
-
-            # Save the best N models into the eval directory
-            save_n_best_models(estimator.model_dir, eval_dir, evaluated_file,
-                               keep_n_best_models, sort_by, exceptions)
-
-        if (not ckpt) or (not ckpt.model_checkpoint_path):
-            if max_wait_intervals > 0:
-                wait_interval_count += 1
-                if wait_interval_count > max_wait_intervals:
-                    break
-            time.sleep(eval_interval_secs)
-            continue
-
-        for checkpoint_path in ckpt.all_model_checkpoint_paths:
-            try:
-                global_step = str(get_global_step(checkpoint_path))
-            except Exception:
-                logger.warning(
-                    'Failed to find global_step for checkpoint_path {}, '
-                    'skipping ...'.format(checkpoint_path), exc_info=True)
-                continue
-            if global_step in evaluated_steps and not force_re_run:
-                continue
-
-            # copy over the checkpoint before evaluating since it might
-            # disappear after evaluation.
-            try:
-                copy_one_step(estimator.model_dir, global_step, eval_dir, fail_on_error=True)
-            except Exception:
-                # skip testing this checkpoint
-                continue
-
-            # evaluate based on the just copied checkpoint_path
-            checkpoint_path = checkpoint_path.replace(estimator.model_dir, eval_dir + os.sep)
-            checkpoint_path = os.path.abspath(checkpoint_path)
-            logger.debug("Evaluating the model from %s", checkpoint_path)
-
-            # Evaluate
-            try:
-                evaluations = estimator.evaluate(
-                    input_fn=eval_input_fn,
-                    steps=None,
-                    hooks=hooks,
-                    checkpoint_path=checkpoint_path,
-                    name=name,
-                )
-            except Exception:
-                logger.info("Something went wrong in evaluation.")
-                raise
-
-            str_evaluations = append_evaluated_file(evaluated_file,
-                                                    evaluations)
-            click.echo(str_evaluations)
-            sys.stdout.flush()
-
-            # Save the best N models into the eval directory
-            save_n_best_models(estimator.model_dir, eval_dir, evaluated_file,
-                               keep_n_best_models, sort_by)
-
-        if run_once:
-            break
-        time.sleep(eval_interval_secs)
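Each line written by ``append_evaluated_file`` has the shape ``<global_step> key = value, key = value, ...``; a self-contained sketch of parsing one such line, mirroring what ``read_evaluated_file`` does::

    line = "1000 accuracy = 0.95, global_step = 1000, loss = 0.125"
    global_step, rest = line.split(" ", 1)
    metrics = dict(kv.split(" = ") for kv in rest.strip().split(", "))
    # metrics == {'accuracy': '0.95', 'global_step': '1000', 'loss': '0.125'}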
diff --git a/bob/learn/tensorflow/script/fit.py b/bob/learn/tensorflow/script/fit.py
deleted file mode 100644
index f19776c5da551fa9dd611d0279eab99d0eee6469..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/fit.py
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/env python
-"""Trains networks using Keras Models.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import click
-import json
-import logging
-import os
-import tensorflow as tf
-from bob.extension.scripts.click_helper import (
-    verbosity_option, ConfigCommand, ResourceOption, log_parameters)
-
-logger = logging.getLogger(__name__)
-
-
-@click.command(
-    entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand)
-@click.option(
-    '--model',
-    '-m',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.model',
-    help='The keras model that will be trained.')
-@click.option(
-    '--train-input-fn',
-    '-i',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.input_fn',
-    help='A function that will return the training data as a tf.data.Dataset '
-    'or tf.data.Iterator. This will be given as `x` to '
-    'tf.keras.Model.fit.')
-@click.option(
-    '--epochs',
-    '-e',
-    default=1,
-    type=click.types.INT,
-    cls=ResourceOption,
-    help='Number of epochs to train model. See '
-    'tf.keras.Model.fit.')
-@click.option(
-    '--callbacks',
-    cls=ResourceOption,
-    multiple=True,
-    entry_point_group='bob.learn.tensorflow.callback',
-    help='List of tf.keras.callbacks. Used for callbacks '
-    'inside the training loop.')
-@click.option(
-    '--eval-input-fn',
-    '-i',
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.input_fn',
-    help='A function that will return the validation data as a tf.data.Dataset'
-    ' or tf.data.Iterator. This will be given as `validation_data` to '
-    'tf.keras.Model.fit.')
-@click.option(
-    '--class-weight',
-    '-c',
-    cls=ResourceOption,
-    help='See tf.keras.Model.fit.')
-@click.option(
-    '--initial-epoch',
-    default=0,
-    type=click.types.INT,
-    cls=ResourceOption,
-    help='See tf.keras.Model.fit.')
-@click.option(
-    '--steps-per-epoch',
-    type=click.types.INT,
-    cls=ResourceOption,
-    help='See tf.keras.Model.fit.')
-@click.option(
-    '--validation-steps',
-    type=click.types.INT,
-    cls=ResourceOption,
-    help='See tf.keras.Model.fit.')
-@verbosity_option(cls=ResourceOption)
-def fit(model, train_input_fn, epochs, verbose, callbacks, eval_input_fn,
-        class_weight, initial_epoch, steps_per_epoch, validation_steps,
-        **kwargs):
-    """Trains networks using Keras models."""
-    log_parameters(logger)
-
-    # Train
-    save_callback = [c for c in callbacks if isinstance(c, tf.keras.callbacks.ModelCheckpoint)]
-    model_dir = None
-    if save_callback:
-        model_dir = save_callback[0].filepath
-        logger.info("Training a model in %s", model_dir)
-    history = model.fit(
-        x=train_input_fn(),
-        epochs=epochs,
-        verbose=max(verbose, 2),
-        callbacks=list(callbacks) if callbacks else None,
-        validation_data=None if eval_input_fn is None else eval_input_fn(),
-        class_weight=class_weight,
-        initial_epoch=initial_epoch,
-        steps_per_epoch=steps_per_epoch,
-        validation_steps=validation_steps,
-    )
-    click.echo(history.history)
-    if model_dir is not None:
-        with open(os.path.join(model_dir, 'keras_fit_history.json'), 'w') as f:
-            json.dump(history.history, f)
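Like the other commands here, ``fit`` resolves its options from a config file; a hypothetical minimal config (model, data, and epoch count are made up for illustration) could look like::

    import tensorflow as tf

    # picked up as --model
    model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(4,))])
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    )

    # picked up as --train-input-fn
    def train_input_fn():
        x = tf.random.uniform((32, 4))
        y = tf.zeros((32,), dtype=tf.int32)
        return tf.data.Dataset.from_tensor_slices((x, y)).batch(8)

    epochs = 2  # picked up as --epochs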
diff --git a/bob/learn/tensorflow/script/predict_bio.py b/bob/learn/tensorflow/script/predict_bio.py
deleted file mode 100644
index 54c5bb2c12cacac2c87416f4ded0df7a4140cfdf..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/predict_bio.py
+++ /dev/null
@@ -1,451 +0,0 @@
-#!/usr/bin/env python
-"""Saves predictions or embeddings of tf.estimators.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import os
-import sys
-import logging
-import click
-from bob.extension.scripts.click_helper import (
-    verbosity_option,
-    ConfigCommand,
-    ResourceOption,
-    log_parameters,
-)
-from collections import defaultdict
-import numpy as np
-import tensorflow as tf
-from bob.io.base import create_directories_safe
-from bob.bio.base.utils import save
-from bob.bio.base.tools.grid import indices
-from bob.learn.tensorflow.dataset.bio import BioGenerator
-
-try:
-    import bob.bio.video
-except ModuleNotFoundError:
-    pass
-
-logger = logging.getLogger(__name__)
-
-
-def make_output_path(output_dir, key):
-    """Returns an output path used for saving keys. You need to make sure the
-    directories leading to this output path exist.
-
-    Parameters
-    ----------
-    output_dir : str
-        The root directory to save the results
-    key : str
-        The key of the sample. Usually biofile.make_path("", "")
-
-    Returns
-    -------
-    str
-        The path for the provided key.
-    """
-    return os.path.join(output_dir, key + ".hdf5")
-
-
-def non_existing_files(paths, force=False):
-    if force:
-        for i in range(len(paths)):
-            yield i
-        return
-    for i, path in enumerate(paths):
-        if not os.path.isfile(path):
-            yield i
-
-
-def save_predictions(output_dir, key, pred_buffer, video_container, remove_nan=False):
-    outpath = make_output_path(output_dir, key)
-    create_directories_safe(os.path.dirname(outpath))
-    logger.debug("Saving predictions for %s", key)
-    if video_container:
-        fc = bob.bio.video.FrameContainer()
-        for i, v in enumerate(pred_buffer[key]):
-            if remove_nan and np.isnan(v):
-                continue
-            fc.add(i, v)
-        data = fc
-    else:
-        if remove_nan:
-            pred_buffer[key] = np.array(pred_buffer[key])
-            pred_buffer[key] = pred_buffer[key][~np.isnan(pred_buffer[key])]
-        data = np.mean(pred_buffer[key], axis=0)
-    save(data, outpath)
-
-
-@click.command(entry_point_group="bob.learn.tensorflow.config", cls=ConfigCommand)
-@click.option(
-    "--estimator",
-    "-e",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.learn.tensorflow.estimator",
-    help="The estimator that will be evaluated.",
-)
-@click.option(
-    "--database",
-    "-d",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.bio.database",
-    help="A bio database. Its original_directory must point to the correct " "path.",
-)
-@click.option(
-    "--biofiles",
-    required=True,
-    cls=ResourceOption,
-    help="The list of the bio files. You can only provide this through config "
-    "files.",
-)
-@click.option(
-    "--bio-predict-input-fn",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.learn.tensorflow.biogenerator_input",
-    help="A callable with the signature of "
-    "`input_fn = bio_predict_input_fn(generator, output_types, output_shapes)`"
-    " The inputs are documented in :any:`tf.data.Dataset.from_generator`"
-    " and the output should be a function with no arguments and is passed"
-    " to :any:`tf.estimator.Estimator.predict`.",
-)
-@click.option(
-    "--output-dir",
-    "-o",
-    required=True,
-    cls=ResourceOption,
-    help="The directory to save the predictions.",
-)
-@click.option(
-    "--load-data",
-    cls=ResourceOption,
-    entry_point_group="bob.learn.tensorflow.load_data",
-    help="A callable with the signature of "
-    "``data = load_data(database, biofile)``. "
-    ":any:`bob.bio.base.read_original_data` is used by default.",
-)
-@click.option(
-    "--hooks",
-    cls=ResourceOption,
-    multiple=True,
-    entry_point_group="bob.learn.tensorflow.hook",
-    help="List of SessionRunHook subclass instances.",
-)
-@click.option(
-    "--predict-keys",
-    "-k",
-    multiple=True,
-    default=None,
-    cls=ResourceOption,
-    help="List of `str`, name of the keys to predict. It is used if the "
-    "`EstimatorSpec.predictions` is a `dict`. If `predict_keys` is used "
-    "then rest of the predictions will be filtered from the dictionary. "
-    "If `None`, returns all.",
-)
-@click.option(
-    "--checkpoint-path",
-    "-c",
-    cls=ResourceOption,
-    help="Path of a specific checkpoint to predict. If `None`, the "
-    "latest checkpoint in `model_dir` is used. This can also "
-    'be a folder which contains a "checkpoint" file where the '
-    "latest checkpoint from inside this file will be used as "
-    "checkpoint_path.",
-)
-@click.option(
-    "--multiple-samples",
-    "-m",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, it assumes that the db interface returns "
-    "several samples from a biofile. This option can be used "
-    "when you are working with videos.",
-)
-@click.option(
-    "--array",
-    "-t",
-    type=click.INT,
-    default=1,
-    cls=ResourceOption,
-    help="Use this option alongside gridtk to submit this script as " "an array job.",
-)
-@click.option(
-    "--force",
-    "-f",
-    is_flag=True,
-    cls=ResourceOption,
-    help="Whether to overwrite existing predictions.",
-)
-@click.option(
-    "--video-container",
-    "-vc",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, the predictions will be written in FrameContainers from"
-    " bob.bio.video. You need to install bob.bio.video as well.",
-)
-@click.option(
-    "--remove-nan",
-    "-rn",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, will remove nans before computing the mean or remove nans "
-    "from the frame container.",
-)
-@verbosity_option(cls=ResourceOption)
-def predict_bio(
-    estimator,
-    database,
-    biofiles,
-    bio_predict_input_fn,
-    output_dir,
-    load_data,
-    hooks,
-    predict_keys,
-    checkpoint_path,
-    multiple_samples,
-    array,
-    force,
-    video_container,
-    remove_nan,
-    **kwargs
-):
-    """Saves predictions or embeddings of tf.estimators.
-
-    This script works with bob.bio.base databases and requires tensorflow 1.4
-    or above.
-
-    An example configuration for a trained model and its evaluation could be::
-
-        import tensorflow as tf
-
-        # define the database:
-        from bob.bio.base.test.dummy.database import database
-
-        # load the estimator model
-        estimator = tf.estimator.Estimator(model_fn, model_dir)
-
-        groups = ['dev']
-        biofiles = database.all_files(groups)
-
-
-        # the ``dataset = tf.data.Dataset.from_generator(generator,
-        # output_types, output_shapes)`` line is mandatory in the function
-        # below. You have to create it in your configuration file since you
-        # want it to be created in the same graph as your model.
-        def bio_predict_input_fn(generator, output_types, output_shapes):
-            def input_fn():
-                dataset = tf.data.Dataset.from_generator(
-                    generator, output_types, output_shapes)
-                # apply all kinds of transformations here, process the data
-                # even further if you want.
-                dataset = dataset.prefetch(1)
-                dataset = dataset.batch(10**3)
-                images, labels, keys = dataset.make_one_shot_iterator().get_next()
-
-                return {'data': images, 'key': keys}, labels
-            return input_fn
-    """
-    log_parameters(logger, ignore=("biofiles",))
-    logger.debug("len(biofiles): %d", len(biofiles))
-
-    assert len(biofiles), "biofiles are empty!"
-
-    if array > 1:
-        start, end = indices(biofiles, array)
-        biofiles = biofiles[start:end]
-
-    # filter the existing files
-    paths = [make_output_path(output_dir, f.make_path("", "")) for f in biofiles]
-    indexes = non_existing_files(paths, force)
-    biofiles = [biofiles[i] for i in indexes]
-
-    if len(biofiles) == 0:
-        logger.warning("The biofiles are empty after checking for existing files.")
-        return
-
-    generator = BioGenerator(
-        database, biofiles, load_data=load_data, multiple_samples=multiple_samples
-    )
-
-    predict_input_fn = bio_predict_input_fn(
-        generator, generator.output_types, generator.output_shapes
-    )
-
-    logger.info("Saving the predictions of %d files in %s", len(generator), output_dir)
-    generic_predict(
-        estimator=estimator,
-        predict_input_fn=predict_input_fn,
-        output_dir=output_dir,
-        predict_keys=predict_keys,
-        checkpoint_path=checkpoint_path,
-        hooks=hooks,
-        video_container=video_container,
-        remove_nan=remove_nan,
-    )
-
-
-@click.command(entry_point_group="bob.learn.tensorflow.config", cls=ConfigCommand)
-@click.option(
-    "--estimator",
-    "-e",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.learn.tensorflow.estimator",
-    help="The estimator that will be evaluated.",
-)
-@click.option(
-    "--predict-input-fn",
-    required=True,
-    cls=ResourceOption,
-    entry_point_group="bob.learn.tensorflow.input_fn",
-    help="A callable with no arguments which will be used in estimator.predict.",
-)
-@click.option(
-    "--output-dir",
-    "-o",
-    required=True,
-    cls=ResourceOption,
-    help="The directory to save the predictions.",
-)
-@click.option(
-    "--predict-keys",
-    "-k",
-    multiple=True,
-    default=None,
-    cls=ResourceOption,
-    help="List of `str`, name of the keys to predict. It is used if the "
-    "`EstimatorSpec.predictions` is a `dict`. If `predict_keys` is used "
-    "then rest of the predictions will be filtered from the dictionary. "
-    "If `None`, returns all.",
-)
-@click.option(
-    "--checkpoint-path",
-    "-c",
-    cls=ResourceOption,
-    help="Path of a specific checkpoint to predict. If `None`, the "
-    "latest checkpoint in `model_dir` is used. This can also "
-    'be a folder which contains a "checkpoint" file where the '
-    "latest checkpoint from inside this file will be used as "
-    "checkpoint_path.",
-)
-@click.option(
-    "--hooks",
-    cls=ResourceOption,
-    multiple=True,
-    entry_point_group="bob.learn.tensorflow.hook",
-    help="List of SessionRunHook subclass instances.",
-)
-@click.option(
-    "--video-container",
-    "-vc",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, the predictions will be written in FrameContainers from"
-    " bob.bio.video. You need to install bob.bio.video as well.",
-)
-@click.option(
-    "--remove-nan",
-    "-rn",
-    is_flag=True,
-    cls=ResourceOption,
-    help="If provided, will remove nans before computing the mean or remove nans "
-    "from the frame container.",
-)
-@verbosity_option(cls=ResourceOption)
-def predict(
-    estimator,
-    predict_input_fn,
-    output_dir,
-    predict_keys,
-    checkpoint_path,
-    hooks,
-    video_container,
-    remove_nan,
-    **kwargs
-):
-
-    generic_predict(
-        estimator=estimator,
-        predict_input_fn=predict_input_fn,
-        output_dir=output_dir,
-        predict_keys=predict_keys,
-        checkpoint_path=checkpoint_path,
-        hooks=hooks,
-        video_container=video_container,
-        remove_nan=remove_nan,
-    )
-
-
-def generic_predict(
-    estimator,
-    predict_input_fn,
-    output_dir,
-    predict_keys=None,
-    checkpoint_path=None,
-    hooks=None,
-    video_container=False,
-    remove_nan=False,
-):
-    # if the checkpoint_path is a directory, pick the latest checkpoint from
-    # that directory
-    if checkpoint_path:
-        if os.path.isdir(checkpoint_path):
-            ckpt = tf.train.get_checkpoint_state(checkpoint_path)
-            if ckpt and ckpt.model_checkpoint_path:
-                checkpoint_path = ckpt.model_checkpoint_path
-
-        logger.info("Restoring the model from %s", checkpoint_path)
-
-    predictions = estimator.predict(
-        predict_input_fn,
-        predict_keys=predict_keys,
-        hooks=hooks,
-        checkpoint_path=checkpoint_path,
-    )
-
-    if video_container:
-        try:
-            import bob.bio.video
-        except ModuleNotFoundError:
-            raise click.ClickException(
-                "Could not import bob.bio.video. Have you installed it?"
-            )
-
-    pred_buffer = defaultdict(list)
-    for i, pred in enumerate(predictions):
-        key = pred["key"]
-        # key is in bytes format in Python 3
-        if sys.version_info >= (3,):
-            key = key.decode(errors="replace")
-        if predict_keys:
-            prob = pred[predict_keys[0]]
-        else:
-            prob = pred.get(
-                "probabilities", pred.get("embeddings", pred.get("predictions"))
-            )
-        assert prob is not None
-        pred_buffer[key].append(prob)
-        if i == 0:
-            last_key = key
-        if last_key == key:
-            continue
-        else:
-            save_predictions(output_dir, last_key, pred_buffer, video_container, remove_nan)
-            # delete saved data so we don't run out of RAM
-            del pred_buffer[last_key]
-            # start saving this new key
-            last_key = key
-
-    try:
-        key
-        # save the final returned key as well:
-        save_predictions(output_dir, key, pred_buffer, video_container, remove_nan)
-    except UnboundLocalError:
-        # if the input_fn was empty and hence key is not defined
-        click.echo("predict_input_fn returned no samples.")
-        pass
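The flush-on-key-change loop in ``generic_predict`` relies on equal keys arriving adjacently; the same grouping can be expressed compactly with ``itertools.groupby``, shown here as a sketch over stand-in data::

    from itertools import groupby

    import numpy as np

    predictions = [("a", 0.1), ("a", 0.3), ("b", 0.9)]  # (key, prob) pairs
    for key, group in groupby(predictions, key=lambda p: p[0]):
        probs = [prob for _, prob in group]
        print(key, np.mean(probs))  # per-key mean, as in save_predictions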
diff --git a/bob/learn/tensorflow/script/style_transfer.py b/bob/learn/tensorflow/script/style_transfer.py
deleted file mode 100644
index 85c1db77b6e58d24fed3aa942c2dbbab7a17ec3f..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/style_transfer.py
+++ /dev/null
@@ -1,199 +0,0 @@
-#!/usr/bin/env python
-"""Trains networks using Tensorflow estimators.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import logging
-import click
-import tensorflow as tf
-from bob.extension.scripts.click_helper import (verbosity_option,
-                                                ConfigCommand, ResourceOption)
-import bob.io.image
-import bob.io.base
-import numpy
-import bob.ip.base
-import bob.ip.color
-import sys
-import os
-from bob.learn.tensorflow.style_transfer import do_style_transfer
-
-
-logger = logging.getLogger(__name__)
-
-
-@click.command(
-    entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand)
-@click.argument('content_image_path', required=True)
-@click.argument('output_path', required=True)
-@click.option('--style-image-paths',
-              cls=ResourceOption,
-              required=True,
-              multiple=True,
-              entry_point_group='bob.learn.tensorflow.style_images',
-              help='List of images that encodes the style.')
-@click.option('--architecture',
-              '-a',
-              required=True,
-              cls=ResourceOption,
-              entry_point_group='bob.learn.tensorflow.architecture',
-              help='The base architecture.')
-@click.option('--checkpoint-dir',
-              '-c',
-              required=True,
-              cls=ResourceOption,
-              help='CNN checkpoint path')
-@click.option('--iterations',
-              '-i',
-              type=click.types.INT,
-              help='Number of iterations to generate the image',
-              default=1000)
-@click.option('--learning_rate',
-              '-r',
-              type=click.types.FLOAT,
-              help='Learning rate.',
-              default=1.)
-@click.option('--content-weight',
-              type=click.types.FLOAT,
-              help='Weight of the content loss.',
-              default=5.)
-@click.option('--style-weight',
-              type=click.types.FLOAT,
-              help='Weight of the style loss.',
-              default=100.)
-@click.option('--denoise-weight',
-              type=click.types.FLOAT,
-              help='Weight of the denoising loss.',
-              default=100.)
-@click.option('--content-end-points',
-              cls=ResourceOption,
-              multiple=True,
-              entry_point_group='bob.learn.tensorflow.end_points',
-              help='List of end_points used to encode the content')
-@click.option('--style-end-points',
-              cls=ResourceOption,
-              multiple=True,
-              entry_point_group='bob.learn.tensorflow.end_points',
-              help='List of end_points used to encode the style')
-@click.option('--scopes',
-              cls=ResourceOption,
-              entry_point_group='bob.learn.tensorflow.scopes',
-              help='Dictionary mapping checkpoint variable scopes to graph scopes',
-              required=True)
-@click.option('--pure-noise',
-               is_flag=True,
-               help="If set will save the raw noisy generated image."
-                    "If not set, the output will be RGB = stylizedYUV.Y, originalYUV.U, originalYUV.V"
-              )
-@click.option('--preprocess-fn',
-              '-pr',
-              cls=ResourceOption,
-              entry_point_group='bob.learn.tensorflow.preprocess_fn',
-              help='Preprocess function. Pointer to a function that preprocesses the INPUT signal')
-@click.option('--un-preprocess-fn',
-              '-un',
-              cls=ResourceOption,
-              entry_point_group='bob.learn.tensorflow.preprocess_fn',
-              help='Un-preprocess function. Pointer to a function that un-preprocesses the OUTPUT signal')
-@click.option(
-  '--start-from',
-  '-sf',
-  cls=ResourceOption,
-  default="noise",
-  type=click.Choice(["noise", "content", "style"]),
-  help="Starts from this image for reconstruction",
-)
-@verbosity_option(cls=ResourceOption)
-def style_transfer(content_image_path, output_path, style_image_paths,
-                   architecture, checkpoint_dir,
-                   iterations, learning_rate,
-                   content_weight, style_weight, denoise_weight, content_end_points,
-                   style_end_points, scopes, pure_noise, preprocess_fn,
-                   un_preprocess_fn, start_from, **kwargs):
-    """
-    Performs neural style transfer using the approach presented in:
-
-    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
-
-    \b
-
-    If you want to run a style transfer using InceptionV2 as the base
-    architecture, use the following CONFIG template:
-
-    CONFIG.PY
-    ```
-
-       from bob.extension import rc
-
-       from bob.learn.tensorflow.network import inception_resnet_v2_batch_norm
-       architecture = inception_resnet_v2_batch_norm
-
-       checkpoint_dir = rc["bob.bio.face_ongoing.idiap_casia_inception_v2_centerloss_rgb"]
-
-       style_end_points = ["Conv2d_1a_3x3", "Conv2d_2b_3x3", "Conv2d_3b_1x1", "Conv2d_4a_3x3"]
-
-       content_end_points = ["Bottleneck", "PreLogitsFlatten"]
-
-       scopes = {"InceptionResnetV2/":"InceptionResnetV2/"}
-
-    ```
-    \b
-
-    Then run::
-
-       $ bob tf style <content-image> <output-image> --style-image-paths <style-image> CONFIG.py
-
-
-    You can also provide a list of images to encode the style using the config file as in the example below.
-
-    CONFIG.PY
-    ```
-
-       from bob.extension import rc
-
-       from bob.learn.tensorflow.network import inception_resnet_v2_batch_norm
-       architecture = inception_resnet_v2_batch_norm
-
-       checkpoint_dir = rc["bob.bio.face_ongoing.idiap_casia_inception_v2_centerloss_rgb"]
-
-       style_end_points = ["Conv2d_1a_3x3", "Conv2d_2b_3x3", "Conv2d_3b_1x1", "Conv2d_4a_3x3"]
-
-       content_end_points = ["Bottleneck", "PreLogitsFlatten"]
-
-       scopes = {"InceptionResnetV2/":"InceptionResnetV2/"}
-
-       style_image_paths = ["STYLE_1.png",
-                            "STYLE_2.png"]
-
-    ```
-
-    Then run::
-
-       $ bob tf style <content-image> <output-image> CONFIG.py
-
-    \b \b
-
-    """
-
-    logger.info("Style transfer, content_image={0}, style_image={1}".format(content_image_path, style_image_paths))
-
-    # Loading content image
-    content_image = bob.io.base.load(content_image_path)
-
-    # Reading and converting to the tensorflow format
-    style_images = []
-    for path in style_image_paths:
-        style_images.append(bob.io.base.load(path))
-
-    output = do_style_transfer(content_image, style_images,
-                               architecture, checkpoint_dir, scopes,
-                               content_end_points, style_end_points,
-                               preprocess_fn=preprocess_fn, un_preprocess_fn=un_preprocess_fn,
-                               pure_noise=pure_noise,
-                               iterations=iterations, learning_rate=learning_rate,
-                               content_weight=content_weight, style_weight=style_weight,
-                               denoise_weight=denoise_weight, start_from=start_from)
-
-    os.makedirs(os.path.dirname(output_path), exist_ok=True)
-    bob.io.base.save(output, output_path)
diff --git a/bob/learn/tensorflow/script/train.py b/bob/learn/tensorflow/script/train.py
deleted file mode 100644
index d33d5767abb82a3c012805df4117cce227df3bf5..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/train.py
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env python
-"""Trains networks using Tensorflow estimators.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import logging
-import click
-from bob.extension.scripts.click_helper import (
-    verbosity_option, ConfigCommand, ResourceOption, log_parameters)
-
-logger = logging.getLogger(__name__)
-
-
-@click.command(
-    entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand)
-@click.option(
-    '--estimator',
-    '-e',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.estimator',
-    help='The estimator that will be trained.')
-@click.option(
-    '--train-input-fn',
-    '-i',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.input_fn',
-    help='The ``input_fn`` that will be given to '
-    ':any:`tf.estimator.Estimator.train`.')
-@click.option(
-    '--hooks',
-    cls=ResourceOption,
-    multiple=True,
-    entry_point_group='bob.learn.tensorflow.hook',
-    help='List of SessionRunHook subclass instances. Used for callbacks '
-    'inside the training loop.')
-@click.option(
-    '--steps',
-    '-s',
-    cls=ResourceOption,
-    type=click.types.INT,
-    help='Number of steps for which to train model. See '
-    ':any:`tf.estimator.Estimator.train`.')
-@click.option(
-    '--max-steps',
-    '-m',
-    cls=ResourceOption,
-    type=click.types.INT,
-    help='Number of total steps for which to train model. See '
-    ':any:`tf.estimator.Estimator.train`.')
-@verbosity_option(cls=ResourceOption)
-def train(estimator, train_input_fn, hooks, steps, max_steps, **kwargs):
-    """Trains networks using Tensorflow estimators."""
-    log_parameters(logger)
-
-    # Train
-    logger.info("Training a model in %s", estimator.model_dir)
-    estimator.train(
-        input_fn=train_input_fn, hooks=hooks, steps=steps, max_steps=max_steps)
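Note how the two stopping options differ in ``tf.estimator``: ``steps`` is counted from the latest checkpoint while ``max_steps`` is an absolute bound on the global step; a short illustration, assuming an ``estimator`` and ``train_input_fn`` already exist::

    # train for 1000 additional steps from the latest checkpoint
    estimator.train(input_fn=train_input_fn, steps=1000)

    # train until the global step reaches 1000 in total; resuming an
    # already-finished run is then a no-op
    estimator.train(input_fn=train_input_fn, max_steps=1000)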
diff --git a/bob/learn/tensorflow/script/train_and_evaluate.py b/bob/learn/tensorflow/script/train_and_evaluate.py
deleted file mode 100644
index 3ba4ebde0c874750ef97ea6ec8a942c1a3d5bc85..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/train_and_evaluate.py
+++ /dev/null
@@ -1,71 +0,0 @@
-#!/usr/bin/env python
-"""Trains and evaluates a network using Tensorflow estimators.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import tensorflow as tf
-from bob.learn.tensorflow.utils.hooks import EarlyStopException
-import logging
-import click
-from bob.extension.scripts.click_helper import (verbosity_option,
-                                                ConfigCommand, ResourceOption)
-
-logger = logging.getLogger(__name__)
-
-
-@click.command(
-    entry_point_group='bob.learn.tensorflow.config', cls=ConfigCommand)
-@click.option(
-    '--estimator',
-    '-e',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.estimator',
-    help='The estimator that will be trained and evaluated.')
-@click.option(
-    '--train-spec',
-    '-it',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.trainspec',
-    help='See :any:`tf.estimator.Estimator.train_and_evaluate`.')
-@click.option(
-    '--eval-spec',
-    '-ie',
-    required=True,
-    cls=ResourceOption,
-    entry_point_group='bob.learn.tensorflow.evalspec',
-    help='See :any:`tf.estimator.Estimator.train_and_evaluate`.')
-@click.option(
-    '--exit-ok-exceptions',
-    cls=ResourceOption,
-    multiple=True,
-    default=(EarlyStopException, ),
-    show_default=True,
-    entry_point_group='bob.learn.tensorflow.exception',
-    help='A list of exceptions to exit properly if they occur. If nothing is '
-    'provided, the EarlyStopException is handled by default.')
-@verbosity_option(cls=ResourceOption)
-def train_and_evaluate(estimator, train_spec, eval_spec, exit_ok_exceptions,
-                       **kwargs):
-    """Trains and evaluates a network using Tensorflow estimators.
-
-    This script calls the estimator.train_and_evaluate function. Please see:
-    https://www.tensorflow.org/api_docs/python/tf/estimator/train_and_evaluate
-    https://www.tensorflow.org/api_docs/python/tf/estimator/TrainSpec
-    https://www.tensorflow.org/api_docs/python/tf/estimator/EvalSpec
-    for more details.
-    """
-    logger.debug('estimator: %s', estimator)
-    logger.debug('train_spec: %s', train_spec)
-    logger.debug('eval_spec: %s', eval_spec)
-    logger.debug('exit_ok_exceptions: %s', exit_ok_exceptions)
-    logger.debug('kwargs: %s', kwargs)
-
-    # Train and evaluate
-    try:
-        tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
-    except exit_ok_exceptions as e:
-        logger.exception(e)
-        return
diff --git a/bob/learn/tensorflow/script/trim.py b/bob/learn/tensorflow/script/trim.py
deleted file mode 100644
index d89bb292a75e75b191efc7e9542a083d4e9d9c45..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/trim.py
+++ /dev/null
@@ -1,87 +0,0 @@
-#!/usr/bin/env python
-"""Deletes extra tensorflow checkpoints.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import click
-import logging
-import os
-import glob
-import tensorflow as tf
-from bob.extension.scripts.click_helper import verbosity_option, log_parameters
-
-logger = logging.getLogger(__name__)
-
-
-def delete_extra_checkpoints(directory, keep_last_n, dry_run):
-    try:
-        ckpt = tf.train.get_checkpoint_state(directory)
-    except Exception:
-        return
-    if (not ckpt) or (not ckpt.model_checkpoint_path):
-        logger.debug('Could not find a checkpoint in %s', directory)
-        return
-    for checkpoint_path in ckpt.all_model_checkpoint_paths[:-keep_last_n]:
-        if checkpoint_path == ckpt.model_checkpoint_path:
-            continue
-        if dry_run:
-            click.echo('Would delete {}.*'.format(checkpoint_path))
-        else:
-            logger.info('Deleting %s.*', checkpoint_path)
-            for path in glob.glob('{}.*'.format(checkpoint_path)):
-                os.remove(path)
-
-    def _existing(x):
-        return glob.glob('{}.*'.format(x))
-
-    # update the checkpoint file
-    all_paths = filter(_existing, ckpt.all_model_checkpoint_paths)
-    all_paths = list(map(os.path.basename, all_paths))
-    model_checkpoint_path = os.path.basename(ckpt.model_checkpoint_path)
-    tf.train.update_checkpoint_state(
-        directory, model_checkpoint_path, all_paths)
-
-
-@click.command(epilog='''\b
-Examples:
-$ bob tf trim -vv ~/my_models/model_dir
-$ bob tf trim -vv ~/my_models/model_dir1 ~/my_models/model_dir2
-$ bob tf trim -vvr ~/my_models
-$ bob tf trim -vvrn ~/my_models
-$ bob tf trim -vvrK 2 ~/my_models
-''')
-@click.argument(
-    'root_dirs',
-    nargs=-1,
-    type=click.Path(exists=True, file_okay=False, dir_okay=True),
-)
-@click.option(
-    '--keep-last-n-models',
-    '-K',
-    type=click.INT,
-    default=1,
-    show_default=True,
-    help='The number of recent checkpoints to keep.')
-@click.option(
-    '--recurse',
-    '-r',
-    is_flag=True,
-    help='If given, it will delete checkpoints recursively.')
-@click.option(
-    '--dry-run',
-    '-n',
-    is_flag=True,
-    help='If given, will only print what will be deleted.')
-@verbosity_option()
-def trim(root_dirs, keep_last_n_models, recurse, dry_run, **kwargs):
-    """Deletes extra tensorflow checkpoints."""
-    log_parameters(logger)
-
-    for root_dir in root_dirs:
-        if recurse:
-            for directory, _, _ in os.walk(root_dir):
-                delete_extra_checkpoints(directory, keep_last_n_models,
-                                         dry_run)
-        else:
-            delete_extra_checkpoints(root_dir, keep_last_n_models, dry_run)
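
With the estimator-era `trim` command removed, TF2 provides the "keep only the last N checkpoints" behaviour natively through `tf.train.CheckpointManager`. A minimal sketch, assuming a `tf.train.Checkpoint`-based training loop (the model, directory, and `max_to_keep` values are illustrative, not part of this diff):

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(10)])
ckpt = tf.train.Checkpoint(model=model)
# CheckpointManager prunes older checkpoints automatically, which is what
# `bob tf trim --keep-last-n-models` used to do after the fact.
manager = tf.train.CheckpointManager(
    ckpt, directory="my_models/model_dir", max_to_keep=2
)

for step in range(5):
    # ... one training step would run here ...
    manager.save()  # only the 2 most recent checkpoints remain on disk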
diff --git a/bob/learn/tensorflow/script/utils.py b/bob/learn/tensorflow/script/utils.py
deleted file mode 100644
index 12b360c00dff65ca02d8825ce95fcd68d5b14da3..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/script/utils.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import click
-
-
-def eager_execution_option(**kwargs):
-    """Adds an option to your command to enable eager execution of tensorflow
-
-    Returns
-    -------
-     callable
-      A decorator to be used for adding this option to click commands
-    """
-    def custom_eager_execution_option(func):
-        def callback(ctx, param, value):
-            if not value or ctx.resilient_parsing:
-                return
-            import tensorflow as tf
-            tf.enable_eager_execution()
-            if not tf.executing_eagerly():
-                raise click.ClickException(
-                    "Could not enable tensorflow eager execution mode!")
-            else:
-                click.echo("Executing tensorflow operations eagerly!")
-
-        return click.option(
-            '-e', '--eager', is_flag=True, callback=callback,
-            expose_value=False, is_eager=True, **kwargs)(func)
-    return custom_eager_execution_option
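
The `--eager` option is dropped from both CLI groups below because TF2 executes eagerly by default, so there is nothing left to opt into. A quick illustration of the default behaviour:

import tensorflow as tf

# In TF2, eager execution is on by default; no flag or session is needed.
assert tf.executing_eagerly()
print(tf.reduce_sum(tf.constant([1, 2, 3])).numpy())  # prints 6 immediately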
diff --git a/bob/learn/tensorflow/scripts/__init__.py b/bob/learn/tensorflow/scripts/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..435eb712f6b140c26c5402f125616b558e7e223d
--- /dev/null
+++ b/bob/learn/tensorflow/scripts/__init__.py
@@ -0,0 +1,2 @@
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith("_")]
diff --git a/bob/learn/tensorflow/scripts/datasets_to_tfrecords.py b/bob/learn/tensorflow/scripts/datasets_to_tfrecords.py
new file mode 100644
index 0000000000000000000000000000000000000000..96f16e7ae1e0e93752e0c383a4c769665de3e6ad
--- /dev/null
+++ b/bob/learn/tensorflow/scripts/datasets_to_tfrecords.py
@@ -0,0 +1,69 @@
+"""Convert datasets to TFRecords
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import logging
+
+import click
+
+from bob.extension.scripts.click_helper import ConfigCommand
+from bob.extension.scripts.click_helper import ResourceOption
+from bob.extension.scripts.click_helper import verbosity_option
+
+
+logger = logging.getLogger(__name__)
+
+
+@click.command(entry_point_group="bob.learn.tensorflow.config", cls=ConfigCommand)
+@click.option(
+    "--dataset",
+    required=True,
+    cls=ResourceOption,
+    entry_point_group="bob.learn.tensorflow.dataset",
+    help="A tf.data.Dataset to be used.",
+)
+@click.option(
+    "--output", "-o", required=True, cls=ResourceOption, help="Name of the output file."
+)
+@click.option(
+    "--force",
+    "-f",
+    is_flag=True,
+    cls=ResourceOption,
+    help="Whether to overwrite existing files.",
+)
+@verbosity_option(cls=ResourceOption)
+def datasets_to_tfrecords(dataset, output, force, **kwargs):
+    """Converts tensorflow datasets into TFRecords.
+    Takes a list of datasets and outputs and writes each dataset into its output.
+    ``datasets`` and ``outputs`` variables must be lists.
+    You can convert the written TFRecord files back to datasets using
+    :any:`bob.learn.tensorflow.dataset.tfrecords.dataset_from_tfrecord`.
+
+    To use this script with SGE, change your dataset (like shard it) and output a part
+    of the dataset based on the SGE_TASK_ID environment variable in your config file.
+    """
+    import os
+
+    from bob.extension.scripts.click_helper import log_parameters
+    from bob.learn.tensorflow.data.tfrecords import dataset_to_tfrecord
+    from bob.learn.tensorflow.data.tfrecords import tfrecord_name_and_json_name
+
+    log_parameters(logger)
+
+    output, json_output = tfrecord_name_and_json_name(output)
+    if not force and os.path.isfile(output):
+        click.echo("Output file already exists: {}".format(output))
+        return
+
+    click.echo("Writing tfrecod to: {}".format(output))
+    os.makedirs(os.path.dirname(output) or os.curdir, exist_ok=True)
+    try:
+        dataset_to_tfrecord(dataset, output)
+    except Exception:
+        click.echo("Something failed. Deleting unfinished files.")
+        for path in (output, json_output):
+            if os.path.isfile(path):
+                os.remove(path)
+        raise
+    click.echo("Successfully wrote all files.")
diff --git a/bob/learn/tensorflow/scripts/fit.py b/bob/learn/tensorflow/scripts/fit.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb3085924b3d9c7f5ff768c06f22d03f6d3b505a
--- /dev/null
+++ b/bob/learn/tensorflow/scripts/fit.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+"""Trains networks using Keras Models.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import logging
+import os
+
+import click
+import tensorflow as tf
+
+from bob.extension.scripts.click_helper import ConfigCommand
+from bob.extension.scripts.click_helper import ResourceOption
+from bob.extension.scripts.click_helper import log_parameters
+from bob.extension.scripts.click_helper import verbosity_option
+
+logger = logging.getLogger(__name__)
+
+
+@click.command(entry_point_group="bob.learn.tensorflow.config", cls=ConfigCommand)
+@click.option(
+    "--model-fn",
+    "-m",
+    required=True,
+    cls=ResourceOption,
+    entry_point_group="bob.learn.tensorflow.model",
+    help="The keras model that will be trained.",
+)
+@click.option(
+    "--train-input-fn",
+    "-i",
+    required=True,
+    cls=ResourceOption,
+    entry_point_group="bob.learn.tensorflow.input_fn",
+    help="A function that will return the training data as a tf.data.Dataset "
+    "or tf.data.Iterator. This will be given as `x` to "
+    "tf.keras.Model.fit.",
+)
+@click.option(
+    "--epochs",
+    "-e",
+    default=10,
+    type=click.types.INT,
+    cls=ResourceOption,
+    help="Number of epochs to train model. See " "tf.keras.Model.fit.",
+)
+@click.option(
+    "--callbacks",
+    cls=ResourceOption,
+    multiple=True,
+    entry_point_group="bob.learn.tensorflow.callback",
+    help="List of tf.keras.callbacks. Used for callbacks " "inside the training loop.",
+)
+@click.option(
+    "--eval-input-fn",
+    "-i",
+    cls=ResourceOption,
+    entry_point_group="bob.learn.tensorflow.input_fn",
+    help="A function that will return the validation data as a tf.data.Dataset"
+    " or tf.data.Iterator. This will be given as `validation_data` to "
+    "tf.keras.Model.fit.",
+)
+@click.option(
+    "--class-weight", "-c", cls=ResourceOption, help="See tf.keras.Model.fit."
+)
+@click.option(
+    "--steps-per-epoch",
+    type=click.types.INT,
+    cls=ResourceOption,
+    help="See tf.keras.Model.fit.",
+)
+@click.option(
+    "--validation-steps",
+    type=click.types.INT,
+    cls=ResourceOption,
+    help="See tf.keras.Model.fit.",
+)
+@verbosity_option(cls=ResourceOption)
+def fit(
+    model_fn,
+    train_input_fn,
+    epochs,
+    verbose,
+    callbacks,
+    eval_input_fn,
+    class_weight,
+    steps_per_epoch,
+    validation_steps,
+    **kwargs
+):
+    """Trains networks using Keras models."""
+    log_parameters(logger)
+
+    # Train
+    save_callback = [
+        c for c in callbacks if isinstance(c, tf.keras.callbacks.ModelCheckpoint)
+    ]
+    model_dir = None
+    if save_callback:
+        model_dir = save_callback[0].filepath
+        logger.info("Training a model in %s", model_dir)
+    model = model_fn()
+
+    history = model.fit(
+        x=train_input_fn(),
+        epochs=epochs,
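+        # never drop below 2 so at least one line per epoch is logged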
+        verbose=max(verbose, 2),
+        callbacks=list(callbacks) if callbacks else None,
+        validation_data=None if eval_input_fn is None else eval_input_fn(),
+        class_weight=class_weight,
+        steps_per_epoch=steps_per_epoch,
+        validation_steps=validation_steps,
+    )
+    click.echo(history.history)
+    if model_dir is not None:
+        with open(os.path.join(model_dir, "keras_fit_history.json"), "w") as f:
+            json.dump(history.history, f)
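
This command supersedes the estimator-based `train` and `train_and_evaluate` scripts removed above. A minimal sketch of a config file for it, assuming `model_fn`, `train_input_fn`, and `callbacks` are picked up through the command's ResourceOptions; the MNIST model and the checkpoint path are illustrative:

import tensorflow as tf


def model_fn():
    model = tf.keras.Sequential(
        [
            tf.keras.layers.Flatten(input_shape=(28, 28)),
            tf.keras.layers.Dense(128, activation="relu"),
            tf.keras.layers.Dense(10),
        ]
    )
    model.compile(
        optimizer="adam",
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )
    return model


def train_input_fn():
    (x, y), _ = tf.keras.datasets.mnist.load_data()
    return tf.data.Dataset.from_tensor_slices((x / 255.0, y)).shuffle(1024).batch(32)


# The ModelCheckpoint filepath doubles as the directory where the command
# writes keras_fit_history.json.
callbacks = [tf.keras.callbacks.ModelCheckpoint(filepath="model_dir")]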
diff --git a/bob/learn/tensorflow/script/keras.py b/bob/learn/tensorflow/scripts/keras.py
similarity index 65%
rename from bob/learn/tensorflow/script/keras.py
rename to bob/learn/tensorflow/scripts/keras.py
index 9f6ee9c24b09f174ceba366b98e4d17cf0ebbdd0..3670421eaeba52bf2f40db2384f8c0f6e5e7c198 100644
--- a/bob/learn/tensorflow/script/keras.py
+++ b/bob/learn/tensorflow/scripts/keras.py
@@ -3,13 +3,12 @@
 import click
 import pkg_resources
 from click_plugins import with_plugins
+
 from bob.extension.scripts.click_helper import AliasedGroup
-from .utils import eager_execution_option
 
 
-@with_plugins(pkg_resources.iter_entry_points('bob.learn.tensorflow.keras_cli'))
+@with_plugins(pkg_resources.iter_entry_points("bob.learn.tensorflow.keras_cli"))
 @click.group(cls=AliasedGroup)
-@eager_execution_option()
 def keras():
     """Keras-related commands."""
     pass
diff --git a/bob/learn/tensorflow/script/tf.py b/bob/learn/tensorflow/scripts/tf.py
similarity index 66%
rename from bob/learn/tensorflow/script/tf.py
rename to bob/learn/tensorflow/scripts/tf.py
index f460df4333a100a9809b89d84012dec9ba0e3eb3..c7ccc63e485408d3a3683026b69ce3813f8b4d44 100644
--- a/bob/learn/tensorflow/script/tf.py
+++ b/bob/learn/tensorflow/scripts/tf.py
@@ -3,13 +3,12 @@
 import click
 import pkg_resources
 from click_plugins import with_plugins
+
 from bob.extension.scripts.click_helper import AliasedGroup
-from .utils import eager_execution_option
 
 
-@with_plugins(pkg_resources.iter_entry_points('bob.learn.tensorflow.cli'))
+@with_plugins(pkg_resources.iter_entry_points("bob.learn.tensorflow.cli"))
 @click.group(cls=AliasedGroup)
-@eager_execution_option()
 def tf():
     """Tensorflow-related commands."""
     pass
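
Both groups above are populated at runtime: `@with_plugins` scans setuptools entry points, so each subcommand is registered in `setup.py`, roughly along these lines. The exact entry names below are assumptions, not part of this diff:

entry_points={
    "bob.cli": [
        "tf = bob.learn.tensorflow.scripts.tf:tf",
        "keras = bob.learn.tensorflow.scripts.keras:keras",
    ],
    "bob.learn.tensorflow.cli": [
        "fit = bob.learn.tensorflow.scripts.fit:fit",
        "datasets-to-tfrecords = bob.learn.tensorflow.scripts.datasets_to_tfrecords:datasets_to_tfrecords",
    ],
},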
diff --git a/bob/learn/tensorflow/style_transfer/neural_transfer.py b/bob/learn/tensorflow/style_transfer/neural_transfer.py
deleted file mode 100644
index b65bd853273959a73f1499d34ab464ab26adc066..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/style_transfer/neural_transfer.py
+++ /dev/null
@@ -1,276 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-
-import tensorflow as tf
-import numpy
-import os
-from bob.learn.tensorflow.loss import linear_gram_style_loss, content_loss, denoising_loss
-import bob.io.image
-import bob.ip.color
-
-import logging
-logger = logging.getLogger(__name__)
-
-
-def compute_features(input_image, architecture, checkpoint_dir, target_end_points, preprocess_fn=None):
-    """
-    For a given set of end_points, convolve the input image until these points
-
-    Parameters
-    ----------
-
-    input_image: :any:`numpy.array`
-        Input image in the format WxHxC
-
-    architecture:
-        Pointer to the architecture function
-
-    checkpoint_dir: str
-        DCNN checkpoint directory
-
-    end_points: dict
-       Dictionary containing the end point tensors
-
-    preprocess_fn:
-       Pointer to a preprocess function
-
-    """
-
-    input_pl = tf.placeholder('float32', shape=(1, input_image.shape[1],
-                                                   input_image.shape[2],
-                                                   input_image.shape[3]))
-
-    if preprocess_fn is None:
-        _, end_points = architecture(input_pl, mode=tf.estimator.ModeKeys.PREDICT, trainable_variables=None)
-    else:
-        _, end_points = architecture(tf.stack([preprocess_fn(i) for i in tf.unstack(input_pl)]), mode=tf.estimator.ModeKeys.PREDICT, trainable_variables=None)
-    with tf.Session() as sess:
-        # Restoring the checkpoint for the given architecture
-        sess.run(tf.global_variables_initializer())
-        saver = tf.train.Saver()
-
-        if os.path.isdir(checkpoint_dir):
-            saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))
-        else:
-            saver.restore(sess, checkpoint_dir)
-
-        #content_feature = sess.run(end_points[CONTENT_END_POINTS], feed_dict={input_image: content_image})
-        features = sess.run([end_points[ep] for ep in target_end_points], feed_dict={input_pl: input_image})
-
-    # Killing the graph
-    tf.reset_default_graph()
-    return features
-
-
-def compute_gram(features):
-    """
-    Given a list of features (as numpy.arrays) comput the gram matrices of each
-    pinning the channel as in:
-
-    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
-
-    Parameters
-    ----------
-
-    features: :any:`numpy.array`
-      Convolved features in the format NxWxHxC
-
-    """
-
-    grams = []
-    for f in features:
-        f = numpy.reshape(f, (-1, f.shape[3]))
-        grams.append(numpy.matmul(f.T, f) / f.size)
-
-    return grams
-
-
-def do_style_transfer(content_image, style_images,
-                      architecture, checkpoint_dir, scopes,
-                      content_end_points, style_end_points,
-                      preprocess_fn=None, un_preprocess_fn=None, pure_noise=False,
-                      iterations=1000, learning_rate=0.1,
-                      content_weight=5., style_weight=500., denoise_weight=500., start_from="noise"):
-
-    """
-    Trains neural style transfer using the approach presented in:
-
-    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
-
-    Parameters
-    ----------
-
-    content_image: :any:`numpy.array`
-       Content image in the Bob format (C x W x H)
-
-    style_images: :any:`list`
-       List of numpy.array (Bob format (C x W x H)) that encodes the style
-
-    architecture:
-       Point to a function with the base architecture
-
-    checkpoint_dir:
-       CNN checkpoint path
-
-    scopes:
-       Dictionary containing the mapping scores
-
-    content_end_points:
-       List of end_points (from the architecture) for the used to encode the content
-
-    style_end_points:
-       List of end_points (from the architecture) for the used to encode the style
-
-    preprocess_fn:
-       Preprocess function. Pointer to a function that preprocess the INPUT signal
-
-    unpreprocess_fn:
-       Un preprocess function. Pointer to a function that preprocess the OUTPUT signal
-
-    pure_noise:
-       If set will save the raw noisy generated image.
-       If not set, the output will be RGB = stylizedYUV.Y, originalYUV.U, originalYUV.V
-
-    iterations:
-       Number of iterations to generate the image
-
-    learning_rate:
-       Adam learning rate
-
-    content_weight:
-       Weight of the content loss
-
-    style_weight:
-       Weight of the style loss
-
-    denoise_weight:
-       Weight denoising loss
-    """
-
-    def wise_shape(shape):
-        if len(shape)==2:
-            return (1, shape[0], shape[1], 1)
-        else:
-            return (1, shape[0], shape[1], shape[2])
-
-    def normalize4save(img):
-        return (255 * ((img - numpy.min(img)) / (numpy.max(img)-numpy.min(img)))).astype("uint8")
-
-    # Reshaping to NxWxHxC and converting to the tensorflow format
-    # content
-    original_image = content_image
-    content_image = bob.io.image.to_matplotlib(content_image).astype("float32")
-    content_image = numpy.reshape(content_image, wise_shape(content_image.shape))
-
-    # and style
-    for i in range(len(style_images)):
-        image = bob.io.image.to_matplotlib(style_images[i])
-        image = numpy.reshape(image, wise_shape(image.shape))
-        style_images[i] = image
-
-    # Base content features
-    logger.info("Computing content features")
-    content_features = compute_features(content_image, architecture, checkpoint_dir,
-                                        content_end_points, preprocess_fn)
-
-    # Base style features
-    logger.info("Computing style features")
-    style_grams = []
-    for image in style_images:
-        style_features = compute_features(image, architecture, checkpoint_dir,
-                                          style_end_points, preprocess_fn)
-        style_grams.append(compute_gram(style_features))
-
-    # Organizing the trainer
-    logger.info("Training.....")
-    with tf.Graph().as_default():
-        tf.set_random_seed(0)
-
-        # Random noise
-        if start_from == "noise":
-            starting_image = tf.random_normal(shape=content_image.shape) * 0.256
-        elif start_from == "content":
-            starting_image = preprocess_fn(content_image)
-        elif start_from == "style":
-            starting_image = preprocess_fn(style_images[0])
-        else:
-            raise ValueError(f"Unknown starting image: {start_from}")
-
-        noise = tf.Variable(starting_image, dtype="float32", trainable=True)
-        _, end_points = architecture(noise,
-                                      mode=tf.estimator.ModeKeys.PREDICT,
-                                      trainable_variables=[])
-
-        # Computing content loss
-        content_noises = []
-        for c in content_end_points:
-            content_noises.append(end_points[c])
-        c_loss = content_loss(content_noises, content_features)
-
-        # Computing style_loss
-        style_gram_noises = []
-        s_loss = 0
-
-        for grams_per_image in style_grams:
-
-            for c in style_end_points:
-                layer = end_points[c]
-                _, height, width, number = map(lambda i: i.value, layer.get_shape())
-                size = height * width * number
-                features = tf.reshape(layer, (-1, number))
-                style_gram_noises.append(tf.matmul(tf.transpose(features), features) / size)
-            s_loss += linear_gram_style_loss(style_gram_noises, grams_per_image)
-
-        # Variation denoise
-        d_loss = denoising_loss(noise)
-
-        #Total loss
-        total_loss = content_weight*c_loss + style_weight*s_loss + denoise_weight*d_loss
-
-        solver = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)
-
-        tf.contrib.framework.init_from_checkpoint(tf.train.latest_checkpoint(checkpoint_dir) if os.path.isdir(checkpoint_dir) else checkpoint_dir, scopes)
-        # Training
-        with tf.Session() as sess:
-            sess.run(tf.global_variables_initializer())
-
-            for i in range(iterations):
-                _, loss = sess.run([solver, total_loss])
-                logger.info("Iteration {0}, loss {1}".format(i, loss))
-
-            # Saving generated image
-            raw_style_image = sess.run(noise)[0, :, :,:]
-            # Unpreprocessing the signal
-            if un_preprocess_fn is not None:
-                raw_style_image = un_preprocess_fn(raw_style_image)
-
-            raw_style_image = bob.io.image.to_bob(raw_style_image)
-            normalized_style_image = normalize4save(raw_style_image)
-
-            if pure_noise:
-                if normalized_style_image.shape[0] == 1:
-                    return normalized_style_image[0, :, :]
-                else:
-                    return normalized_style_image
-            else:
-                # Original output
-                if normalized_style_image.shape[0] == 1:
-                    normalized_style_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(normalized_style_image[0,:,:]))
-                    # Loading the content image and clipping from 0-255 in case is in another scale
-                    #scaled_content_image = normalize4save(bob.io.base.load(content_image_path).astype("float32")).astype("float64")
-                    scaled_content_image = original_image.astype("float64")
-                    content_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(scaled_content_image))
-                else:
-                    normalized_style_image_yuv = bob.ip.color.rgb_to_yuv(bob.ip.color.gray_to_rgb(bob.ip.color.rgb_to_gray(normalized_style_image)))
-                    content_image_yuv = bob.ip.color.rgb_to_yuv(original_image)
-
-                output_image = numpy.zeros(shape=content_image_yuv.shape, dtype="uint8")
-                output_image[0,:,:] = normalized_style_image_yuv[0,:,:]
-                output_image[1,:,:] = content_image_yuv[1,:,:]
-                output_image[2,:,:] = content_image_yuv[2,:,:]
-
-                output_image = bob.ip.color.yuv_to_rgb(output_image)
-                return output_image
-
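
The module above depended on TF1 sessions, placeholders, and `tf.contrib`, none of which exist in TF2, hence its wholesale removal. For reference, its central gram-matrix statistic translates directly to eager ops; a minimal sketch assuming an NxHxWxC feature tensor:

import tensorflow as tf


def gram_matrix(features):
    # Flatten the spatial dimensions, keep the channel axis, then
    # correlate channels and normalize by the total element count,
    # mirroring numpy.matmul(f.T, f) / f.size in the removed code.
    f = tf.reshape(features, (-1, features.shape[-1]))
    n = tf.cast(tf.size(f), features.dtype)
    return tf.matmul(f, f, transpose_a=True) / n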
diff --git a/bob/learn/tensorflow/test/data/cnn_mnist/checkpoint b/bob/learn/tensorflow/test/data/cnn_mnist/checkpoint
deleted file mode 100644
index b12413de4ce3b1d66ef87edc2c45f1025ade150a..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/cnn_mnist/checkpoint
+++ /dev/null
@@ -1,2 +0,0 @@
-model_checkpoint_path: "model.ckp"
-all_model_checkpoint_paths: "model.ckp"
diff --git a/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp b/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp
deleted file mode 100644
index 2ebd4543b072d46406c33cf840cf3e8609d3a1a7..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp.meta b/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp.meta
deleted file mode 100644
index b81dee07f766e28d90741d0f04c48603bb99a1e7..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp.meta and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp_sequence_net.pickle b/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp_sequence_net.pickle
deleted file mode 100644
index fedb9de4d16da446d0bea13e0744528135ada209..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/cnn_mnist/model.ckp_sequence_net.pickle
+++ /dev/null
@@ -1,147 +0,0 @@
-ccollections
-OrderedDict
-p0
-((lp1
-(lp2
-S'conv1'
-p3
-accopy_reg
-_reconstructor
-p4
-(cbob.learn.tensorflow.layers
-Conv2D
-p5
-c__builtin__
-object
-p6
-Ntp7
-Rp8
-(dp9
-S'batch_var'
-p10
-NsS'name'
-p11
-g3
-sS'filters'
-p12
-I10
-sS'use_gpu'
-p13
-I00
-sS'activation'
-p14
-ctensorflow.python.ops.math_ops
-tanh
-p15
-sS'W'
-p16
-NsS'stride'
-p17
-(lp18
-I1
-aI1
-aI1
-aI1
-asS'beta'
-p19
-NsS'b'
-p20
-NsS'weights_initialization'
-p21
-g4
-(cbob.learn.tensorflow.initialization
-Xavier
-p22
-g6
-Ntp23
-Rp24
-(dp25
-g13
-I00
-sS'seed'
-p26
-F10.0
-sbsS'input_layer'
-p27
-NsS'batch_mean'
-p28
-NsS'bias_initialization'
-p29
-g4
-(cbob.learn.tensorflow.initialization
-Constant
-p30
-g6
-Ntp31
-Rp32
-(dp33
-g13
-I00
-sg26
-NsS'constant_value'
-p34
-F0.1
-sbsS'kernel_size'
-p35
-I3
-sS'gamma'
-p36
-NsS'batch_norm'
-p37
-I00
-sbaa(lp38
-S'fc1'
-p39
-ag4
-(cbob.learn.tensorflow.layers
-FullyConnected
-p40
-g6
-Ntp41
-Rp42
-(dp43
-g10
-Nsg11
-g39
-sg13
-I00
-sg14
-NsS'shape'
-p44
-Nsg16
-Nsg19
-Nsg20
-Nsg21
-g4
-(g22
-g6
-Ntp45
-Rp46
-(dp47
-g13
-I00
-sg26
-F10.0
-sbsg27
-Nsg28
-Nsg29
-g4
-(g30
-g6
-Ntp48
-Rp49
-(dp50
-g13
-I00
-sg26
-Nsg34
-F0.1
-sbsS'output_dim'
-p51
-I10
-sg36
-Nsg37
-I00
-sbaatp52
-Rp53
-.
\ No newline at end of file
diff --git a/bob/learn/tensorflow/test/data/db_to_tfrecords_config.py b/bob/learn/tensorflow/test/data/db_to_tfrecords_config.py
deleted file mode 100644
index 6fa669656cafc38cf2f17d57df7389be6baa519c..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/db_to_tfrecords_config.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from bob.bio.base.test.dummy.database import database
-from bob.bio.base.utils import read_original_data
-from bob.learn.tensorflow.dataset.generator import dataset_using_generator
-
-groups = ["dev"]
-
-samples = database.all_files(groups=groups)
-
-CLIENT_IDS = (str(f.client_id) for f in database.all_files(groups=groups))
-CLIENT_IDS = list(set(CLIENT_IDS))
-CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))
-
-
-def file_to_label(f):
-    return CLIENT_IDS[str(f.client_id)]
-
-
-def reader(biofile):
-    data = read_original_data(
-        biofile, database.original_directory, database.original_extension
-    )
-    label = file_to_label(biofile)
-    key = str(biofile.path).encode("utf-8")
-    return (data, label, key)
-
-
-dataset = dataset_using_generator(samples, reader)
-datasets = [dataset]
diff --git a/bob/learn/tensorflow/test/data/dummy_audio/attack_sentence01.wav b/bob/learn/tensorflow/test/data/dummy_audio/attack_sentence01.wav
deleted file mode 100644
index c8b59a609a1873cf8dd58d78f1d55a3e28ea2dac..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_audio/attack_sentence01.wav and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_audio/genuine_sentence01.wav b/bob/learn/tensorflow/test/data/dummy_audio/genuine_sentence01.wav
deleted file mode 100644
index e61fa13c19f6af658512e6c0adfb8cf6396498c8..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_audio/genuine_sentence01.wav and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_database/m301_01_p01_i0_0.hdf5 b/bob/learn/tensorflow/test/data/dummy_database/m301_01_p01_i0_0.hdf5
deleted file mode 100644
index b93e629fba47281a4db4eab67d52bf2246ff1e9f..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_database/m301_01_p01_i0_0.hdf5 and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_database/m301_01_p02_i0_0.hdf5 b/bob/learn/tensorflow/test/data/dummy_database/m301_01_p02_i0_0.hdf5
deleted file mode 100644
index 25adbd614647f8acc1f884da696784111308a186..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_database/m301_01_p02_i0_0.hdf5 and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_database/m304_01_p01_i0_0.hdf5 b/bob/learn/tensorflow/test/data/dummy_database/m304_01_p01_i0_0.hdf5
deleted file mode 100644
index b642314a42b20fa2dc7c9f8f1d56e549410ca7fc..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_database/m304_01_p01_i0_0.hdf5 and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_database/m304_02_f12_i0_0.hdf5 b/bob/learn/tensorflow/test/data/dummy_database/m304_02_f12_i0_0.hdf5
deleted file mode 100644
index de17e9c26e60bbf88eea470a77be1a75507268c2..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_database/m304_02_f12_i0_0.hdf5 and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p01_i0_0.png b/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p01_i0_0.png
deleted file mode 100644
index 52d39487637b8a7ba460c93ecc9e1bb92e5ca42f..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p01_i0_0.png and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p01_i0_0_GRAY.png b/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p01_i0_0_GRAY.png
deleted file mode 100644
index e7de9b7d4b792351e2724ada32bd88d9dc5d3ff0..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p01_i0_0_GRAY.png and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p02_i0_0.png b/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p02_i0_0.png
deleted file mode 100644
index 0c7e298de460379d02de275c38ebc24840a258fa..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_image_database/m301_01_p02_i0_0.png and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_image_database/m304_01_p01_i0_0.png b/bob/learn/tensorflow/test/data/dummy_image_database/m304_01_p01_i0_0.png
deleted file mode 100644
index 53c25af50711c607d2d05cb9566acfe2b140977d..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_image_database/m304_01_p01_i0_0.png and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/dummy_image_database/m304_02_f12_i0_0.png b/bob/learn/tensorflow/test/data/dummy_image_database/m304_02_f12_i0_0.png
deleted file mode 100644
index 0fdf6b4d8fa118657bddd7b2b219d96085180d74..0000000000000000000000000000000000000000
Binary files a/bob/learn/tensorflow/test/data/dummy_image_database/m304_02_f12_i0_0.png and /dev/null differ
diff --git a/bob/learn/tensorflow/test/data/estimator_atnt_faces_config.py b/bob/learn/tensorflow/test/data/estimator_atnt_faces_config.py
deleted file mode 100644
index 3dbcc3d2aef3140239c0d3d5beafb09a30906a4b..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/estimator_atnt_faces_config.py
+++ /dev/null
@@ -1,59 +0,0 @@
-import tensorflow as tf
-
-model_dir = "%(model_dir)s"
-learning_rate = 0.00001
-
-
-def architecture(images):
-    images = tf.cast(images, tf.float32)
-    logits = tf.reshape(images, [-1, 92 * 112])
-    logits = tf.contrib.slim.fully_connected(inputs=logits, num_outputs=20)
-    return logits
-
-
-def model_fn(features, labels, mode, config):
-    key = features['key']
-    features = features['data']
-
-    logits = architecture(features)
-
-    predictions = {
-        # Generate predictions (for PREDICT and EVAL mode)
-        "classes": tf.argmax(input=logits, axis=1),
-        # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
-        # `logging_hook`.
-        "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
-        "key": key,
-    }
-    if mode == tf.estimator.ModeKeys.PREDICT:
-        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-    # Calculate Loss (for both TRAIN and EVAL modes)
-    loss = tf.losses.sparse_softmax_cross_entropy(
-        logits=logits, labels=labels)
-    accuracy = tf.metrics.accuracy(
-        labels=labels, predictions=predictions["classes"])
-    metrics = {'accuracy': accuracy}
-
-    # Configure the training op
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        optimizer = tf.train.GradientDescentOptimizer(
-            learning_rate=learning_rate)
-        train_op = optimizer.minimize(
-            loss=loss, global_step=tf.train.get_or_create_global_step())
-        # Log accuracy and loss
-        with tf.name_scope('train_metrics'):
-            tf.summary.scalar('accuracy', accuracy[1])
-            tf.summary.scalar('loss', loss)
-    else:
-        train_op = None
-
-    return tf.estimator.EstimatorSpec(
-        mode=mode,
-        predictions=predictions,
-        loss=loss,
-        train_op=train_op,
-        eval_metric_ops=metrics)
-
-
-estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir)
diff --git a/bob/learn/tensorflow/test/data/input_biogenerator_config.py b/bob/learn/tensorflow/test/data/input_biogenerator_config.py
deleted file mode 100644
index e4516be98af48040233f26bf42f61c11deec3899..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/input_biogenerator_config.py
+++ /dev/null
@@ -1,66 +0,0 @@
-from bob.learn.tensorflow.dataset.bio import BioGenerator
-from bob.learn.tensorflow.utils import to_channels_last
-import tensorflow as tf
-
-batch_size = 2
-epochs = 2
-
-
-def input_fn(mode):
-    from bob.bio.base.test.dummy.database import database as db
-
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        groups = 'world'
-    elif mode == tf.estimator.ModeKeys.EVAL:
-        groups = 'dev'
-
-    files = db.objects(groups=groups)
-
-    # construct integer labels for each identity in the database
-    CLIENT_IDS = (str(f.client_id) for f in files)
-    CLIENT_IDS = list(set(CLIENT_IDS))
-    CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))
-
-    def biofile_to_label(f):
-        return CLIENT_IDS[str(f.client_id)]
-
-    def load_data(database, f):
-        img = f.load(database.original_directory, database.original_extension)
-        # make a channels_first image (bob format) with 1 channel
-        img = img.reshape(1, 112, 92)
-        return img
-
-    generator = BioGenerator(db, files, load_data, biofile_to_label)
-
-    dataset = tf.data.Dataset.from_generator(
-        generator, generator.output_types, generator.output_shapes)
-
-    def transform(image, label, key):
-        # convert to channels last
-        image = to_channels_last(image)
-
-        # per_image_standardization
-        image = tf.image.per_image_standardization(image)
-        return (image, label, key)
-
-    dataset = dataset.map(transform)
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        # since we are caching to memory, caching only in training makes sense.
-        dataset = dataset.cache()
-        dataset = dataset.repeat(epochs)
-    dataset = dataset.batch(batch_size)
-
-    data, label, key = dataset.make_one_shot_iterator().get_next()
-    return {'data': data, 'key': key}, label
-
-
-def train_input_fn():
-    return input_fn(tf.estimator.ModeKeys.TRAIN)
-
-
-def eval_input_fn():
-    return input_fn(tf.estimator.ModeKeys.EVAL)
-
-
-train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=50)
-eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
diff --git a/bob/learn/tensorflow/test/data/input_predict_bio_config.py b/bob/learn/tensorflow/test/data/input_predict_bio_config.py
deleted file mode 100644
index 2b8687f77bde2f024b213df0a1b49bc2231d5867..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/input_predict_bio_config.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import tensorflow as tf
-from bob.bio.base.test.dummy.database import database
-biofiles = database.all_files(['dev'])
-
-
-def bio_predict_input_fn(generator, output_types, output_shapes):
-    def input_fn():
-        dataset = tf.data.Dataset.from_generator(
-            generator, output_types, output_shapes)
-        # apply all kinds of transformations here, process the data
-        # even further if you want.
-        dataset = dataset.prefetch(1)
-        dataset = dataset.batch(10**3)
-        images, labels, keys = dataset.make_one_shot_iterator().get_next()
-
-        return {'data': images, 'key': keys}, labels
-    return input_fn
diff --git a/bob/learn/tensorflow/test/data/input_tfrecords_config.py b/bob/learn/tensorflow/test/data/input_tfrecords_config.py
deleted file mode 100644
index 2c5ec8f02e8f547bc31de2e062f2240ab044d0e5..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/input_tfrecords_config.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import tensorflow as tf
-from bob.learn.tensorflow.dataset.tfrecords import shuffle_data_and_labels, \
-    batch_data_and_labels
-
-tfrecord_filenames = ['%(tfrecord_filenames)s']
-data_shape = (1, 112, 92)  # size of atnt images
-data_type = tf.uint8
-batch_size = 2
-epochs = 2
-
-
-def train_input_fn():
-    return shuffle_data_and_labels(tfrecord_filenames, data_shape, data_type,
-                                   batch_size, epochs=epochs)
-
-
-def eval_input_fn():
-    return batch_data_and_labels(tfrecord_filenames, data_shape, data_type,
-                                 batch_size, epochs=1)
-
-
-# config for train_and_evaluate
-train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn, max_steps=200)
-eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
diff --git a/bob/learn/tensorflow/test/data/mnist_estimator.py b/bob/learn/tensorflow/test/data/mnist_estimator.py
deleted file mode 100644
index 224e974c2d8dc9627fd38e0a8fead61318f3b425..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/mnist_estimator.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import tensorflow as tf
-data = tf.feature_column.numeric_column('data', shape=[784])
-estimator = tf.estimator.LinearClassifier(feature_columns=[data], n_classes=10)
diff --git a/bob/learn/tensorflow/test/data/mnist_input_fn.py b/bob/learn/tensorflow/test/data/mnist_input_fn.py
deleted file mode 100644
index e5bf1f4a058a96b529613a879e396c66a410afa6..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/mnist_input_fn.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from bob.db.mnist import Database
-import tensorflow as tf
-
-database = Database()
-
-
-def input_fn(mode):
-    if mode == tf.estimator.ModeKeys.TRAIN:
-        groups = 'train'
-        num_epochs = None
-        shuffle = True
-    else:
-        groups = 'test'
-        num_epochs = 1
-        shuffle = True
-    data, labels = database.data(groups=groups)
-    return tf.estimator.inputs.numpy_input_fn(
-        x={
-            "data": data.astype('float32'),
-            'key': labels.astype('float32')
-        },
-        y=labels.astype('int32'),
-        batch_size=128,
-        num_epochs=num_epochs,
-        shuffle=shuffle)
-
-
-train_input_fn = input_fn(tf.estimator.ModeKeys.TRAIN)
-eval_input_fn = input_fn(tf.estimator.ModeKeys.EVAL)
diff --git a/bob/learn/tensorflow/test/data/train_scripts/siamese.py b/bob/learn/tensorflow/test/data/train_scripts/siamese.py
deleted file mode 100644
index 3658e71f5103b4a4f7e8f7603dcf51413165edca..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/train_scripts/siamese.py
+++ /dev/null
@@ -1,39 +0,0 @@
-from bob.learn.tensorflow.datashuffler import SiameseMemory, ScaleFactor
-from bob.learn.tensorflow.network import Chopra
-from bob.learn.tensorflow.trainers import SiameseTrainer as Trainer
-from bob.learn.tensorflow.trainers import constant
-from bob.learn.tensorflow.loss import ContrastiveLoss
-from bob.learn.tensorflow.utils import load_mnist
-import tensorflow as tf
-import numpy
-
-BATCH_SIZE = 32
-INPUT_SHAPE = [None, 28, 28, 1]
-SEED = 10
-
-### PREPARING DATASHUFFLER ###
-train_data, train_labels, validation_data, validation_labels = \
-    load_mnist()
-train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))
-
-train_data_shuffler = SiameseMemory(
-    train_data,
-    train_labels,
-    input_shape=INPUT_SHAPE,
-    batch_size=BATCH_SIZE,
-    normalizer=ScaleFactor())
-
-### ARCHITECTURE ###
-architecture = Chopra(seed=SEED, n_classes=10)
-
-### LOSS ###
-loss = ContrastiveLoss(contrastive_margin=4.)
-
-### LEARNING RATE ###
-learning_rate = constant(base_learning_rate=0.01)
-
-### SOLVER ###
-optimizer = tf.train.GradientDescentOptimizer(learning_rate)
-
-### Trainer ###
-trainer = Trainer
diff --git a/bob/learn/tensorflow/test/data/train_scripts/softmax.py b/bob/learn/tensorflow/test/data/train_scripts/softmax.py
deleted file mode 100644
index ed48ececfe66b19d7d5d83dbed6166de0d75f90e..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/train_scripts/softmax.py
+++ /dev/null
@@ -1,38 +0,0 @@
-from bob.learn.tensorflow.datashuffler import Memory, ScaleFactor
-from bob.learn.tensorflow.network import chopra
-from bob.learn.tensorflow.trainers import Trainer, constant
-from bob.learn.tensorflow.loss import MeanSoftMaxLoss
-from bob.learn.tensorflow.utils import load_mnist
-import tensorflow as tf
-import numpy
-
-BATCH_SIZE = 32
-INPUT_SHAPE = [None, 28, 28, 1]
-SEED = 10
-USE_GPU = False
-
-### PREPARING DATASHUFFLER ###
-train_data, train_labels, validation_data, validation_labels = load_mnist()
-train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))
-
-train_data_shuffler = Memory(
-    train_data,
-    train_labels,
-    input_shape=INPUT_SHAPE,
-    batch_size=BATCH_SIZE,
-    normalizer=ScaleFactor())
-
-### ARCHITECTURE ###
-architecture = chopra(seed=SEED, n_classes=10)
-
-### LOSS ###
-loss = MeanSoftMaxLoss()
-
-### LEARNING RATE ###
-learning_rate = constant(base_learning_rate=0.01)
-
-### SOLVER ###
-optimizer = tf.train.GradientDescentOptimizer(learning_rate)
-
-### Trainer ###
-trainer = Trainer
diff --git a/bob/learn/tensorflow/test/data/train_scripts/triplet.py b/bob/learn/tensorflow/test/data/train_scripts/triplet.py
deleted file mode 100644
index 35908c8e305eb64359f183d3fd74afee541a8a01..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/data/train_scripts/triplet.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from bob.learn.tensorflow.datashuffler import TripletMemory
-from bob.learn.tensorflow.network import Chopra
-from bob.learn.tensorflow.trainers import TripletTrainer as Trainer
-from bob.learn.tensorflow.trainers import constant
-from bob.learn.tensorflow.loss import TripletLoss
-from bob.learn.tensorflow.utils import load_mnist
-import tensorflow as tf
-import numpy
-
-BATCH_SIZE = 32
-INPUT_SHAPE = [None, 28, 28, 1]
-SEED = 10
-
-### PREPARING DATASHUFFLER ###
-train_data, train_labels, validation_data, validation_labels = \
-    load_mnist()
-train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))
-
-train_data_shuffler = TripletMemory(
-    train_data, train_labels, input_shape=INPUT_SHAPE, batch_size=BATCH_SIZE)
-
-### ARCHITECTURE ###
-architecture = Chopra(seed=SEED, n_classes=10)
-
-### LOSS ###
-loss = TripletLoss(margin=4.)
-
-### LEARNING RATE ###
-learning_rate = constant(base_learning_rate=0.01)
-
-### SOLVER ###
-optimizer = tf.train.GradientDescentOptimizer(learning_rate)
-
-### Trainer ###
-trainer = Trainer
diff --git a/bob/learn/tensorflow/test/test_architectures.py b/bob/learn/tensorflow/test/test_architectures.py
deleted file mode 100644
index 740768d10588265fadd4c0843fdbbd8a7b221506..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_architectures.py
+++ /dev/null
@@ -1,152 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-from nose.plugins.attrib import attr
-import tensorflow as tf
-from bob.learn.tensorflow.network import inception_resnet_v2, inception_resnet_v2_batch_norm,\
-    inception_resnet_v1, inception_resnet_v1_batch_norm,\
-    vgg_19, vgg_16, mlp_with_batchnorm_and_dropout
-
-# @attr('slow')
-# def test_inceptionv2():
-
-#     tf.reset_default_graph()
-#     # Testing WITHOUT batch norm
-#     inputs = tf.placeholder(tf.float32, shape=(1, 160, 160, 1))
-#     graph, _ = inception_resnet_v2(inputs)
-#     assert len(tf.trainable_variables()) == 490
-
-#     tf.reset_default_graph()
-#     assert len(tf.global_variables()) == 0
-
-#     # Testing WITH batch norm
-#     inputs = tf.placeholder(tf.float32, shape=(1, 160, 160, 1))
-#     graph, _ = inception_resnet_v2_batch_norm(inputs)
-#     assert len(tf.trainable_variables()) == 490, len(tf.trainable_variables())
-
-#     tf.reset_default_graph()
-#     assert len(tf.global_variables()) == 0
-
-# @attr('slow')
-# def test_inceptionv2_adaptation():
-
-#     tf.reset_default_graph()
-#     for n, trainable_variables in [
-#         (490, None),
-#         (0, []),
-#         (2, ['Conv2d_1a_3x3', 'Conv2d_1a_3x3_BN']),
-#         (4, ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_1a_3x3_BN',
-#              'Conv2d_2a_3x3_BN']),
-#         (6, ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
-#              'Conv2d_1a_3x3_BN', 'Conv2d_2a_3x3_BN', 'Conv2d_2b_3x3_BN']),
-#         (1, ['Conv2d_1a_3x3_BN']),
-#         (2, ['Conv2d_1a_3x3_BN', 'Conv2d_2a_3x3_BN']),
-#         (3, ['Conv2d_1a_3x3_BN', 'Conv2d_2a_3x3_BN', 'Conv2d_2b_3x3_BN']),
-#     ]:
-#         input = tf.placeholder(tf.float32, shape=(1, 160, 160, 1))
-#         net, end_points = inception_resnet_v2_batch_norm(
-#             input, trainable_variables=trainable_variables)
-#         l = len(tf.trainable_variables())
-#         assert l == n, (l, n)
-#         tf.reset_default_graph()
-#     tf.reset_default_graph()
-#     assert len(tf.global_variables()) == 0
-
-# @attr('slow')
-# def test_inceptionv1():
-
-#     tf.reset_default_graph()
-#     # Testing WITHOUT batch norm
-#     inputs = tf.placeholder(tf.float32, shape=(1, 160, 160, 1))
-#     graph, _ = inception_resnet_v1(inputs)
-#     assert len(tf.trainable_variables()) == 266
-
-#     tf.reset_default_graph()
-#     assert len(tf.global_variables()) == 0
-
-#     # Testing WITH batch norm
-#     inputs = tf.placeholder(tf.float32, shape=(1, 160, 160, 1))
-#     graph, _ = inception_resnet_v1_batch_norm(inputs)
-#     assert len(tf.trainable_variables()) == 266
-
-#     tf.reset_default_graph()
-#     assert len(tf.global_variables()) == 0
-
-# @attr('slow')
-# def test_inceptionv1_adaptation():
-
-#     tf.reset_default_graph()
-#     for n, trainable_variables in [
-#         (266, None),
-#         (0, []),
-#         (2, ['Conv2d_1a_3x3', 'Conv2d_1a_3x3_BN']),
-#         (4, ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_1a_3x3_BN',
-#              'Conv2d_2a_3x3_BN']),
-#         (6, ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3',
-#              'Conv2d_1a_3x3_BN', 'Conv2d_2a_3x3_BN', 'Conv2d_2b_3x3_BN']),
-#         (1, ['Conv2d_1a_3x3_BN']),
-#         (2, ['Conv2d_1a_3x3_BN', 'Conv2d_2a_3x3_BN']),
-#         (3, ['Conv2d_1a_3x3_BN', 'Conv2d_2a_3x3_BN', 'Conv2d_2b_3x3_BN']),
-#     ]:
-#         input = tf.placeholder(tf.float32, shape=(1, 160, 160, 1))
-#         net, end_points = inception_resnet_v1_batch_norm(
-#             input, trainable_variables=trainable_variables)
-#         l = len(tf.trainable_variables())
-#         assert l == n, (l, n)
-#         tf.reset_default_graph()
-#     tf.reset_default_graph()
-#     assert len(tf.global_variables()) == 0
-
-
-def test_vgg():
-    tf.reset_default_graph()
-
-    # Testing VGG19 Training mode
-    inputs = tf.placeholder(tf.float32, shape=(1, 224, 224, 3))
-    graph, _ = vgg_19(inputs)
-    assert len(tf.trainable_variables()) == 38
-
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-    # Testing VGG19 predicting mode
-    inputs = tf.placeholder(tf.float32, shape=(1, 224, 224, 3))
-    graph, _ = vgg_19(inputs, mode=tf.estimator.ModeKeys.PREDICT)
-    assert len(tf.trainable_variables()) == 0
-
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-
-    # Testing VGG 16 training mode
-    inputs = tf.placeholder(tf.float32, shape=(1, 224, 224, 3))
-    graph, _ = vgg_16(inputs)
-    assert len(tf.trainable_variables()) == 30
-
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-    # Testing VGG 16 predicting mode
-    inputs = tf.placeholder(tf.float32, shape=(1, 224, 224, 3))
-    graph, _ = vgg_16(inputs, mode=tf.estimator.ModeKeys.PREDICT)
-    assert len(tf.trainable_variables()) == 0
-
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-
-def test_mlp():
-
-    tf.reset_default_graph()
-    # Testing MLP Training mode
-    inputs = tf.placeholder(tf.float32, shape=(1, 10, 10, 3))
-    graph, _ = mlp_with_batchnorm_and_dropout(inputs, [6, 5])
-    assert len(tf.trainable_variables()) == 4
-
-    tf.reset_default_graph()
-    # Testing MLP Predicting mode
-    inputs = tf.placeholder(tf.float32, shape=(1, 10, 10, 3))
-    graph, _ = mlp_with_batchnorm_and_dropout(inputs, [6, 5], mode=tf.estimator.ModeKeys.PREDICT)
-    assert len(tf.trainable_variables()) == 0
-
diff --git a/bob/learn/tensorflow/test/test_dataset.py b/bob/learn/tensorflow/test/test_dataset.py
deleted file mode 100644
index 0c5fe8cf4458669e7be26d26b74d70e45cd778ee..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_dataset.py
+++ /dev/null
@@ -1,105 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import pkg_resources
-import numpy
-import tensorflow as tf
-from bob.learn.tensorflow.dataset.siamese_image import shuffle_data_and_labels_image_augmentation as siamese_batch
-from bob.learn.tensorflow.dataset.triplet_image import shuffle_data_and_labels_image_augmentation as triplet_batch
-from bob.learn.tensorflow.dataset.generator import dataset_using_generator
-
-data_shape = (250, 250, 3)
-output_shape = (50, 50)
-data_type = tf.float32
-batch_size = 2
-validation_batch_size = 250
-epochs = 1
-
-# Trainer logits
-filenames = [
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png')
-]
-labels = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1]
-
-
-def test_siamese_dataset():
-    data, label = siamese_batch(
-        filenames,
-        labels,
-        data_shape,
-        data_type,
-        2,
-        per_image_normalization=False,
-        output_shape=output_shape)
-
-    with tf.Session() as session:
-        d, l = session.run([data, label])
-        assert len(l) == 2
-        assert d['left'].shape == (2, 50, 50, 3)
-        assert d['right'].shape == (2, 50, 50, 3)
-
-
-def test_triplet_dataset():
-    data = triplet_batch(
-        filenames,
-        labels,
-        data_shape,
-        data_type,
-        2,
-        per_image_normalization=False,
-        output_shape=output_shape)
-    with tf.Session() as session:
-        d = session.run([data])[0]
-        assert len(d.keys()) == 3
-        assert d['anchor'].shape == (2, 50, 50, 3)
-        assert d['positive'].shape == (2, 50, 50, 3)
-        assert d['negative'].shape == (2, 50, 50, 3)
-
-
-def test_dataset_using_generator():
-    
-    def reader(f):
-        key = 0
-        label = 0
-        yield {'data': f, 'key': key}, label
-
-    shape = (2, 2, 1)    
-    samples = [numpy.ones(shape, dtype="float32")*i for i in range(10)]
-    
-    with tf.Session() as session: 
-        dataset = dataset_using_generator(samples,\
-                                          reader,\
-                                          multiple_samples=True)
-        iterator = dataset.make_one_shot_iterator().get_next()
-        for i in range(11):
-            try:
-                sample = session.run(iterator)                
-                assert sample[0]["data"].shape == shape
-                assert numpy.allclose(sample[0]["data"], samples[i])
-            except tf.errors.OutOfRangeError:
-                break
-
diff --git a/bob/learn/tensorflow/test/test_db_to_tfrecords.py b/bob/learn/tensorflow/test/test_db_to_tfrecords.py
deleted file mode 100644
index db5491d08066aa5e5138b447d7e32677576e1900..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_db_to_tfrecords.py
+++ /dev/null
@@ -1,119 +0,0 @@
-import os
-import shutil
-import pkg_resources
-import tempfile
-import tensorflow as tf
-import numpy as np
-from click.testing import CliRunner
-from bob.io.base import create_directories_safe
-from bob.learn.tensorflow.script.db_to_tfrecords import (
-    db_to_tfrecords, describe_tf_record, datasets_to_tfrecords)
-from bob.learn.tensorflow.utils import load_mnist, create_mnist_tfrecord
-from bob.extension.scripts.click_helper import assert_click_runner_result
-from bob.extension.config import load
-from bob.learn.tensorflow.dataset.tfrecords import dataset_from_tfrecord
-
-regenerate_reference = False
-
-dummy_config = pkg_resources.resource_filename(
-    'bob.learn.tensorflow', 'test/data/db_to_tfrecords_config.py')
-
-
-def compare_datasets(ds1, ds2, sess=None):
-    if tf.executing_eagerly():
-        for values1, values2 in zip(ds1, ds2):
-            values1 = tf.contrib.framework.nest.flatten(values1)
-            values2 = tf.contrib.framework.nest.flatten(values2)
-            for v1, v2 in zip(values1, values2):
-                if not tf.reduce_all(tf.math.equal(v1, v2)):
-                    return False
-    else:
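-        # graph mode: drain both datasets through one-shot iterators and
-        # compare the numpy values batch by batch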
-        ds1 = ds1.make_one_shot_iterator().get_next()
-        ds2 = ds2.make_one_shot_iterator().get_next()
-        while True:
-            try:
-                values1, values2 = sess.run([ds1, ds2])
-            except tf.errors.OutOfRangeError:
-                break
-            values1 = tf.contrib.framework.nest.flatten(values1)
-            values2 = tf.contrib.framework.nest.flatten(values2)
-            for v1, v2 in zip(values1, values2):
-                v1, v2 = np.asarray(v1), np.asarray(v2)
-                if not np.all(v1 == v2):
-                    return False
-    return True
-
-
-def test_db_to_tfrecords():
-    test_dir = tempfile.mkdtemp(prefix='bobtest_')
-    output_path = os.path.join(test_dir, 'dev.tfrecords')
-
-    try:
-        runner = CliRunner()
-        result = runner.invoke(
-            db_to_tfrecords,
-            args=(dummy_config, '--output', output_path),
-            standalone_mode=False)
-        assert result.exit_code == 0, '%s\n%s\n%s' % (
-            result.exc_info, result.output, result.exception)
-
-        # TODO: test if the generated tfrecords file is equal with a reference
-        # file
-
-    finally:
-        shutil.rmtree(test_dir)
-
-
-def test_db_to_tfrecords_size_estimate():
-    test_dir = tempfile.mkdtemp(prefix='bobtest_')
-    output_path = os.path.join(test_dir, 'dev.tfrecords')
-
-    try:
-        args = (dummy_config, '--size-estimate', '--output', output_path)
-        runner = CliRunner()
-        result = runner.invoke(
-            db_to_tfrecords, args=args, standalone_mode=False)
-        assert result.exit_code == 0, '%s\n%s\n%s' % (
-            result.exc_info, result.output, result.exception)
-        assert '2.0 M bytes' in result.output, result.output
-
-    finally:
-        shutil.rmtree(test_dir)
-
-
-def test_tfrecord_counter():
-    tfrecord_train = "./tf-train-test/train_mnist.tfrecord"
-    shape = (3136,)  # 28x28 MNIST images stored as float32: 784 * 4 = 3136 bytes
-    batch_size = 1000
-
-    try:
-        train_data, train_labels, validation_data, validation_labels = \
-            load_mnist()
-        create_directories_safe(os.path.dirname(tfrecord_train))
-        create_mnist_tfrecord(
-            tfrecord_train, train_data, train_labels, n_samples=6000)
-
-        n_samples, n_labels = describe_tf_record(
-            os.path.dirname(tfrecord_train), shape, batch_size)
-
-        assert n_samples == 6000
-        assert n_labels == 10
-
-    finally:
-        shutil.rmtree(os.path.dirname(tfrecord_train))
-
-
-def test_datasets_to_tfrecords():
-    runner = CliRunner()
-    with runner.isolated_filesystem():
-        output_path = './test'
-        args = (dummy_config, '--output', output_path)
-        result = runner.invoke(
-            datasets_to_tfrecords, args=args, standalone_mode=False)
-        assert_click_runner_result(result)
-        # read back the tfrecord
-        with tf.Session() as sess:
-            dataset2 = dataset_from_tfrecord(output_path)
-            dataset1 = load(
-                [dummy_config], attribute_name='dataset', entry_point_group='bob')
-            assert compare_datasets(dataset1, dataset2, sess)
diff --git a/bob/learn/tensorflow/test/test_estimator_onegraph.py b/bob/learn/tensorflow/test/test_estimator_onegraph.py
deleted file mode 100644
index 3dea478fa1ff3cce39c4c2dd8d3cecceb79c4c46..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_estimator_onegraph.py
+++ /dev/null
@@ -1,336 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-from nose.plugins.attrib import attr
-import tensorflow as tf
-
-from bob.learn.tensorflow.network import dummy
-from bob.learn.tensorflow.estimators import Logits, LogitsCenterLoss
-
-from bob.learn.tensorflow.dataset.tfrecords import shuffle_data_and_labels, batch_data_and_labels, \
-    shuffle_data_and_labels_image_augmentation
-
-from bob.learn.tensorflow.utils import load_mnist, create_mnist_tfrecord
-from bob.learn.tensorflow.utils.hooks import LoggerHookEstimator
-from bob.learn.tensorflow.loss import mean_cross_entropy_loss
-from bob.learn.tensorflow.utils import reproducible
-
-import numpy
-
-import shutil
-import os
-
-# Work around a macOS OpenMP issue, see https://github.com/dmlc/xgboost/issues/1715
-os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
-
-tfrecord_train = "./train_mnist.tfrecord"
-tfrecord_validation = "./validation_mnist.tfrecord"
-model_dir = "./temp"
-
-learning_rate = 0.1
-data_shape = (28, 28, 1)  # size of mnist images
-data_type = tf.float32
-batch_size = 32
-validation_batch_size = 250
-epochs = 6
-steps = 5000
-reproducible.set_seed()
-
-# @attr('slow')
-# def test_logitstrainer():
-#     # Trainer logits
-#     try:
-#         embedding_validation = False
-#         _, run_config, _, _, _ = reproducible.set_seed()
-#         run_config = run_config.replace(
-#             keep_checkpoint_max=10, save_checkpoints_steps=100, save_checkpoints_secs=None)
-#         trainer = Logits(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             loss_op=mean_cross_entropy_loss,
-#             embedding_validation=embedding_validation,
-#             validation_batch_size=validation_batch_size,
-#             config=run_config)
-#         run_logitstrainer_mnist(trainer, augmentation=True)
-#     finally:
-#         try:
-#             os.unlink(tfrecord_train)
-#             os.unlink(tfrecord_validation)
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#         except Exception:
-#             pass
-
-# @attr('slow')
-# def test_logitstrainer_embedding():
-#     try:
-#         embedding_validation = True
-#         _, run_config, _, _, _ = reproducible.set_seed()
-#         trainer = Logits(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             loss_op=mean_cross_entropy_loss,
-#             embedding_validation=embedding_validation,
-#             validation_batch_size=validation_batch_size,
-#             config=run_config)
-
-#         run_logitstrainer_mnist(trainer)
-#     finally:
-#         try:
-#             os.unlink(tfrecord_train)
-#             os.unlink(tfrecord_validation)
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#         except Exception:
-#             pass
-
-# @attr('slow')
-# def test_logitstrainer_centerloss():
-#     try:
-#         embedding_validation = False
-#         _, run_config, _, _, _ = reproducible.set_seed()
-#         run_config = run_config.replace(save_checkpoints_steps=1000)
-#         trainer = LogitsCenterLoss(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             embedding_validation=embedding_validation,
-#             validation_batch_size=validation_batch_size,
-#             factor=0.01,
-#             config=run_config)
-
-#         run_logitstrainer_mnist(trainer)
-
-#         # Checking if the centers were updated
-#         sess = tf.Session()
-#         checkpoint_path = tf.train.get_checkpoint_state(
-#             model_dir).model_checkpoint_path
-#         saver = tf.train.import_meta_graph(
-#             checkpoint_path + ".meta", clear_devices=True)
-#         saver.restore(sess, tf.train.latest_checkpoint(model_dir))
-#         centers = tf.get_collection(
-#             tf.GraphKeys.GLOBAL_VARIABLES, scope="center_loss/centers:0")[0]
-#         assert numpy.sum(numpy.abs(centers.eval(sess))) > 0.0
-
-#     finally:
-#         try:
-#             os.unlink(tfrecord_train)
-#             os.unlink(tfrecord_validation)
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#         except Exception:
-#             pass
-
-# @attr('slow')
-# def test_logitstrainer_centerloss_embedding():
-#     try:
-#         embedding_validation = True
-#         _, run_config, _, _, _ = reproducible.set_seed()
-#         trainer = LogitsCenterLoss(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             embedding_validation=embedding_validation,
-#             validation_batch_size=validation_batch_size,
-#             factor=0.01,
-#             config=run_config)
-#         run_logitstrainer_mnist(trainer)
-
-#         # Checking if the centers were updated
-#         sess = tf.Session()
-#         checkpoint_path = tf.train.get_checkpoint_state(
-#             model_dir).model_checkpoint_path
-#         saver = tf.train.import_meta_graph(
-#             checkpoint_path + ".meta", clear_devices=True)
-#         saver.restore(sess, tf.train.latest_checkpoint(model_dir))
-#         centers = tf.get_collection(
-#             tf.GraphKeys.GLOBAL_VARIABLES, scope="center_loss/centers:0")[0]
-#         assert numpy.sum(numpy.abs(centers.eval(sess))) > 0.0
-#     finally:
-#         try:
-#             os.unlink(tfrecord_train)
-#             os.unlink(tfrecord_validation)
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#         except Exception:
-#             pass
-
-
-def run_logitstrainer_mnist(trainer, augmentation=False):
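-    """Trains the given estimator on MNIST tfrecords and checks that the
-    evaluation accuracy is above chance."""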
-    # Cleaning up
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-    # Creating tf records for mnist
-    train_data, train_labels, validation_data, validation_labels = load_mnist()
-    create_mnist_tfrecord(
-        tfrecord_train, train_data, train_labels, n_samples=6000)
-    create_mnist_tfrecord(
-        tfrecord_validation,
-        validation_data,
-        validation_labels,
-        n_samples=validation_batch_size)
-
-    def input_fn():
-        if augmentation:
-            return shuffle_data_and_labels_image_augmentation(
-                tfrecord_train,
-                data_shape,
-                data_type,
-                batch_size,
-                random_flip=True,
-                random_rotate=False,
-                epochs=epochs)
-        else:
-            return shuffle_data_and_labels(
-                tfrecord_train,
-                data_shape,
-                data_type,
-                batch_size,
-                epochs=epochs)
-
-    def input_fn_validation():
-        return batch_data_and_labels(
-            tfrecord_validation,
-            data_shape,
-            data_type,
-            validation_batch_size,
-            epochs=1000)
-
-    hooks = [
-        LoggerHookEstimator(trainer, 16, 300),
-        tf.train.SummarySaverHook(
-            save_steps=1000,
-            output_dir=model_dir,
-            scaffold=tf.train.Scaffold(),
-            summary_writer=tf.summary.FileWriter(model_dir))
-    ]
-    trainer.train(input_fn, steps=steps, hooks=hooks)
-    # evaluation is the same with or without embedding validation
-    acc = trainer.evaluate(input_fn_validation)
-    assert acc['accuracy'] > 0.10
-
-    # Cleaning up
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-# @attr('slow')
-# def test_moving_average_trainer():
-#     # define a fixed input data
-#     # train the same network with the same initialization
-#     # evaluate it
-#     # train and evaluate it again with moving average
-#     # Accuracy should be lower when moving average is on
-
-#     try:
-#         # Creating tf records for mnist
-#         train_data, train_labels, validation_data, validation_labels = load_mnist()
-#         create_mnist_tfrecord(
-#             tfrecord_train, train_data, train_labels, n_samples=6000)
-#         create_mnist_tfrecord(
-#             tfrecord_validation,
-#             validation_data,
-#             validation_labels,
-#             n_samples=validation_batch_size)
-
-#         def input_fn():
-#             return batch_data_and_labels(
-#                 tfrecord_train,
-#                 data_shape,
-#                 data_type,
-#                 batch_size,
-#                 epochs=1)
-
-#         def input_fn_validation():
-#             return batch_data_and_labels(
-#                 tfrecord_validation,
-#                 data_shape,
-#                 data_type,
-#                 validation_batch_size,
-#                 epochs=1)
-
-#         from bob.learn.tensorflow.network.Dummy import dummy as architecture
-
-#         run_config = reproducible.set_seed(183, 183)[1]
-#         run_config = run_config.replace(save_checkpoints_steps=2000)
-
-#         def _estimator(apply_moving_averages):
-#             return Logits(
-#                 architecture,
-#                 tf.train.GradientDescentOptimizer(1e-1),
-#                 tf.losses.sparse_softmax_cross_entropy,
-#                 10,
-#                 model_dir=model_dir,
-#                 config=run_config,
-#                 apply_moving_averages=apply_moving_averages,
-#             )
-
-#         def _evaluate(estimator, delete=True):
-#             try:
-#                 estimator.train(input_fn)
-#                 evaluations = estimator.evaluate(input_fn_validation)
-#             finally:
-#                 if delete:
-#                     shutil.rmtree(estimator.model_dir, ignore_errors=True)
-#             return evaluations
-
-#         estimator = _estimator(False)
-#         evaluations = _evaluate(estimator, delete=True)
-#         no_moving_average_acc = evaluations['accuracy']
-
-#         # same as above with moving average
-#         estimator = _estimator(True)
-#         evaluations = _evaluate(estimator, delete=False)
-#         with_moving_average_acc = evaluations['accuracy']
-
-#         assert no_moving_average_acc > with_moving_average_acc, \
-#             (no_moving_average_acc, with_moving_average_acc)
-
-#         # Can it resume training?
-#         del estimator
-#         tf.reset_default_graph()
-#         estimator = _estimator(True)
-#         _evaluate(estimator, delete=True)
-
-#     finally:
-#         try:
-#             os.unlink(tfrecord_train)
-#             os.unlink(tfrecord_validation)
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#         except Exception:
-#             pass
-
-# @attr('slow')
-# def test_saver_with_moving_average():
-#     try:
-#         _, run_config, _, _, _ = reproducible.set_seed()
-#         run_config = run_config.replace(
-#             keep_checkpoint_max=10, save_checkpoints_steps=100,
-#             save_checkpoints_secs=None)
-#         estimator = Logits(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             loss_op=mean_cross_entropy_loss,
-#             embedding_validation=False,
-#             validation_batch_size=validation_batch_size,
-#             config=run_config)
-#         run_logitstrainer_mnist(estimator, augmentation=True)
-#         ckpt = tf.train.get_checkpoint_state(estimator.model_dir)
-#         assert ckpt, "Failed to get any checkpoint!"
-#         assert len(
-#             ckpt.all_model_checkpoint_paths) == 10, ckpt.all_model_checkpoint_paths
-#     finally:
-#         try:
-#             os.unlink(tfrecord_train)
-#             os.unlink(tfrecord_validation)
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#         except Exception:
-#             pass
diff --git a/bob/learn/tensorflow/test/test_estimator_scripts.py b/bob/learn/tensorflow/test/test_estimator_scripts.py
deleted file mode 100644
index b8caeca6f46ba16f4957f321d9c1cc6c59c485b5..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_estimator_scripts.py
+++ /dev/null
@@ -1,227 +0,0 @@
-from __future__ import print_function
-import os
-import shutil
-from glob import glob
-from tempfile import mkdtemp
-from click.testing import CliRunner
-from bob.extension.scripts.click_helper import assert_click_runner_result
-from bob.io.base.test_utils import datafile
-
-from bob.learn.tensorflow.script.db_to_tfrecords import db_to_tfrecords
-from bob.learn.tensorflow.script.train import train
-from bob.learn.tensorflow.script.eval import eval as eval_script
-from bob.learn.tensorflow.script.train_and_evaluate import train_and_evaluate
-from bob.learn.tensorflow.script.predict_bio import predict_bio
-from nose.plugins.attrib import attr
-
-
-db_to_tfrecords_config = datafile('db_to_tfrecords_config.py', __name__)
-input_predict_bio_config = datafile('input_predict_bio_config.py', __name__)
-input_biogenerator_config = datafile('input_biogenerator_config.py', __name__)
-
-
-def input_tfrecords_config(tfrecord_path):
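-    # fill the %(tfrecord_filenames)s placeholder of the config template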
-    with open(datafile('input_tfrecords_config.py', __name__)) as f:
-        doc = '\n' + f.read() + '\n'
-    return doc % {'tfrecord_filenames': tfrecord_path}
-
-
-def estimator_atnt_faces_config(model_dir):
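-    # fill the %(model_dir)s placeholder of the estimator config template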
-    with open(datafile('estimator_atnt_faces_config.py', __name__)) as f:
-        doc = '\n' + f.read() + '\n'
-    return doc % {'model_dir': model_dir}
-
-
-def _create_tfrecord(test_dir):
-    output = os.path.join(test_dir, 'dev.tfrecords')
-    runner = CliRunner()
-    result = runner.invoke(
-        db_to_tfrecords, args=[db_to_tfrecords_config, '--output', output])
-    assert_click_runner_result(result)
-    return output
-
-
-def _create_checkpoint(tmpdir, model_dir, tfrecord_path):
-    config = input_tfrecords_config(
-        tfrecord_path) + estimator_atnt_faces_config(model_dir)
-    config_path = os.path.join(tmpdir, 'train_config.py')
-    with open(config_path, 'w') as f:
-        f.write(config)
-    runner = CliRunner()
-    result = runner.invoke(train, args=[config_path])
-    assert_click_runner_result(result)
-
-
-def _eval(tmpdir, model_dir, tfrecord_path, extra_args=['--run-once']):
-    config = input_tfrecords_config(
-        tfrecord_path) + estimator_atnt_faces_config(model_dir)
-    config_path = os.path.join(tmpdir, 'eval_config.py')
-    with open(config_path, 'w') as f:
-        f.write(config)
-    runner = CliRunner()
-    result = runner.invoke(eval_script, args=[config_path] + extra_args)
-    assert_click_runner_result(result)
-
-
-def _train_and_evaluate(tmpdir, model_dir, tfrecord_path):
-    config = input_tfrecords_config(
-        tfrecord_path) + estimator_atnt_faces_config(model_dir)
-    config_path = os.path.join(tmpdir, 'train_config.py')
-    with open(config_path, 'w') as f:
-        f.write(config)
-    runner = CliRunner()
-    runner.invoke(train_and_evaluate, args=[config_path])
-
-
-def _predict_bio(tmpdir, model_dir, tfrecord_path, extra_options=tuple()):
-    config = input_tfrecords_config(
-        tfrecord_path) + estimator_atnt_faces_config(model_dir)
-    config_path = os.path.join(tmpdir, 'train_config.py')
-    with open(config_path, 'w') as f:
-        f.write(config)
-    runner = CliRunner()
-    return runner.invoke(
-        predict_bio,
-        args=[config_path, input_predict_bio_config] + list(extra_options))
-
-@attr('slow')
-def test_eval():
-    tmpdir = mkdtemp(prefix='bob_')
-    try:
-        model_dir = os.path.join(tmpdir, 'model_dir')
-        eval_dir = os.path.join(model_dir, 'eval')
-
-        print('\nCreating a dummy tfrecord')
-        tfrecord_path = _create_tfrecord(tmpdir)
-
-        print('Training a dummy network')
-        _create_checkpoint(tmpdir, model_dir, tfrecord_path)
-
-        print('Evaluating a dummy network')
-        _eval(tmpdir, model_dir, tfrecord_path)
-
-        evaluated_path = os.path.join(eval_dir, 'evaluated')
-        assert os.path.exists(evaluated_path), evaluated_path
-        with open(evaluated_path) as f:
-            doc = f.read()
-
-        assert '0' in doc, doc
-        assert '200' in doc, doc
-
-        print('Train and evaluate a dummy network')
-        _train_and_evaluate(tmpdir, model_dir, tfrecord_path)
-
-    finally:
-        try:
-            shutil.rmtree(tmpdir)
-        except Exception:
-            pass
-
-@attr('slow')
-def test_eval_keep_n_model():
-    tmpdir = mkdtemp(prefix='bob_')
-    try:
-        model_dir = os.path.join(tmpdir, 'model_dir')
-        eval_dir = os.path.join(model_dir, 'eval')
-
-        print('\nCreating a dummy tfrecord')
-        tfrecord_path = _create_tfrecord(tmpdir)
-
-        print('Training a dummy network')
-        _create_checkpoint(tmpdir, model_dir, tfrecord_path)
-
-        print('Evaluating a dummy network')
-        _eval(tmpdir, model_dir, tfrecord_path, ['-K', '1', '--run-once'])
-
-        evaluated_path = os.path.join(eval_dir, 'evaluated')
-        assert os.path.exists(evaluated_path), evaluated_path
-        with open(evaluated_path) as f:
-            doc = f.read()
-        assert '0 ' in doc, doc
-        assert '200 ' in doc, doc
-        assert len(glob('{}/model.ckpt-*'.format(eval_dir))) == 3, \
-            os.listdir(eval_dir)
-
-    finally:
-        try:
-            shutil.rmtree(tmpdir)
-        except Exception:
-            pass
-
-@attr('slow')
-def test_predict_bio():
-    tmpdir = mkdtemp(prefix='bob_')
-    try:
-        model_dir = os.path.join(tmpdir, 'model_dir')
-
-        tfrecord_path = _create_tfrecord(tmpdir)
-        _create_checkpoint(tmpdir, model_dir, tfrecord_path)
-
-        # Run predict_bio
-        result = _predict_bio(
-            tmpdir, model_dir, tfrecord_path, ['-o', tmpdir, '-vvv'])
-        assert_click_runner_result(result)
-
-    finally:
-        try:
-            shutil.rmtree(tmpdir)
-        except Exception:
-            pass
-
-@attr('slow')
-def test_predict_bio_empty_eval():
-    tmpdir = mkdtemp(prefix='bob_')
-    try:
-        model_dir = os.path.join(tmpdir, 'model_dir')
-        eval_dir = os.path.join(model_dir, 'eval')
-
-        tfrecord_path = _create_tfrecord(tmpdir)
-        _create_checkpoint(tmpdir, model_dir, tfrecord_path)
-
-        # Make an empty eval folder
-        os.makedirs(eval_dir)
-        open(os.path.join(eval_dir, 'checkpoint'), 'w')
-
-        # Run predict_bio
-        result = _predict_bio(
-            tmpdir, model_dir, tfrecord_path,
-            ['-o', tmpdir, '-c', eval_dir, '-vvv'])
-        # the command should fail when the checkpoint path is empty
-        assert_click_runner_result(result, 1)
-
-    finally:
-        try:
-            shutil.rmtree(tmpdir)
-        except Exception:
-            pass
-
-
-# uncomment to run this test locally
-# def test_eval_too_many_open_files_with_biogenerator():
-#     tmpdir = mkdtemp(prefix='bob_')
-#     try:
-#         # create estimator config file
-#         model_dir = os.path.join(tmpdir, 'model_dir')
-#         estimator_config = os.path.join(tmpdir, 'estimator_config.py')
-#         with open(estimator_config, 'w') as f:
-#             f.write(estimator_atnt_faces_config(model_dir))
-
-#         runner = CliRunner()
-
-#         # train and eval with biogenerators
-#         result = runner.invoke(
-#             train, args=[estimator_config, input_biogenerator_config])
-#         assert_click_runner_result(result)
-
-#         print("This test will not stop running. You should kill the process!")
-#         result = runner.invoke(
-#             eval_script, args=[estimator_config,
-#                                input_biogenerator_config,
-#                                '--force-re-run'])
-#         assert_click_runner_result(result)
-
-#     finally:
-#         try:
-#             shutil.rmtree(tmpdir)
-#         except Exception:
-#             pass
diff --git a/bob/learn/tensorflow/test/test_estimator_siamese.py b/bob/learn/tensorflow/test/test_estimator_siamese.py
deleted file mode 100644
index ee324520e4be30eb0a6135ed2cae8d290a263334..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_estimator_siamese.py
+++ /dev/null
@@ -1,264 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-from nose.plugins.attrib import attr
-import tensorflow as tf
-
-from bob.learn.tensorflow.network import dummy
-from bob.learn.tensorflow.estimators import Siamese, Logits
-
-from bob.learn.tensorflow.dataset.siamese_image import shuffle_data_and_labels_image_augmentation as siamese_batch
-from bob.learn.tensorflow.dataset.image import shuffle_data_and_labels_image_augmentation as single_batch
-
-from bob.learn.tensorflow.loss import contrastive_loss, mean_cross_entropy_loss
-from bob.learn.tensorflow.utils.hooks import LoggerHookEstimator
-from .test_estimator_transfer import dummy_adapted
-from bob.learn.tensorflow.utils import reproducible
-
-import pkg_resources
-import shutil
-
-
-# Work around a macOS OpenMP issue, see https://github.com/dmlc/xgboost/issues/1715
-import os
-os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
-
-tfrecord_train = "./train_mnist.tfrecord"
-tfrecord_validation = "./validation_mnist.tfrecord"
-model_dir = "./temp"
-model_dir_adapted = "./temp2"
-
-learning_rate = 0.0001
-data_shape = (250, 250, 3)  # size of the dummy database images
-output_shape = (50, 50)
-data_type = tf.float32
-batch_size = 4
-validation_batch_size = 2
-epochs = 1
-steps = 5000
-
-# Data
-filenames = [
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-]
-labels = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-
-# @attr('slow')
-# def test_siamesetrainer():
-#     # Trainer logits
-#     try:
-
-#         # Setting seed
-#         session_config, run_config, _, _, _ = reproducible.set_seed()
-#         run_config = run_config.replace(save_checkpoints_steps=500)
-
-#         trainer = Siamese(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             config=run_config,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             loss_op=contrastive_loss,
-#             validation_batch_size=validation_batch_size)
-#         run_siamesetrainer(trainer)
-#     finally:
-#         try:
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#             # pass
-#         except Exception:
-#             pass
-
-# @attr('slow')
-# def test_siamesetrainer_transfer():
-#     def logits_input_fn():
-#         return single_batch(
-#             filenames,
-#             labels,
-#             data_shape,
-#             data_type,
-#             batch_size,
-#             epochs=epochs,
-#             output_shape=output_shape)
-
-#     # Train logits first, then the siamese
-#     try:
-#         # Setting seed
-#         session_config, run_config, _, _, _ = reproducible.set_seed()
-#         run_config = run_config.replace(save_checkpoints_steps=500)
-
-#         extra_checkpoint = {
-#             "checkpoint_path": model_dir,
-#             "scopes": dict({
-#                 "Dummy/": "Dummy/"
-#             }),
-#             "trainable_variables": []
-#         }
-
-#         # LOGITS
-#         logits_trainer = Logits(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=2,
-#             config=run_config,
-#             loss_op=mean_cross_entropy_loss,
-#             embedding_validation=False,
-#             validation_batch_size=validation_batch_size)
-#         logits_trainer.train(logits_input_fn, steps=steps)
-
-#         # Now the siamese trainer
-#         trainer = Siamese(
-#             model_dir=model_dir_adapted,
-#             architecture=dummy_adapted,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             config=run_config,
-#             loss_op=contrastive_loss,
-#             validation_batch_size=validation_batch_size,
-#             extra_checkpoint=extra_checkpoint)
-#         run_siamesetrainer(trainer)
-#     finally:
-#         try:
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#             shutil.rmtree(model_dir_adapted, ignore_errors=True)
-#         except Exception:
-#             pass
-
-# @attr('slow')
-# def test_siamesetrainer_transfer_extraparams():
-#     def logits_input_fn():
-#         return single_batch(
-#             filenames,
-#             labels,
-#             data_shape,
-#             data_type,
-#             batch_size,
-#             epochs=epochs,
-#             output_shape=output_shape)
-
-#     # Train logits first, then the siamese
-#     try:
-
-#         extra_checkpoint = {
-#             "checkpoint_path": model_dir,
-#             "scopes": dict({
-#                 "Dummy/": "Dummy/"
-#             }),
-#             "trainable_variables": ["Dummy"]
-#         }
-
-#         # Setting seed
-#         session_config, run_config, _, _, _ = reproducible.set_seed()
-#         run_config = run_config.replace(save_checkpoints_steps=500)
-
-#         # LOGITS
-#         logits_trainer = Logits(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=2,
-#             config=run_config,
-#             loss_op=mean_cross_entropy_loss,
-#             embedding_validation=False,
-#             validation_batch_size=validation_batch_size)
-
-#         logits_trainer.train(logits_input_fn, steps=steps)
-
-#         # Now the siamese trainer
-#         trainer = Siamese(
-#             model_dir=model_dir_adapted,
-#             architecture=dummy_adapted,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             loss_op=contrastive_loss,
-#             config=run_config,
-#             validation_batch_size=validation_batch_size,
-#             extra_checkpoint=extra_checkpoint)
-#         run_siamesetrainer(trainer)
-#     finally:
-#         try:
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#             shutil.rmtree(model_dir_adapted, ignore_errors=True)
-#         except Exception:
-#             pass
-
-
-def run_siamesetrainer(trainer):
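-    """Trains the given siamese estimator on dummy image pairs and checks
-    the validation accuracy."""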
-    # Cleaning up
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-    def input_fn():
-        return siamese_batch(
-            filenames,
-            labels,
-            data_shape,
-            data_type,
-            batch_size,
-            epochs=epochs,
-            output_shape=output_shape,
-            random_flip=True,
-            random_brightness=True,
-            random_contrast=True,
-            random_saturation=True)
-
-    def input_validation_fn():
-        return single_batch(
-            filenames,
-            labels,
-            data_shape,
-            data_type,
-            validation_batch_size,
-            epochs=10,
-            output_shape=output_shape)
-
-    hooks = [
-        LoggerHookEstimator(trainer, batch_size, 300),
-        tf.train.SummarySaverHook(
-            save_steps=1000,
-            output_dir=model_dir,
-            scaffold=tf.train.Scaffold(),
-            summary_writer=tf.summary.FileWriter(model_dir))
-    ]
-
-    trainer.train(input_fn, steps=1, hooks=hooks)
-
-    acc = trainer.evaluate(input_validation_fn)
-    assert acc['accuracy'] > 0.3
-
-    # Cleaning up
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
diff --git a/bob/learn/tensorflow/test/test_estimator_transfer.py b/bob/learn/tensorflow/test/test_estimator_transfer.py
deleted file mode 100644
index 97e692f85adad13b3a887071c0b410341ad4ac11..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_estimator_transfer.py
+++ /dev/null
@@ -1,181 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-from nose.plugins.attrib import attr
-import tensorflow as tf
-
-from bob.learn.tensorflow.network import dummy
-from bob.learn.tensorflow.estimators import Logits, LogitsCenterLoss
-from bob.learn.tensorflow.utils import reproducible
-from bob.learn.tensorflow.loss import mean_cross_entropy_loss
-from .test_estimator_onegraph import run_logitstrainer_mnist
-
-import shutil
-import os
-
-# Work around a macOS OpenMP issue, see https://github.com/dmlc/xgboost/issues/1715
-os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
-
-tfrecord_train = "./train_mnist.tfrecord"
-tfrecord_validation = "./validation_mnist.tfrecord"
-model_dir = "./temp"
-model_dir_adapted = "./temp2"
-
-learning_rate = 0.1
-data_shape = (28, 28, 1)  # size of mnist images
-data_type = tf.float32
-batch_size = 32
-validation_batch_size = 250
-epochs = 6
-steps = 5000
-
-
-def dummy_adapted(inputs,
-                  reuse=False,
-                  mode=tf.estimator.ModeKeys.TRAIN,
-                  trainable_variables=None,
-                  **kwargs):
-    """
-    Create all the necessary variables for this CNN
-
-    Parameters
-    ----------
-        inputs:
-
-        reuse:
-
-        mode:
-
-        trainable_variables:
-    """
-
-    slim = tf.contrib.slim
-    graph, end_points = dummy(
-        inputs,
-        reuse=reuse,
-        mode=mode,
-        trainable_variables=trainable_variables)
-
-    initializer = tf.contrib.layers.xavier_initializer()
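-    # stack two extra fully connected layers (fc2: 50 relu units, fc3: 25
-    # linear units) under the "Adapted" scope for transfer learning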
-    with tf.variable_scope('Adapted', reuse=reuse):
-        name = 'fc2'
-        graph = slim.fully_connected(
-            graph,
-            50,
-            weights_initializer=initializer,
-            activation_fn=tf.nn.relu,
-            scope=name,
-            trainable=True)
-        end_points[name] = graph
-
-        name = 'fc3'
-        graph = slim.fully_connected(
-            graph,
-            25,
-            weights_initializer=initializer,
-            activation_fn=None,
-            scope=name,
-            trainable=True)
-        end_points[name] = graph
-
-    return graph, end_points
-
-# @attr('slow')
-# def test_logitstrainer():
-#     # Trainer logits
-#     try:
-#         _, run_config, _, _, _ = reproducible.set_seed()
-#         embedding_validation = False
-#         trainer = Logits(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             loss_op=mean_cross_entropy_loss,
-#             embedding_validation=embedding_validation,
-#             validation_batch_size=validation_batch_size,
-#             config=run_config)
-#         run_logitstrainer_mnist(trainer, augmentation=True)
-#         del trainer
-
-#         ## Again
-#         extra_checkpoint = {
-#             "checkpoint_path": "./temp",
-#             "scopes": dict({
-#                 "Dummy/": "Dummy/"
-#             }),
-#             "trainable_variables": []
-#         }
-
-#         trainer = Logits(
-#             model_dir=model_dir_adapted,
-#             architecture=dummy_adapted,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             loss_op=mean_cross_entropy_loss,
-#             embedding_validation=embedding_validation,
-#             validation_batch_size=validation_batch_size,
-#             extra_checkpoint=extra_checkpoint,
-#             config=run_config)
-
-#         run_logitstrainer_mnist(trainer, augmentation=True)
-
-#     finally:
-#         try:
-#             os.unlink(tfrecord_train)
-#             os.unlink(tfrecord_validation)
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#             shutil.rmtree(model_dir_adapted, ignore_errors=True)
-#         except Exception:
-#             pass
-
-# @attr('slow')
-# def test_logitstrainer_center_loss():
-#     # Trainer logits
-#     try:
-#         embedding_validation = False
-#         _, run_config, _, _, _ = reproducible.set_seed()
-#         trainer = LogitsCenterLoss(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             embedding_validation=embedding_validation,
-#             validation_batch_size=validation_batch_size,
-#             apply_moving_averages=False,
-#             config=run_config)
-#         run_logitstrainer_mnist(trainer, augmentation=True)
-#         del trainer
-
-#         ## Again
-#         extra_checkpoint = {
-#             "checkpoint_path": "./temp",
-#             "scopes": dict({
-#                 "Dummy/": "Dummy/"
-#             }),
-#             "trainable_variables": ["Dummy"]
-#         }
-
-#         trainer = LogitsCenterLoss(
-#             model_dir=model_dir_adapted,
-#             architecture=dummy_adapted,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=10,
-#             embedding_validation=embedding_validation,
-#             validation_batch_size=validation_batch_size,
-#             extra_checkpoint=extra_checkpoint,
-#             apply_moving_averages=False,
-#             config=run_config)
-
-#         run_logitstrainer_mnist(trainer, augmentation=True)
-
-#     finally:
-#         try:
-#             os.unlink(tfrecord_train)
-#             os.unlink(tfrecord_validation)
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#             shutil.rmtree(model_dir_adapted, ignore_errors=True)
-#         except Exception:
-#             pass
diff --git a/bob/learn/tensorflow/test/test_estimator_triplet.py b/bob/learn/tensorflow/test/test_estimator_triplet.py
deleted file mode 100644
index a376c4c4c2a630dff32c66be2c64c13c06f47692..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_estimator_triplet.py
+++ /dev/null
@@ -1,194 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-from nose.plugins.attrib import attr
-import tensorflow as tf
-
-from bob.learn.tensorflow.network import dummy
-from bob.learn.tensorflow.estimators import Triplet, Logits
-from bob.learn.tensorflow.dataset.triplet_image import shuffle_data_and_labels_image_augmentation as triplet_batch
-from bob.learn.tensorflow.dataset.image import shuffle_data_and_labels_image_augmentation as single_batch
-
-from bob.learn.tensorflow.loss import triplet_loss, mean_cross_entropy_loss
-from bob.learn.tensorflow.utils.hooks import LoggerHookEstimator
-from bob.learn.tensorflow.utils import reproducible
-import pkg_resources
-from .test_estimator_transfer import dummy_adapted
-
-import shutil
-
-# Work around a macOS OpenMP issue, see https://github.com/dmlc/xgboost/issues/1715
-import os
-os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'
-
-tfrecord_train = "./train_mnist.tfrecord"
-tfrecord_validation = "./validation_mnist.tfrecord"
-model_dir = "./temp"
-model_dir_adapted = "./temp2"
-
-learning_rate = 0.001
-data_shape = (250, 250, 3)  # size of the dummy database images
-output_shape = (50, 50)
-data_type = tf.float32
-batch_size = 4
-validation_batch_size = 2
-epochs = 1
-steps = 5000
-
-# Data
-filenames = [
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-    pkg_resources.resource_filename(
-        __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
-]
-labels = [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1]
-
-# @attr('slow')
-# def test_triplet_estimator():
-#     # Trainer logits
-#     try:
-#         trainer = Triplet(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             loss_op=triplet_loss,
-#             validation_batch_size=validation_batch_size)
-#         run_triplet_estimator(trainer)
-#     finally:
-#         try:
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#             # pass
-#         except Exception:
-#             pass
-
-# @attr('slow')
-# def test_triplettrainer_transfer():
-#     def logits_input_fn():
-#         return single_batch(
-#             filenames,
-#             labels,
-#             data_shape,
-#             data_type,
-#             batch_size,
-#             epochs=epochs,
-#             output_shape=output_shape)
-
-#     # Train logits first, then the triplet
-#     try:
-
-#         extra_checkpoint = {
-#             "checkpoint_path": model_dir,
-#             "scopes": dict({
-#                 "Dummy/": "Dummy/"
-#             }),
-#             "trainable_variables": []
-#         }
-
-#         # LOGITS
-#         logits_trainer = Logits(
-#             model_dir=model_dir,
-#             architecture=dummy,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             n_classes=2,
-#             loss_op=mean_cross_entropy_loss,
-#             embedding_validation=False,
-#             validation_batch_size=validation_batch_size)
-#         logits_trainer.train(logits_input_fn, steps=steps)
-
-#         # Now the triplet trainer
-#         trainer = Triplet(
-#             model_dir=model_dir_adapted,
-#             architecture=dummy_adapted,
-#             optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#             loss_op=triplet_loss,
-#             validation_batch_size=validation_batch_size,
-#             extra_checkpoint=extra_checkpoint)
-#         run_triplet_estimator(trainer)
-#     finally:
-#         try:
-#             shutil.rmtree(model_dir, ignore_errors=True)
-#             shutil.rmtree(model_dir_adapted, ignore_errors=True)
-#         except Exception:
-#             pass
-
-
-def run_triplet_estimator(trainer):
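-    """Trains the given triplet estimator on dummy image triplets and
-    checks the validation accuracy."""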
-    # Cleaning up
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-    def input_fn():
-        return triplet_batch(
-            filenames,
-            labels,
-            data_shape,
-            data_type,
-            batch_size,
-            epochs=epochs,
-            output_shape=output_shape,
-            random_flip=True,
-            random_brightness=True,
-            random_contrast=True,
-            random_saturation=True)
-
-    def input_validation_fn():
-        return single_batch(
-            filenames,
-            labels,
-            data_shape,
-            data_type,
-            validation_batch_size,
-            epochs=10,
-            output_shape=output_shape)
-
-    hooks = [
-        LoggerHookEstimator(trainer, batch_size, 300),
-        tf.train.SummarySaverHook(
-            save_steps=1000,
-            output_dir=model_dir,
-            scaffold=tf.train.Scaffold(),
-            summary_writer=tf.summary.FileWriter(model_dir))
-    ]
-
-    trainer.train(input_fn, steps=steps, hooks=hooks)
-
-    acc = trainer.evaluate(input_validation_fn)
-    assert acc['accuracy'] > 0.3
-
-    # Cleaning up
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
diff --git a/bob/learn/tensorflow/test/test_hooks.py b/bob/learn/tensorflow/test/test_hooks.py
deleted file mode 100644
index 62013907744764b930f508a20727fb6bc2faca75..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_hooks.py
+++ /dev/null
@@ -1,67 +0,0 @@
-from bob.extension.config import load as read_config_files
-from bob.io.base.test_utils import datafile
-from bob.learn.tensorflow.estimators import Logits
-from bob.learn.tensorflow.loss.BaseLoss import mean_cross_entropy_loss
-from bob.learn.tensorflow.utils.hooks import EarlyStopping, EarlyStopException
-import nose
-import tensorflow as tf
-import shutil
-from nose.plugins.attrib import attr
-
-
-# @nose.tools.raises(EarlyStopException)
-# @attr('slow')
-# def test_early_stopping_linear_classifier():
-#     config = read_config_files([
-#         datafile('mnist_input_fn.py', __name__),
-#         datafile('mnist_estimator.py', __name__),
-#     ])
-#     estimator = config.estimator
-#     train_input_fn = config.train_input_fn
-#     eval_input_fn = config.eval_input_fn
-
-#     hooks = [
-#         EarlyStopping(
-#             'linear/metrics/accuracy/total', min_delta=0.001, patience=1),
-#     ]
-
-#     train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
-#     eval_spec = tf.estimator.EvalSpec(
-#         input_fn=eval_input_fn, hooks=hooks, throttle_secs=2, steps=10)
-
-#     try:
-#         tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
-#     finally:
-#         shutil.rmtree(estimator.model_dir)
-
-
-# @nose.tools.raises(EarlyStopException)
-# @attr('slow')
-# def test_early_stopping_logit_trainer():
-#     config = read_config_files([
-#         datafile('mnist_input_fn.py', __name__),
-#     ])
-#     train_input_fn = config.train_input_fn
-#     eval_input_fn = config.eval_input_fn
-
-#     hooks = [
-#         EarlyStopping('accuracy/value', min_delta=0.001, patience=1),
-#     ]
-
-#     train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
-#     eval_spec = tf.estimator.EvalSpec(
-#         input_fn=eval_input_fn, hooks=hooks, throttle_secs=2, steps=10)
-
-#     def architecture(data, mode, **kwargs):
-#         return data, dict()
-
-#     optimizer = tf.train.GradientDescentOptimizer(learning_rate=1e-1)
-#     loss_op = mean_cross_entropy_loss
-
-#     estimator = Logits(
-#         architecture, optimizer, loss_op, n_classes=10, model_dir=None)
-
-#     try:
-#         tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
-#     finally:
-#         shutil.rmtree(estimator.model_dir)
diff --git a/bob/learn/tensorflow/test/test_image_dataset.py b/bob/learn/tensorflow/test/test_image_dataset.py
deleted file mode 100644
index 959c219dbb5282db842ac48e80d05aa8fd591662..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_image_dataset.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-
-from bob.learn.tensorflow.network import dummy
-from bob.learn.tensorflow.estimators import Logits, LogitsCenterLoss
-
-from bob.learn.tensorflow.dataset.image import shuffle_data_and_labels_image_augmentation
-import pkg_resources
-
-from bob.learn.tensorflow.utils.hooks import LoggerHookEstimator
-from bob.learn.tensorflow.loss import mean_cross_entropy_loss
-from nose.plugins.attrib import attr
-
-import shutil
-import os
-
-model_dir = "./temp"
-
-learning_rate = 0.1
-data_shape = (250, 250, 3)  # size of the dummy database images
-data_type = tf.float32
-batch_size = 16
-validation_batch_size = 250
-epochs = 1
-steps = 5000
-
-@attr('slow')
-def test_logitstrainer_images():
-    # Trainer logits
-    try:
-        embedding_validation = False
-        trainer = Logits(
-            model_dir=model_dir,
-            architecture=dummy,
-            optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-            n_classes=10,
-            loss_op=mean_cross_entropy_loss,
-            embedding_validation=embedding_validation,
-            validation_batch_size=validation_batch_size,
-            apply_moving_averages=False)
-        run_logitstrainer_images(trainer)
-    finally:
-        try:
-            shutil.rmtree(model_dir, ignore_errors=True)
-        except Exception:
-            pass
-
-
-def run_logitstrainer_images(trainer):
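-    """Trains the given estimator directly from image files and checks the
-    validation accuracy."""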
-    # Cleaning up
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-    filenames = [
-        pkg_resources.resource_filename(
-            __name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
-        pkg_resources.resource_filename(
-            __name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-        pkg_resources.resource_filename(
-            __name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
-        pkg_resources.resource_filename(
-            __name__, 'data/dummy_image_database/m304_02_f12_i0_0.png')
-    ]
-    labels = [0, 0, 1, 1]
-
-    def input_fn():
-        return shuffle_data_and_labels_image_augmentation(
-            filenames,
-            labels,
-            data_shape,
-            data_type,
-            batch_size,
-            epochs=epochs)
-
-    def input_fn_validation():
-        return shuffle_data_and_labels_image_augmentation(
-            filenames,
-            labels,
-            data_shape,
-            data_type,
-            validation_batch_size,
-            epochs=1000)
-
-    hooks = [
-        LoggerHookEstimator(trainer, 16, 300),
-        tf.train.SummarySaverHook(
-            save_steps=1000,
-            output_dir=model_dir,
-            scaffold=tf.train.Scaffold(),
-            summary_writer=tf.summary.FileWriter(model_dir))
-    ]
-
-    trainer.train(input_fn, steps=steps, hooks=hooks)
-
-    acc = trainer.evaluate(input_fn_validation)
-    assert acc['accuracy'] > 0.30, acc['accuracy']
-
-    # Cleaning up
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
diff --git a/bob/learn/tensorflow/test/test_layers.py b/bob/learn/tensorflow/test/test_layers.py
deleted file mode 100644
index 43064bde7f07b9424cb91d7c9bae99087bb58456..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_layers.py
+++ /dev/null
@@ -1,41 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-# @date: Thu 13 Oct 2016 13:35 CEST
-
-import tensorflow as tf
-import numpy as np
-from bob.learn.tensorflow.layers import maxout
-from nose.tools import assert_raises_regexp
-
-slim = tf.contrib.slim
-
-
-def test_fully_connected():
-    tf.reset_default_graph()
-    x = np.zeros([64, 50])
-    graph = slim.fully_connected(x, 50, activation_fn=None)
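-    # maxout keeps the maximum of each group of 50 / 10 = 5 features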
-    graph = maxout(graph, num_units=10)
-    assert graph.get_shape().as_list() == [64, 10]
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-
-def test_nchw():
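-    # maxout over the channel axis: 10 feature maps collapse into 1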
-    tf.reset_default_graph()
-    x = np.random.uniform(size=(10, 100, 100, 3)).astype(np.float32)
-    graph = slim.conv2d(x, 10, [3, 3])
-    graph = maxout(graph, num_units=1)
-    assert graph.get_shape().as_list() == [10, 100, 100, 1]
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
-
-
-def test_invalid_shape():
-    tf.reset_default_graph()
-    x = np.random.uniform(size=(10, 100, 100, 3)).astype(np.float32)
-    graph = slim.conv2d(x, 3, [3, 3])
-    with assert_raises_regexp(ValueError, 'number of features'):
-        graph = maxout(graph, num_units=2)
-    tf.reset_default_graph()
-    assert len(tf.global_variables()) == 0
diff --git a/bob/learn/tensorflow/test/test_loss.py b/bob/learn/tensorflow/test/test_loss.py
deleted file mode 100644
index 92d94c8f1812512b23167be086807a0ffe65a0ce..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_loss.py
+++ /dev/null
@@ -1,76 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import tensorflow as tf
-import numpy
-from bob.learn.tensorflow.loss import (
-    balanced_softmax_cross_entropy_loss_weights,
-    balanced_sigmoid_cross_entropy_loss_weights)
-
-
-def test_balanced_softmax_cross_entropy_loss_weights():
-    labels = numpy.array([[1, 0, 0],
-                          [1, 0, 0],
-                          [0, 0, 1],
-                          [0, 1, 0],
-                          [0, 0, 1],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [0, 0, 1],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [0, 1, 0],
-                          [1, 0, 0],
-                          [0, 1, 0],
-                          [1, 0, 0],
-                          [0, 0, 1],
-                          [0, 0, 1],
-                          [1, 0, 0],
-                          [0, 0, 1],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [0, 1, 0],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [1, 0, 0],
-                          [0, 1, 0],
-                          [1, 0, 0],
-                          [0, 0, 1],
-                          [1, 0, 0]], dtype="int32")
-
-    with tf.Session() as session:
-        weights = session.run(balanced_softmax_cross_entropy_loss_weights(labels))
-
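-    # each weight is n_samples / (n_classes * class_count): the 32 samples
-    # contain 20, 5 and 7 examples of classes 0, 1 and 2 respectively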
-    expected_weights = numpy.array(
-        [0.53333336, 0.53333336, 1.5238096, 2.1333334,
-         1.5238096, 0.53333336, 0.53333336, 1.5238096,
-         0.53333336, 0.53333336, 0.53333336, 0.53333336,
-         0.53333336, 0.53333336, 2.1333334, 0.53333336,
-         2.1333334, 0.53333336, 1.5238096, 1.5238096,
-         0.53333336, 1.5238096, 0.53333336, 0.53333336,
-         2.1333334, 0.53333336, 0.53333336, 0.53333336,
-         2.1333334, 0.53333336, 1.5238096, 0.53333336],
-        dtype="float32")
-
-    assert numpy.allclose(weights, expected_weights)
-
-
-def test_balanced_sigmoid_cross_entropy_loss_weights():
-    labels = numpy.array([1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0,
-                          1, 1, 0, 1, 1, 1, 0, 1, 0, 1], dtype="int32")
-    
-    with tf.Session() as session:
-        weights = session.run(balanced_sigmoid_cross_entropy_loss_weights(labels, dtype='float32'))
-        
-    expected_weights = numpy.array([0.8, 0.8, 1.3333334, 1.3333334, 1.3333334, 0.8,
-                                    0.8, 1.3333334, 0.8, 0.8, 0.8, 0.8,
-                                    0.8, 0.8, 1.3333334, 0.8, 1.3333334, 0.8,
-                                    1.3333334, 1.3333334, 0.8, 1.3333334, 0.8, 0.8,
-                                    1.3333334, 0.8, 0.8, 0.8, 1.3333334, 0.8,
-                                    1.3333334, 0.8], dtype="float32")
-
-    assert numpy.allclose(weights, expected_weights)
-
diff --git a/bob/learn/tensorflow/test/test_regression.py b/bob/learn/tensorflow/test/test_regression.py
deleted file mode 100644
index 6d8e05ff03e3385d20745ae0b1e625e46c8bde82..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_regression.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from nose.plugins.attrib import attr
-from bob.learn.tensorflow.estimators import Regressor
-from tensorflow import keras
-import tensorflow as tf
-import tensorflow.contrib.slim as slim
-
-# @attr('slow')
-# def test_regressor():
-
-#     boston_housing = keras.datasets.boston_housing
-#     (train_data, train_labels), (test_data,
-#                                  test_labels) = boston_housing.load_data()
-
-#     mean = train_data.mean(axis=0)
-#     std = train_data.std(axis=0)
-#     train_data = (train_data - mean) / std
-#     test_data = (test_data - mean) / std
-
-#     def input_fn(mode):
-#         if mode == tf.estimator.ModeKeys.TRAIN:
-#             features, labels = train_data, train_labels
-#         else:
-#             features, labels, = test_data, test_labels
-#         dataset = tf.data.Dataset.from_tensor_slices(
-#             (features, labels, [str(x) for x in labels]))
-#         dataset = dataset.batch(1)
-#         if mode == tf.estimator.ModeKeys.TRAIN:
-#             dataset = dataset.apply(
-#                 tf.contrib.data.shuffle_and_repeat(len(labels), 2))
-#         data, label, key = dataset.make_one_shot_iterator().get_next()
-#         return {'data': data, 'key': key}, label
-
-#     def train_input_fn():
-#         return input_fn(tf.estimator.ModeKeys.TRAIN)
-
-#     def eval_input_fn():
-#         return input_fn(tf.estimator.ModeKeys.EVAL)
-
-#     def architecture(data, mode, **kwargs):
-#         endpoints = {}
-
-#         with tf.variable_scope('DNN'):
-
-#             name = 'fc1'
-#             net = slim.fully_connected(data, 64, scope=name)
-#             endpoints[name] = net
-
-#             name = 'fc2'
-#             net = slim.fully_connected(net, 64, scope=name)
-#             endpoints[name] = net
-
-#         return net, endpoints
-
-#     estimator = Regressor(architecture)
-
-#     estimator.train(train_input_fn)
-
-#     list(estimator.predict(eval_input_fn))
-
-#     evaluations = estimator.evaluate(eval_input_fn)
-
-#     assert 'rmse' in evaluations
-#     assert 'loss' in evaluations
diff --git a/bob/learn/tensorflow/test/test_style_transfer.py b/bob/learn/tensorflow/test/test_style_transfer.py
deleted file mode 100644
index 0a91f74b0d143a99811b09914be497de8887d5d5..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_style_transfer.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from __future__ import print_function
-import os
-import shutil
-from glob import glob
-from tempfile import mkdtemp
-from click.testing import CliRunner
-from bob.io.base.test_utils import datafile
-import pkg_resources
-
-import tensorflow as tf
-
-from bob.learn.tensorflow.utils import load_mnist, create_mnist_tfrecord
-from bob.learn.tensorflow.utils.hooks import LoggerHookEstimator
-from bob.learn.tensorflow.loss import mean_cross_entropy_loss
-from bob.learn.tensorflow.utils import reproducible
-from .test_estimator_onegraph import run_logitstrainer_mnist
-
-from bob.learn.tensorflow.estimators import Logits
-from bob.learn.tensorflow.network import dummy
-from bob.learn.tensorflow.script.style_transfer import style_transfer
-from nose.plugins.attrib import attr
-
-dummy_config = datafile('style_transfer.py', __name__)
-CONFIG = '''
-from bob.learn.tensorflow.network import dummy
-architecture = dummy
-import pkg_resources
-
-checkpoint_dir = "./temp/"
-
-style_end_points = ["conv1"]
-content_end_points = ["fc1"]
-
-scopes = {"Dummy/":"Dummy/"}
-
-'''
-
-
-#tfrecord_train = "./train_mnist.tfrecord"
-model_dir = "./temp"
-output_style_image = 'output_style.png'
-
-learning_rate = 0.1
-data_shape = (28, 28, 1)  # size of atnt images
-data_type = tf.float32
-batch_size = 32
-epochs = 1
-steps = 100
-
-# @attr('slow')
-# def test_style_transfer():
-#     with open(dummy_config, 'w') as f:
-#         f.write(CONFIG)
-
-#     # Trainer logits
-
-#     # CREATING FAKE MODEL USING MNIST
-#     _, run_config,_,_,_ = reproducible.set_seed()
-#     trainer = Logits(
-#         model_dir=model_dir,
-#         architecture=dummy,
-#         optimizer=tf.train.GradientDescentOptimizer(learning_rate),
-#         n_classes=10,
-#         loss_op=mean_cross_entropy_loss,
-#         config=run_config)
-#     run_logitstrainer_mnist(trainer)
-
-#     # Style transfer using this fake model
-#     runner = CliRunner()
-#     result = runner.invoke(style_transfer,
-#                            args=[pkg_resources.resource_filename( __name__, 'data/dummy_image_database/m301_01_p01_i0_0_GRAY.png'),
-#                                output_style_image, dummy_config])
-
-#     try:
-#         os.unlink(dummy_config)
-#         shutil.rmtree(model_dir, ignore_errors=True)
-#     except Exception:
-#         pass
-
-
diff --git a/bob/learn/tensorflow/test/test_utils.py b/bob/learn/tensorflow/test/test_utils.py
deleted file mode 100644
index e08b5311581c25f46dccc47b78a8a73549ab5142..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/test/test_utils.py
+++ /dev/null
@@ -1,64 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-
-import numpy
-from bob.learn.tensorflow.utils import compute_embedding_accuracy, \
-     compute_embedding_accuracy_tensors
-
-import tensorflow as tf
-"""
-Some unit tests for the datashuffler
-"""
-
-
-def test_embedding_accuracy():
-
-    numpy.random.seed(10)
-    samples_per_class = 5
-
-    class_a = numpy.random.normal(
-        loc=0, scale=0.1, size=(samples_per_class, 2))
-    labels_a = numpy.zeros(samples_per_class)
-
-    class_b = numpy.random.normal(
-        loc=10, scale=0.1, size=(samples_per_class, 2))
-    labels_b = numpy.ones(samples_per_class)
-
-    data = numpy.vstack((class_a, class_b))
-    labels = numpy.concatenate((labels_a, labels_b))
-
-    assert compute_embedding_accuracy(data, labels) == 1.
-
-    # Adding noise
-    noise = numpy.random.normal(loc=0, scale=0.1, size=(samples_per_class, 2))
-    noise_labels = numpy.ones(samples_per_class)
-
-    data = numpy.vstack((data, noise))
-    labels = numpy.concatenate((labels, noise_labels))
-
-    assert compute_embedding_accuracy(data, labels) == 10 / 15.
-
-
-def test_embedding_accuracy_tensors():
-
-    numpy.random.seed(10)
-    samples_per_class = 5
-
-    class_a = numpy.random.normal(
-        loc=0, scale=0.1, size=(samples_per_class, 2))
-    labels_a = numpy.zeros(samples_per_class)
-
-    class_b = numpy.random.normal(
-        loc=10, scale=0.1, size=(samples_per_class, 2))
-    labels_b = numpy.ones(samples_per_class)
-
-    data = numpy.vstack((class_a, class_b))
-    labels = numpy.concatenate((labels_a, labels_b))
-
-    data = tf.convert_to_tensor(data.astype("float32"))
-    labels = tf.convert_to_tensor(labels.astype("int64"))
-
-    sess = tf.Session()
-    accuracy = sess.run(compute_embedding_accuracy_tensors(data, labels))
-    assert accuracy == 1.
diff --git a/bob/learn/tensorflow/test/__init__.py b/bob/learn/tensorflow/tests/__init__.py
similarity index 100%
rename from bob/learn/tensorflow/test/__init__.py
rename to bob/learn/tensorflow/tests/__init__.py
diff --git a/bob/learn/tensorflow/tests/data/db_to_tfrecords_config.py b/bob/learn/tensorflow/tests/data/db_to_tfrecords_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..52799ddd8bf9cd2c1cbf2f52bab0419c90678d62
--- /dev/null
+++ b/bob/learn/tensorflow/tests/data/db_to_tfrecords_config.py
@@ -0,0 +1,17 @@
+import tensorflow as tf
+from bob.learn.tensorflow.data import dataset_using_generator
+
+mnist = tf.keras.datasets.mnist
+
+(x_train, y_train), (_, _) = mnist.load_data()
+samples = zip(tf.keras.backend.arange(len(x_train)), x_train, y_train)
+
+
+def reader(sample):
+    data = sample[1]
+    label = sample[2]
+    key = str(sample[0]).encode("utf-8")
+    return ({"data": data, "key": key}, label)
+
+
+dataset = dataset_using_generator(samples, reader)
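For context, a minimal usage sketch (not part of the patch) of the ``dataset`` object this config defines; it assumes ``dataset_using_generator`` returns an ordinary ``tf.data.Dataset`` yielding the reader's ``({"data": ..., "key": ...}, label)`` pairs:

# Sketch only: peek at the first few samples produced by the config above.
import itertools

for features, label in itertools.islice(dataset, 3):
    print(features["key"], features["data"].shape, int(label))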
diff --git a/bob/learn/tensorflow/tests/test_dataset.py b/bob/learn/tensorflow/tests/test_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b5d46fa91ac00ad7ac064da962bb729f106eb94
--- /dev/null
+++ b/bob/learn/tensorflow/tests/test_dataset.py
@@ -0,0 +1,18 @@
+import numpy as np
+
+from bob.learn.tensorflow.data import dataset_using_generator
+
+
+def test_dataset_using_generator():
+    def reader(f):
+        key = 0
+        label = 0
+        yield {"data": f, "key": key}, label
+
+    shape = (2, 2, 1)
+    samples = [np.ones(shape, dtype="float32") * i for i in range(10)]
+
+    dataset = dataset_using_generator(samples, reader, multiple_samples=True)
+    for i, sample in enumerate(dataset):
+        assert sample[0]["data"].shape == shape
+        assert np.allclose(sample[0]["data"], samples[i])
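``multiple_samples=True`` tells ``dataset_using_generator`` that ``reader`` is a generator which may emit more than one ``(features, label)`` pair per input sample. A hypothetical variation of the reader above (illustrative only, not from the patch):

def augmenting_reader(f):
    # Sketch only: one input sample expands into two dataset entries,
    # the original array and a flipped copy.
    yield {"data": f, "key": 0}, 0
    yield {"data": np.flip(f, axis=0), "key": 0}, 0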
diff --git a/bob/learn/tensorflow/tests/test_datasets_to_tfrecords.py b/bob/learn/tensorflow/tests/test_datasets_to_tfrecords.py
new file mode 100644
index 0000000000000000000000000000000000000000..4b6548abab4d14e5b308cb73a96fe7ddacc89d25
--- /dev/null
+++ b/bob/learn/tensorflow/tests/test_datasets_to_tfrecords.py
@@ -0,0 +1,38 @@
+import pkg_resources
+import tensorflow as tf
+from bob.extension.config import load
+from bob.extension.scripts.click_helper import assert_click_runner_result
+from bob.learn.tensorflow.data.tfrecords import dataset_from_tfrecord
+from bob.learn.tensorflow.scripts.datasets_to_tfrecords import datasets_to_tfrecords
+from click.testing import CliRunner
+
+regenerate_reference = False
+
+dummy_config = pkg_resources.resource_filename(
+    "bob.learn.tensorflow", "tests/data/db_to_tfrecords_config.py"
+)
+
+
+def compare_datasets(ds1, ds2):
+    for values1, values2 in zip(ds1, ds2):
+        values1 = tf.nest.flatten(values1)
+        values2 = tf.nest.flatten(values2)
+        for v1, v2 in zip(values1, values2):
+            if not tf.reduce_all(input_tensor=tf.math.equal(v1, v2)):
+                return False
+    return True
+
+
+def test_datasets_to_tfrecords():
+    runner = CliRunner()
+    with runner.isolated_filesystem():
+        output_path = "./test"
+        args = (dummy_config, "--output", output_path)
+        result = runner.invoke(datasets_to_tfrecords, args=args, standalone_mode=False)
+        assert_click_runner_result(result)
+        # read back the tfrecord
+        dataset2 = dataset_from_tfrecord(output_path)
+        dataset1 = load(
+            [dummy_config], attribute_name="dataset", entry_point_group="bob"
+        )
+        assert compare_datasets(dataset1, dataset2)
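As a sanity check on ``compare_datasets`` itself (not part of the patch), a dataset compared with itself must come out equal, since each element is flattened with ``tf.nest`` and the tensors are compared pairwise:

# Sketch only: identical in-memory datasets compare equal.
ds = tf.data.Dataset.from_tensor_slices([1, 2, 3])
assert compare_datasets(ds, ds)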
diff --git a/bob/learn/tensorflow/tests/test_mine.py b/bob/learn/tensorflow/tests/test_mine.py
new file mode 100644
index 0000000000000000000000000000000000000000..89cf943b858bd87e7c61707a9616c7894254c1fd
--- /dev/null
+++ b/bob/learn/tensorflow/tests/test_mine.py
@@ -0,0 +1,34 @@
+import numpy as np
+import tensorflow as tf
+
+from bob.learn.tensorflow.models import MineModel
+
+
+def run_mine(is_mine_f):
+    np.random.seed(10)
+    N = 20000
+    d = 1
+    EPOCHS = 100
+
+    X = np.sign(np.random.normal(0.0, 1.0, [N, d]))
+    Z = X + np.random.normal(0.0, np.sqrt(0.2), [N, d])
+
+    from sklearn.feature_selection import mutual_info_regression
+
+    mi_numerical = mutual_info_regression(X.reshape(-1, 1), Z.ravel())[0]
+
+    model = MineModel(is_mine_f=is_mine_f)
+    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01))
+
+    history = model.fit(x=[X, Z], epochs=EPOCHS, verbose=1, batch_size=100)
+    mine = -np.array(history.history["loss"])[-1]
+
+    assert np.allclose(mine, mi_numerical, atol=0.01)
+
+
+def test_mine():
+    run_mine(False)
+
+
+def test_mine_f():
+    run_mine(True)
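The assertion above relies on the negative of ``MineModel``'s final training loss being its mutual-information estimate (in nats), compared against sklearn's numerical estimate. As a plausibility bound (not part of the patch): X is a fair ±1 sign variable, so any valid estimate must stay below H(X) = log(2) ≈ 0.693 nats:

import numpy as np

# Sketch only: I(X; Z) <= H(X) = log(2) when X takes two equiprobable values.
print(np.log(2))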
diff --git a/bob/learn/tensorflow/tests/test_utils.py b/bob/learn/tensorflow/tests/test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6ae3aee142ff30fd3bb4ccde5d9aadce8542c76
--- /dev/null
+++ b/bob/learn/tensorflow/tests/test_utils.py
@@ -0,0 +1,26 @@
+import numpy
+import tensorflow as tf
+
+from bob.learn.tensorflow.metrics import EmbeddingAccuracy
+
+
+def test_embedding_accuracy_tensors():
+
+    numpy.random.seed(10)
+    samples_per_class = 5
+    m = EmbeddingAccuracy()
+
+    class_a = numpy.random.normal(loc=0, scale=0.1, size=(samples_per_class, 2))
+    labels_a = numpy.zeros(samples_per_class)
+
+    class_b = numpy.random.normal(loc=10, scale=0.1, size=(samples_per_class, 2))
+    labels_b = numpy.ones(samples_per_class)
+
+    data = numpy.vstack((class_a, class_b))
+    labels = numpy.concatenate((labels_a, labels_b))
+
+    data = tf.convert_to_tensor(value=data.astype("float32"))
+    labels = tf.convert_to_tensor(value=labels.astype("int64"))
+    m(labels, data)
+
+    assert m.result() == 1.0
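Keras metrics are stateful: each ``m(labels, data)`` call accumulates into the running result. A usage sketch, assuming ``EmbeddingAccuracy`` follows the standard ``tf.keras.metrics.Metric`` interface (not part of the patch):

# Sketch only: reset accumulated state before reusing the metric.
m.reset_states()
m(labels, data)
print(float(m.result()))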
diff --git a/bob/learn/tensorflow/utils/__init__.py b/bob/learn/tensorflow/utils/__init__.py
index aab52096a4dfd437279f16c55e808b5f9b06e8f1..444a481681f2f06bb9bb06377db7c492e3f201fc 100644
--- a/bob/learn/tensorflow/utils/__init__.py
+++ b/bob/learn/tensorflow/utils/__init__.py
@@ -1,11 +1,3 @@
-from .util import *
-from .singleton import Singleton
-from .session import Session
-from .hooks import *
-from .eval import *
 from .keras import *
-from .train import *
-from .graph import *
-from .network import *
 from .math import *
-from .reproducible import *
+from .image import *
diff --git a/bob/learn/tensorflow/utils/eval.py b/bob/learn/tensorflow/utils/eval.py
deleted file mode 100644
index cf836f6e0d1f6742b10fee3d5fa9390d255f1dab..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/eval.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import tensorflow as tf
-
-
-def get_global_step(path):
-    """Returns the global number associated with the model checkpoint path. The
-    checkpoint must have been saved with the
-    :any:`tf.train.MonitoredTrainingSession`.
-
-    Parameters
-    ----------
-    path : str
-        The path to model checkpoint, usually ckpt.model_checkpoint_path
-
-    Returns
-    -------
-    global_step : int
-        The global step number.
-    """
-    checkpoint_reader = tf.train.NewCheckpointReader(path)
-    return checkpoint_reader.get_tensor(tf.GraphKeys.GLOBAL_STEP)
diff --git a/bob/learn/tensorflow/utils/graph.py b/bob/learn/tensorflow/utils/graph.py
deleted file mode 100644
index e434289d12a4fad5cfa5cfab41d4555fa39fc485..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/graph.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import tensorflow as tf
-
-
-def call_on_frozen_graph(
-    graph_def_path,
-    input,
-    return_elements,
-    input_name,
-    name=None,
-    **kwargs
-):
-    """Loads a frozen graph def file (.pb) and replaces its input with the given input
-    and return the requested output tensors.
-
-    Parameters
-    ----------
-    graph_def_path : str
-        Path to the graph definition file
-    input : object
-        Input tensor
-    return_elements : [str]
-        A list of strings which corresponds to operations in the graph.
-    input_name : str, optional
-        The name of input in the graph that will be replaced by input.
-    name : str, optional
-        The scope of the imported operations. Defaults to "import".
-    **kwargs
-        Extra arguments to be passed to tf.import_graph_def
-
-    Returns
-    -------
-    list
-        List of requested operations. Normally you would use
-        ``returned_operations[0].outputs[0]``
-    """
-    with tf.gfile.GFile(graph_def_path, "rb") as f:
-        graph_def = tf.GraphDef()
-        graph_def.ParseFromString(f.read())
-    input_map = {input_name: input}
-
-    return tf.import_graph_def(
-        graph_def,
-        input_map=input_map,
-        return_elements=return_elements,
-        name=name,
-        **kwargs
-    )
diff --git a/bob/learn/tensorflow/utils/hooks.py b/bob/learn/tensorflow/utils/hooks.py
deleted file mode 100644
index 37adae26163d5b8ef0cef188c9408f14b22d7923..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/hooks.py
+++ /dev/null
@@ -1,199 +0,0 @@
-from datetime import datetime
-from tensorflow.python.training.basic_session_run_hooks import _as_graph_element
-import logging
-import numpy as np
-import tensorflow as tf
-import time
-
-logger = logging.getLogger(__name__)
-
-
-class TensorSummary(tf.estimator.SessionRunHook):
-    """Adds the given (scalar) tensors to tensorboard summaries"""
-
-    def __init__(self, tensors, tensor_names=None, **kwargs):
-        super().__init__(**kwargs)
-        self.tensors = list(tensors)
-        if tensor_names is None:
-            tensor_names = [t.name for t in self.tensors]
-        self.tensor_names = list(tensor_names)
-
-    def begin(self):
-        for name, tensor in zip(self.tensor_names, self.tensors):
-            tf.summary.scalar(name, tensor)
-
-
-class LoggerHook(tf.estimator.SessionRunHook):
-    """Logs loss and runtime."""
-
-    def __init__(self, loss, batch_size, log_frequency):
-        self.loss = loss
-        self.batch_size = batch_size
-        self.log_frequency = log_frequency
-
-    def begin(self):
-        self._step = -1
-        self._start_time = time.time()
-
-    def before_run(self, run_context):
-        self._step += 1
-        return tf.train.SessionRunArgs(self.loss)  # Asks for loss value.
-
-    def after_run(self, run_context, run_values):
-        if self._step % self.log_frequency == 0:
-            current_time = time.time()
-            duration = current_time - self._start_time
-            self._start_time = current_time
-
-            loss_value = run_values.results
-            examples_per_sec = self.log_frequency * self.batch_size / duration
-            sec_per_batch = float(duration / self.log_frequency)
-
-            format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
-                          'sec/batch)')
-            print(format_str % (datetime.now(), self._step, loss_value,
-                                examples_per_sec, sec_per_batch))
-
-
-class LoggerHookEstimator(tf.estimator.SessionRunHook):
-    """Logs loss and runtime."""
-
-    def __init__(self, estimator, batch_size, log_frequency):
-        self.estimator = estimator
-        self.batch_size = batch_size
-        self.log_frequency = log_frequency
-
-    def begin(self):
-        self._step = -1
-        self._start_time = time.time()
-
-    def before_run(self, run_context):
-        self._step += 1
-        # Asks for loss value.
-        return tf.train.SessionRunArgs(self.estimator.loss)
-
-    def after_run(self, run_context, run_values):
-        if self._step % self.log_frequency == 0:
-            current_time = time.time()
-            duration = current_time - self._start_time
-            self._start_time = current_time
-
-            loss_value = run_values.results
-            examples_per_sec = self.log_frequency * self.batch_size / duration
-            sec_per_batch = float(duration / self.log_frequency)
-
-            format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
-                          'sec/batch)')
-            print(format_str % (datetime.now(), self._step, loss_value,
-                                examples_per_sec, sec_per_batch))
-
-
-class EarlyStopException(Exception):
-    pass
-
-
-class EarlyStopping(tf.estimator.SessionRunHook):
-    """Stop training when a monitored quantity has stopped improving.
-    Based on Keras's EarlyStopping callback:
-    https://keras.io/callbacks/#earlystopping
-    The original implementation worked for epochs. Currently there is no way
-    to know the epoch count in estimator training. Hence, the criteria is done
-    using steps instead of epochs.
-
-    Parameters
-    ----------
-    monitor
-        quantity to be monitored.
-    min_delta
-        minimum change in the monitored quantity to qualify as an improvement,
-        i.e. an absolute change of less than min_delta, will count as no
-        improvement.
-    patience
-        number of steps with no improvement after which training will be
-        stopped. Please use large patience values since this hook is
-        implemented using steps instead of epochs compared to the equivalent
-        one in Keras.
-    mode
-        one of {auto, min, max}. In `min` mode, training will stop when the
-        quantity monitored has stopped decreasing; in `max` mode it will stop
-        when the quantity monitored has stopped increasing; in `auto` mode, the
-        direction is automatically inferred from the name of the monitored
-        quantity.
-    """
-
-    def __init__(self,
-                 monitor='accuracy/value',
-                 min_delta=0,
-                 patience=0,
-                 mode='auto'):
-        super(EarlyStopping, self).__init__()
-
-        self.monitor = monitor
-        self.patience = patience
-        self.min_delta = min_delta
-        self.wait = 0
-
-        if mode not in ['auto', 'min', 'max']:
-            logger.warn('EarlyStopping mode %s is unknown, '
-                        'fallback to auto mode.' % mode)
-            mode = 'auto'
-
-        if mode == 'min':
-            self.monitor_op = np.less
-        elif mode == 'max':
-            self.monitor_op = np.greater
-        else:
-            if 'acc' in self.monitor:
-                self.monitor_op = np.greater
-            else:
-                self.monitor_op = np.less
-
-        if self.monitor_op == np.greater:
-            self.min_delta *= 1
-        else:
-            self.min_delta *= -1
-        # Allow instances to be re-used
-        self.wait = 0
-        self.best = np.Inf if self.monitor_op == np.less else -np.Inf
-        self.global_step_of_best = 0
-
-    def begin(self):
-        self.values = []
-        if isinstance(self.monitor, str):
-            self.monitor = _as_graph_element(self.monitor)
-        else:
-            self.monitor = _as_graph_element(self.monitor.name)
-        self.global_step_tensor = tf.train.get_global_step()
-
-    def before_run(self, run_context):
-        return tf.train.SessionRunArgs([self.monitor, self.global_step_tensor])
-
-    def after_run(self, run_context, run_values):
-        monitor, global_step = run_values.results
-        self.values.append(monitor)
-        # global step does not change during evaluation so keeping one of them
-        # is enough.
-        self.global_step_value = global_step
-
-    def _should_stop(self):
-        current = np.mean(self.values)
-        logger.info(
-            '%s is currently at %f (at step of %d) and the best value was %f '
-            '(at step of %d)', self.monitor.name, current,
-            self.global_step_value, self.best, self.global_step_of_best)
-        if self.monitor_op(current - self.min_delta, self.best):
-            self.best = current
-            self.wait = 0
-            self.global_step_of_best = self.global_step_value
-        else:
-            if self.wait >= self.patience:
-                message = 'Early stopping happened with {} at best of ' \
-                    '{} (at step {}) and current of {} (at step {})'.format(
-                        self.monitor.name, self.best, self.global_step_of_best,
-                        current, self.global_step_value)
-                logger.info(message)
-                raise EarlyStopException(message)
-            self.wait += 1
-
-    def end(self, session):
-        self._should_stop()
diff --git a/bob/learn/tensorflow/utils/image.py b/bob/learn/tensorflow/utils/image.py
new file mode 100644
index 0000000000000000000000000000000000000000..b05b2509e52319d09b3aa1529377fed58c8ac6d8
--- /dev/null
+++ b/bob/learn/tensorflow/utils/image.py
@@ -0,0 +1,63 @@
+import tensorflow as tf
+
+
+def to_channels_last(image):
+    """Converts the image to channel_last format. This is the same format as in
+    matplotlib, skimage, and etc.
+
+    Parameters
+    ----------
+    image : `tf.Tensor`
+        At least a 3 dimensional image. If the dimension is more than 3, the
+        last 3 dimensions are assumed to be [C, H, W].
+
+    Returns
+    -------
+    image : `tf.Tensor`
+        The image in [..., H, W, C] format.
+
+    Raises
+    ------
+    ValueError
+        If dim of image is less than 3.
+    """
+    ndim = image.ndim
+    if ndim < 3:
+        raise ValueError(
+            "The image needs to be at least 3 dimensional but it " "was {}".format(ndim)
+        )
+    axis_order = [1, 2, 0]
+    shift = ndim - 3
+    axis_order = list(range(ndim - 3)) + [n + shift for n in axis_order]
+    return tf.transpose(a=image, perm=axis_order)
+
+
+def to_channels_first(image):
+    """Converts the image to channel_first format. This is the same format as
+    in bob.io.image and bob.io.video.
+
+    Parameters
+    ----------
+    image : `tf.Tensor`
+        At least a 3 dimensional image. If the dimension is more than 3, the
+        last 3 dimensions are assumed to be [H, W, C].
+
+    Returns
+    -------
+    image : `tf.Tensor`
+        The image in [..., C, H, W] format.
+
+    Raises
+    ------
+    ValueError
+        If dim of image is less than 3.
+    """
+    ndim = image.ndim
+    if ndim < 3:
+        raise ValueError(
+            "The image needs to be at least 3 dimensional but it " "was {}".format(ndim)
+        )
+    axis_order = [2, 0, 1]
+    shift = ndim - 3
+    axis_order = list(range(ndim - 3)) + [n + shift for n in axis_order]
+    return tf.transpose(a=image, perm=axis_order)
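A usage sketch for the two helpers above (not part of the patch); the round trip is the identity on shapes:

# Sketch only: [C, H, W] (bob.io.image style) <-> [H, W, C] (matplotlib style).
img_chw = tf.zeros([3, 28, 28])
img_hwc = to_channels_last(img_chw)  # shape [28, 28, 3]
assert to_channels_first(img_hwc).shape == img_chw.shape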
diff --git a/bob/learn/tensorflow/utils/keras.py b/bob/learn/tensorflow/utils/keras.py
index c74021620069100b909d82a2c77998b976e4643a..6fceb5a4192266b663fe2e1957040c2571bf45c4 100644
--- a/bob/learn/tensorflow/utils/keras.py
+++ b/bob/learn/tensorflow/utils/keras.py
@@ -1,10 +1,82 @@
-import tensorflow.keras.backend as K
-from .network import is_trainable
-import tensorflow as tf
+import copy
 import logging
 
+import tensorflow as tf
+import tensorflow.keras.backend as K
+from tensorflow.python.keras import layers as layer_module
+from tensorflow.python.keras.utils import generic_utils
+from tensorflow.python.util import nest
+from tensorflow.python.util import tf_inspect
+
 logger = logging.getLogger(__name__)
 
+SINGLE_LAYER_OUTPUT_ERROR_MSG = (
+    "All layers in a Sequential model should have "
+    "a single output tensor. For multi-output "
+    "layers, use the functional API."
+)
+
+
+class SequentialLayer(tf.keras.layers.Layer):
+    """A Layer that does the same thing as tf.keras.Sequential but
+    its variables can be scoped.
+
+    Parameters
+    ----------
+    layers : list
+        List of layers. All layers must be provided at initialization time
+    """
+
+    def __init__(self, layers, **kwargs):
+        super().__init__(**kwargs)
+        self.sequential_layers = list(layers)
+
+    def call(self, inputs, training=None, mask=None):
+        outputs = inputs
+        for layer in self.sequential_layers:
+            # During each iteration, `inputs` are the inputs to `layer`, and `outputs`
+            # are the outputs of `layer` applied to `inputs`. At the end of each
+            # iteration `inputs` is set to `outputs` to prepare for the next layer.
+            kwargs = {}
+            argspec = tf_inspect.getfullargspec(layer.call).args
+            if "mask" in argspec:
+                kwargs["mask"] = mask
+            if "training" in argspec:
+                kwargs["training"] = training
+
+            outputs = layer(outputs, **kwargs)
+
+            if len(nest.flatten(outputs)) != 1:
+                raise ValueError(SINGLE_LAYER_OUTPUT_ERROR_MSG)
+
+            mask = getattr(outputs, "_keras_mask", None)
+
+        return outputs
+
+    def get_config(self):
+        layer_configs = []
+        for layer in self.sequential_layers:
+            layer_configs.append(generic_utils.serialize_keras_object(layer))
+        config = {"name": self.name, "layers": copy.deepcopy(layer_configs)}
+        return config
+
+    @classmethod
+    def from_config(cls, config, custom_objects=None):
+        if "name" in config:
+            name = config["name"]
+            layer_configs = config["layers"]
+        else:
+            name = None
+            layer_configs = config
+        layers = []
+        for layer_config in layer_configs:
+            layer = layer_module.deserialize(
+                layer_config, custom_objects=custom_objects
+            )
+            layers.append(layer)
+        model = cls(layers, name=name)
+        return model
+
 
 def keras_channels_index():
     return -3 if K.image_data_format() == "channels_first" else -1
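A usage sketch for ``SequentialLayer`` (not part of the patch), assuming standard Keras layer semantics; the layer name ``"head"`` is illustrative:

# Sketch only: compose layers like tf.keras.Sequential, but as a single
# Layer whose variables live under the "head/" scope.
block = SequentialLayer(
    [tf.keras.layers.Dense(8, activation="relu"), tf.keras.layers.Dense(1)],
    name="head",
)
y = block(tf.zeros([2, 4]))  # sublayers are built on first call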
@@ -21,7 +93,7 @@ def keras_model_weights_as_initializers_for_variables(model):
     model : object
         A Keras model.
     """
-    sess = K.get_session()
+    sess = tf.compat.v1.keras.backend.get_session()
     n = len(model.variables)
     logger.debug("Initializing %d variables with their current weights", n)
     for variable in model.variables:
@@ -31,25 +103,6 @@ def keras_model_weights_as_initializers_for_variables(model):
         variable._initial_value = initial_value
 
 
-def apply_trainable_variables_on_keras_model(model, trainable_variables, mode):
-    """Changes the trainable status of layers in a keras model.
-    It can only turn off the trainable status of layer.
-
-    Parameters
-    ----------
-    model : object
-        A Keras model
-    trainable_variables : list or None
-        See bob.learn.tensorflow.estimators.Logits
-    mode : str
-        One of tf.estimator.ModeKeys
-    """
-    for layer in model.layers:
-        trainable = is_trainable(layer.name, trainable_variables, mode=mode)
-        if layer.trainable:
-            layer.trainable = trainable
-
-
 def _create_var_map(variables, normalizer=None):
     if normalizer is None:
 
@@ -65,10 +118,10 @@ def restore_model_variables_from_checkpoint(
     model, checkpoint, session=None, normalizer=None
 ):
     if session is None:
-        session = tf.keras.backend.get_session()
+        session = tf.compat.v1.keras.backend.get_session()
 
     var_list = _create_var_map(model.variables, normalizer=normalizer)
-    saver = tf.train.Saver(var_list=var_list)
+    saver = tf.compat.v1.train.Saver(var_list=var_list)
     ckpt_state = tf.train.get_checkpoint_state(checkpoint)
     logger.info("Loading checkpoint %s", ckpt_state.model_checkpoint_path)
     saver.restore(session, ckpt_state.model_checkpoint_path)
@@ -76,15 +129,11 @@ def restore_model_variables_from_checkpoint(
 
 def initialize_model_from_checkpoint(model, checkpoint, normalizer=None):
     assignment_map = _create_var_map(model.variables, normalizer=normalizer)
-    tf.train.init_from_checkpoint(checkpoint, assignment_map=assignment_map)
+    tf.compat.v1.train.init_from_checkpoint(checkpoint, assignment_map=assignment_map)
 
 
 def model_summary(model, do_print=False):
-    try:
-        from tensorflow.python.keras.utils.layer_utils import count_params
-    except ImportError:
-        from tensorflow_core.python.keras.utils.layer_utils import count_params
-    nest = tf.nest
+    from tensorflow.keras.backend import count_params
 
     if model.__class__.__name__ == "Sequential":
         sequential_like = True
diff --git a/bob/learn/tensorflow/utils/math.py b/bob/learn/tensorflow/utils/math.py
index 64ed7349d7dce745bf3c3fe5ad84bd347a9d493c..304d3d71fef9d361b4c2080344568745263a6884 100644
--- a/bob/learn/tensorflow/utils/math.py
+++ b/bob/learn/tensorflow/utils/math.py
@@ -28,7 +28,7 @@ def gram_matrix(input_tensor):
             [0., 0., 0., ..., 0., 0., 0.]],
     """
     result = tf.linalg.einsum("bijc,bijd->bcd", input_tensor, input_tensor)
-    input_shape = tf.shape(input_tensor)
+    input_shape = tf.shape(input=input_tensor)
     num_locations = tf.cast(input_shape[1] * input_shape[2], tf.float32)
     return result / (num_locations)
 
@@ -61,17 +61,79 @@ def upper_triangle_and_diagonal(A):
     """
     ones = tf.ones_like(A)
     # Upper triangular matrix of 0s and 1s (including diagonal)
-    mask = tf.matrix_band_part(ones, 0, -1)
-    upper_triangular_flat = tf.boolean_mask(A, mask)
+    mask = tf.linalg.band_part(ones, 0, -1)
+    upper_triangular_flat = tf.boolean_mask(tensor=A, mask=mask)
     return upper_triangular_flat
 
 
 def upper_triangle(A):
     ones = tf.ones_like(A)
     # Upper triangular matrix of 0s and 1s (including diagonal)
-    mask_a = tf.matrix_band_part(ones, 0, -1)
+    mask_a = tf.linalg.band_part(ones, 0, -1)
     # Diagonal
-    mask_b = tf.matrix_band_part(ones, 0, 0)
+    mask_b = tf.linalg.band_part(ones, 0, 0)
     mask = tf.cast(mask_a - mask_b, dtype=tf.bool)
-    upper_triangular_flat = tf.boolean_mask(A, mask)
+    upper_triangular_flat = tf.boolean_mask(tensor=A, mask=mask)
     return upper_triangular_flat
+
+
+def pdist(A, metric="sqeuclidean"):
+    if metric != "sqeuclidean":
+        raise NotImplementedError()
+    r = tf.reduce_sum(input_tensor=A * A, axis=1)
+    r = tf.reshape(r, [-1, 1])
+    D = r - 2 * tf.matmul(A, A, transpose_b=True) + tf.transpose(a=r)
+    return D
+
+
+def cdist(A, B, metric="sqeuclidean"):
+    if metric != "sqeuclidean":
+        raise NotImplementedError()
+    M1, M2 = tf.shape(input=A)[0], tf.shape(input=B)[0]
+    # code from https://stackoverflow.com/a/43839605/1286165
+    p1 = tf.matmul(
+        tf.expand_dims(tf.reduce_sum(input_tensor=tf.square(A), axis=1), 1),
+        tf.ones(shape=(1, M2)),
+    )
+    p2 = tf.transpose(
+        a=tf.matmul(
+            tf.reshape(tf.reduce_sum(input_tensor=tf.square(B), axis=1), shape=[-1, 1]),
+            tf.ones(shape=(M1, 1)),
+            transpose_b=True,
+        )
+    )
+
+    D = tf.add(p1, p2) - 2 * tf.matmul(A, B, transpose_b=True)
+    return D
+
+
+def random_choice_no_replacement(one_dim_input, num_indices_to_drop=3, sort=False):
+    """Similar to np.random.choice with no replacement.
+    Code from https://stackoverflow.com/a/54755281/1286165
+    """
+    input_length = tf.shape(input=one_dim_input)[0]
+
+    # create uniform distribution over the sequence
+    uniform_distribution = tf.random.uniform(
+        shape=[input_length],
+        minval=0,
+        maxval=None,
+        dtype=tf.float32,
+        seed=None,
+        name=None,
+    )
+
+    # keep the indices of the (input_length - num_indices_to_drop) greatest values
+    _, indices_to_keep = tf.nn.top_k(
+        uniform_distribution, input_length - num_indices_to_drop
+    )
+
+    # sort the indices
+    if sort:
+        sorted_indices_to_keep = tf.sort(indices_to_keep)
+    else:
+        sorted_indices_to_keep = indices_to_keep
+
+    # gather the kept entries of the input using the selected indices
+    result = tf.gather(one_dim_input, sorted_indices_to_keep)
+    return result
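A usage sketch for the distance helpers above (not part of the patch): both return squared euclidean distances, so ``pdist(A)`` agrees with ``cdist(A, A)``:

# Sketch only: rows (0, 0) and (3, 4) are at squared distance 3**2 + 4**2 = 25.
A = tf.constant([[0.0, 0.0], [3.0, 4.0]])
print(pdist(A))  # [[0., 25.], [25., 0.]]
print(cdist(A, A))  # same matrix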
diff --git a/bob/learn/tensorflow/utils/network.py b/bob/learn/tensorflow/utils/network.py
deleted file mode 100644
index fa56276ed98e2f2b6db309ac0989931d618b6eab..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/network.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import tensorflow as tf
-import tensorflow.contrib.slim as slim
-
-
-def append_logits(
-    graph,
-    n_classes,
-    reuse=False,
-    l2_regularizer=5e-05,
-    weights_std=0.1,
-    trainable_variables=None,
-    name="Logits",
-):
-    trainable = is_trainable(name, trainable_variables)
-    return slim.fully_connected(
-        graph,
-        n_classes,
-        activation_fn=None,
-        weights_initializer=tf.truncated_normal_initializer(stddev=weights_std),
-        weights_regularizer=slim.l2_regularizer(l2_regularizer),
-        scope=name,
-        reuse=reuse,
-        trainable=trainable,
-    )
-
-
-def is_trainable(name, trainable_variables, mode=tf.estimator.ModeKeys.TRAIN):
-    """
-    Check if a variable is trainable or not
-
-    Parameters
-    ----------
-
-    name: str
-       Layer name
-
-    trainable_variables: list
-       List containing the variables or scopes to be trained.
-       If None, the variable/scope is trained
-    """
-
-    # if mode is not training, so we shutdown
-    if mode != tf.estimator.ModeKeys.TRAIN:
-        return False
-
-    # If None, we train by default
-    if trainable_variables is None:
-        return True
-
-    # Here is my choice to shutdown the whole scope
-    return name in trainable_variables
diff --git a/bob/learn/tensorflow/utils/reproducible.py b/bob/learn/tensorflow/utils/reproducible.py
deleted file mode 100644
index 677994b9beae33667c43dcd2a0cf5069a512ce77..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/reproducible.py
+++ /dev/null
@@ -1,95 +0,0 @@
-"""Helps training reproducible networks.
-"""
-import os
-import random as rn
-import numpy as np
-import tensorflow as tf
-from tensorflow.core.protobuf import rewriter_config_pb2
-
-
-def set_seed(
-    seed=0,
-    python_hash_seed=0,
-    log_device_placement=False,
-    allow_soft_placement=False,
-    arithmetic_optimization=None,
-    allow_growth=None,
-    memory_optimization=None,
-):
-    """Sets the seeds in python, numpy, and tensorflow in order to help
-    training reproducible networks.
-
-    Parameters
-    ----------
-    seed : :obj:`int`, optional
-        The seed to set.
-    python_hash_seed : :obj:`int`, optional
-        https://docs.python.org/3.4/using/cmdline.html#envvar-PYTHONHASHSEED
-    log_device_placement : :obj:`bool`, optional
-        Optionally, log device placement of tensorflow variables.
-
-    Returns
-    -------
-    :any:`tf.ConfigProto`
-        Session config.
-    :any:`tf.estimator.RunConfig`
-        A run config to help training estimators.
-
-    Notes
-    -----
-        This functions return a list and its length might change. Please use
-        indices to select one of returned values. For example
-        ``sess_config, run_config = set_seed()[:2]``.
-    """
-    # reproducible networks
-    # The below is necessary in Python 3.2.3 onwards to
-    # have reproducible behavior for certain hash-based operations.
-    # See these references for further details:
-    # https://docs.python.org/3.4/using/cmdline.html#envvar-PYTHONHASHSEED
-    # https://github.com/fchollet/keras/issues/2280#issuecomment-306959926
-    os.environ["PYTHONHASHSEED"] = "{}".format(python_hash_seed)
-
-    # The below is necessary for starting Numpy generated random numbers
-    # in a well-defined initial state.
-    np.random.seed(seed)
-
-    # The below is necessary for starting core Python generated random numbers
-    # in a well-defined state.
-    rn.seed(seed)
-
-    # Force TensorFlow to use single thread.
-    # Multiple threads are a potential source of
-    # non-reproducible results.
-    # For further details, see:
-    # https://stackoverflow.com/questions/42022950/which-seeds-have-to-be-set-where-to-realize-100-reproducibility-of-training-res
-    session_config = tf.ConfigProto(
-        intra_op_parallelism_threads=1,
-        inter_op_parallelism_threads=1,
-        log_device_placement=log_device_placement,
-        allow_soft_placement=allow_soft_placement,
-    )
-
-    off = rewriter_config_pb2.RewriterConfig.OFF
-    if arithmetic_optimization == "off":
-        session_config.graph_options.rewrite_options.arithmetic_optimization = off
-
-    if memory_optimization == "off":
-        session_config.graph_options.rewrite_options.memory_optimization = off
-
-    if allow_growth is not None:
-        session_config.gpu_options.allow_growth = allow_growth
-        session_config.gpu_options.per_process_gpu_memory_fraction = 0.8
-
-    # The below tf.set_random_seed() will make random number generation
-    # in the TensorFlow backend have a well-defined initial state.
-    # For further details, see:
-    # https://www.tensorflow.org/api_docs/python/tf/set_random_seed
-    tf.set_random_seed(seed)
-    # sess = tf.Session(graph=tf.get_default_graph(), config=session_config)
-    # keras.backend.set_session(sess)
-
-    run_config = tf.estimator.RunConfig()
-    run_config = run_config.replace(session_config=session_config)
-    run_config = run_config.replace(tf_random_seed=seed)
-
-    return [session_config, run_config, None, None, None]
diff --git a/bob/learn/tensorflow/utils/sequences.py b/bob/learn/tensorflow/utils/sequences.py
deleted file mode 100644
index c2de433a50c65e13d3cb7353bd1f036a8055ddc6..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/sequences.py
+++ /dev/null
@@ -1,144 +0,0 @@
-from __future__ import division
-import numpy
-from keras.utils import Sequence
-# documentation imports
-from bob.dap.base.database import PadDatabase, PadFile
-from bob.bio.base.preprocessor import Preprocessor
-
-
-class PadSequence(Sequence):
-    """A data shuffler for bob.dap.base database interfaces.
-
-    Attributes
-    ----------
-    batch_size : int
-        The number of samples to return in every batch.
-    files : list of :any:`PadFile`
-        List of file objects for a particular group and protocol.
-    labels : list of bool
-        List of labels for the files. ``True`` if bona-fide, ``False`` if
-        attack.
-    preprocessor : :any:`Preprocessor`
-        The preprocessor to be used to load and process the data.
-    """
-
-    def __init__(self, files, labels, batch_size, preprocessor,
-                 original_directory, original_extension):
-        super(PadSequence, self).__init__()
-        self.files = files
-        self.labels = labels
-        self.batch_size = int(batch_size)
-        self.preprocessor = preprocessor
-        self.original_directory = original_directory
-        self.original_extension = original_extension
-
-    def __len__(self):
-        """Number of batch in the Sequence.
-
-        Returns
-        -------
-        int
-            The number of batches in the Sequence.
-        """
-        return int(numpy.ceil(len(self.files) / self.batch_size))
-
-    def __getitem__(self, idx):
-        files = self.files[idx * self.batch_size:(idx + 1) * self.batch_size]
-        labels = self.labels[idx * self.batch_size:(idx + 1) * self.batch_size]
-        return self.load_batch(files, labels)
-
-    def load_batch(self, files, labels):
-        """Loads a batch of files and processes them.
-
-        Parameters
-        ----------
-        files : list of :any:`PadFile`
-            List of files to load.
-        labels : list of bool
-            List of labels corresponding to the files.
-
-        Returns
-        -------
-        tuple of :any:`numpy.array`
-            A tuple of (x, y): the data and their targets.
-        """
-        data, targets = [], []
-        for file_object, target in zip(files, labels):
-            loaded_data = self.preprocessor.read_original_data(
-                file_object, self.original_directory, self.original_extension)
-            preprocessed_data = self.preprocessor(loaded_data)
-            data.append(preprocessed_data)
-            targets.append(target)
-        return numpy.array(data), numpy.array(targets)
-
-    def on_epoch_end(self):
-        pass
-
-
-def shuffle_data(files, labels):
-    indexes = numpy.arange(len(files))
-    numpy.random.shuffle(indexes)
-    return [files[i] for i in indexes], [labels[i] for i in indexes]
-
-
-def get_pad_files_labels(database, groups):
-    """Returns the pad files and their labels.
-
-    Parameters
-    ----------
-    database : :any:`PadDatabase`
-        The database to be used. The database should have a proper
-        ``database.protocol`` attribute.
-    groups : str
-        The group to be used to return the data. One of ('world', 'dev',
-        'eval'). 'world' means training data and 'dev' means validation data.
-
-    Returns
-    -------
-    tuple
-        A tuple of (files, labels) for that particular group and protocol.
-    """
-    files = database.samples(groups=groups, protocol=database.protocol)
-    labels = ((f.attack_type is None) for f in files)
-    labels = numpy.fromiter(labels, bool, len(files))
-    return files, labels
-
-
-def get_pad_sequences(database,
-                      preprocessor,
-                      batch_size,
-                      groups=('world', 'dev', 'eval'),
-                      shuffle=False,
-                      limit=None):
-    """Returns a list of :any:`Sequence` objects for the database.
-
-    Parameters
-    ----------
-    database : :any:`PadDatabase`
-        The database to be used. The database should have a proper
-        ``database.protocol`` attribute.
-    preprocessor : :any:`Preprocessor`
-        The preprocessor to be used to load and process the data.
-    batch_size : int
-        The number of samples to return in every batch.
-    groups : str
-        The group to be used to return the data. One of ('world', 'dev',
-        'eval'). 'world' means training data and 'dev' means validation data.
-
-    Returns
-    -------
-    list of :any:`Sequence`
-        The requested sequences to be used.
-    """
-    seqs = []
-    for grp in groups:
-        files, labels = get_pad_files_labels(database, grp)
-        if shuffle:
-            files, labels = shuffle_data(files, labels)
-        if limit is not None:
-            files, labels = files[:limit], labels[:limit]
-        seqs.append(
-            PadSequence(files, labels, batch_size, preprocessor,
-                        database.original_directory,
-                        database.original_extension))
-    return seqs
diff --git a/bob/learn/tensorflow/utils/session.py b/bob/learn/tensorflow/utils/session.py
deleted file mode 100644
index 3976f9f0446f14a74f0d5b1e237dd28a7937fbb9..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/session.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-# @date: Wed 11 May 2016 09:39:36 CEST
-
-import tensorflow as tf
-from .singleton import Singleton
-from tensorflow.python import debug as tf_debug
-
-
-@Singleton
-class Session(object):
-    """
-    Encapsulates a tf.session
-    """
-
-    def __init__(self, debug=False):
-        config = tf.ConfigProto(
-            log_device_placement=False,
-            allow_soft_placement=True,
-            gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.5))
-        config.gpu_options.allow_growth = True
-        self.session = tf.Session()
-        if debug:
-            self.session = tf_debug.LocalCLIDebugWrapperSession(self.session)
diff --git a/bob/learn/tensorflow/utils/singleton.py b/bob/learn/tensorflow/utils/singleton.py
deleted file mode 100644
index d91f3c3ab5cfc339936066e3642c4f728edd20da..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/singleton.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# A singleton class decorator, based on http://stackoverflow.com/a/7346105/3301902
-
-
-class Singleton(object):
-    """
-    A non-thread-safe helper class to ease implementing singletons.
-    This should be used as a **decorator** -- not a metaclass -- to the class that should be a singleton.
-
-    The decorated class can define one `__init__` function that takes an arbitrary list of parameters.
-
-    To get the singleton instance, use the :py:meth:`instance` method. Trying to use `__call__` will result in a `TypeError` being raised.
-
-    Limitations:
-
-    * The decorated class cannot be inherited from.
-    * The documentation of the decorated class is replaced with the documentation of this class.
-    """
-
-    def __init__(self, decorated):
-        self._decorated = decorated
-        # see: functools.WRAPPER_ASSIGNMENTS:
-        self.__doc__ = decorated.__doc__
-        self.__name__ = decorated.__name__
-        self.__module__ = decorated.__module__
-        self.__mro__ = decorated.__mro__
-        self.__bases__ = []
-
-        self._instance = None
-
-    def create(self, *args, **kwargs):
-        """Creates the singleton instance, by passing the given parameters to the class' constructor."""
-        # TODO: I still having problems in killing all the elements of the current session
-
-        if self._instance is not None:
-            self._instance.session.close()
-            del self._instance
-        self._instance = self._decorated(*args, **kwargs)
-
-    def instance(self, new=False):
-        """Returns the singleton instance.
-        The function :py:meth:`create` must have been called before."""
-        if self._instance is None or new:
-
-            self.create()
-        return self._instance
-
-    def __call__(self):
-        raise TypeError(
-            'Singletons must be accessed through the `instance()` method.')
-
-    def __instancecheck__(self, inst):
-        return isinstance(inst, self._decorated)
diff --git a/bob/learn/tensorflow/utils/train.py b/bob/learn/tensorflow/utils/train.py
deleted file mode 100644
index 999891047c46c7101f38eb3f7bc39c4add37ea7d..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/train.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import tensorflow as tf
-
-
-def check_features(features):
-    if "data" not in features or "key" not in features:
-        raise ValueError(
-            "The input function needs to contain a dictionary with the keys `data` and `key` "
-        )
-    return True
-
-
-def get_trainable_variables(extra_checkpoint, mode=tf.estimator.ModeKeys.TRAIN):
-    """
-    Given the extra_checkpoint dictionary provided to the estimator,
-    extract the content of "trainable_variables".
-
-    If trainable_variables is not provided, all end points are trainable by
-    default.
-    If trainable_variables==[], all end points are NOT trainable.
-    If trainable_variables contains some end_points, ONLY these endpoints will
-    be trainable.
-
-    Attributes
-    ----------
-
-    extra_checkpoint: dict
-      The extra_checkpoint dictionary provided to the estimator
-
-    mode:
-        The estimator mode. TRAIN, EVAL, and PREDICT. If not TRAIN, None is
-        returned.
-
-    Returns
-    -------
-
-    Returns `None` if **trainable_variables** is not in extra_checkpoint;
-    otherwise returns the content of extra_checkpoint .
-    """
-    if mode != tf.estimator.ModeKeys.TRAIN:
-        return None
-
-    # If you don't set anything, everything is trainable
-    if extra_checkpoint is None or "trainable_variables" not in extra_checkpoint:
-        return None
-
-    return extra_checkpoint["trainable_variables"]
diff --git a/bob/learn/tensorflow/utils/util.py b/bob/learn/tensorflow/utils/util.py
deleted file mode 100644
index 263a9ec00970b03786e6e98db35b65617fa323eb..0000000000000000000000000000000000000000
--- a/bob/learn/tensorflow/utils/util.py
+++ /dev/null
@@ -1,467 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-# @date: Wed 11 May 2016 09:39:36 CEST
-
-import numpy
-import tensorflow as tf
-from tensorflow.python.client import device_lib
-from tensorflow.python.framework import function
-import logging
-
-logger = logging.getLogger(__name__)
-
-
-@function.Defun(tf.float32, tf.float32)
-def norm_grad(x, dy):
-    return tf.expand_dims(dy, -1) * (
-        x / (tf.expand_dims(tf.norm(x, ord=2, axis=-1), -1) + 1.0e-19)
-    )
-
-
-@function.Defun(tf.float32, grad_func=norm_grad)
-def norm(x):
-    return tf.norm(x, ord=2, axis=-1)
-
-
-def compute_euclidean_distance(x, y):
-    """
-    Computes the euclidean distance between two tensorflow variables
-    """
-
-    with tf.name_scope("euclidean_distance"):
-        # d = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(x, y)), 1))
-        d = norm(tf.subtract(x, y))
-        return d
-
-
-def pdist_safe(A, metric="sqeuclidean"):
-    if metric != "sqeuclidean":
-        raise NotImplementedError()
-    r = tf.reduce_sum(A * A, 1)
-    r = tf.reshape(r, [-1, 1])
-    D = r - 2 * tf.matmul(A, A, transpose_b=True) + tf.transpose(r)
-    return D
-
-
-def cdist(A, B, metric="sqeuclidean"):
-    if metric != "sqeuclidean":
-        raise NotImplementedError()
-    M1, M2 = tf.shape(A)[0], tf.shape(B)[0]
-    # code from https://stackoverflow.com/a/43839605/1286165
-    p1 = tf.matmul(
-        tf.expand_dims(tf.reduce_sum(tf.square(A), 1), 1), tf.ones(shape=(1, M2))
-    )
-    p2 = tf.transpose(
-        tf.matmul(
-            tf.reshape(tf.reduce_sum(tf.square(B), 1), shape=[-1, 1]),
-            tf.ones(shape=(M1, 1)),
-            transpose_b=True,
-        )
-    )
-
-    D = tf.add(p1, p2) - 2 * tf.matmul(A, B, transpose_b=True)
-    return D
-
-
-def load_mnist(perc_train=0.9):
-    numpy.random.seed(0)
-    import bob.db.mnist
-
-    db = bob.db.mnist.Database()
-    raw_data = db.data()
-
-    # data  = raw_data[0].astype(numpy.float64)
-    data = raw_data[0]
-    labels = raw_data[1]
-
-    # Shuffling
-    total_samples = data.shape[0]
-    indexes = numpy.array(range(total_samples))
-    numpy.random.shuffle(indexes)
-
-    # Spliting train and validation
-    n_train = int(perc_train * indexes.shape[0])
-    n_validation = total_samples - n_train
-
-    train_data = data[0:n_train, :].astype("float32") * 0.00390625
-    train_labels = labels[0:n_train]
-
-    validation_data = (
-        data[n_train : n_train + n_validation, :].astype("float32") * 0.00390625
-    )
-    validation_labels = labels[n_train : n_train + n_validation]
-
-    return train_data, train_labels, validation_data, validation_labels
-
-
-def create_mnist_tfrecord(tfrecords_filename, data, labels, n_samples=6000):
-    def _bytes_feature(value):
-        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
-
-    def _int64_feature(value):
-        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
-
-    writer = tf.python_io.TFRecordWriter(tfrecords_filename)
-
-    for i in range(n_samples):
-        img = data[i]
-        img_raw = img.tostring()
-        feature = {
-            "data": _bytes_feature(img_raw),
-            "label": _int64_feature(labels[i]),
-            "key": _bytes_feature(b"-"),
-        }
-
-        example = tf.train.Example(features=tf.train.Features(feature=feature))
-        writer.write(example.SerializeToString())
-    writer.close()
-
-
-def compute_eer(
-    data_train, labels_train, data_validation, labels_validation, n_classes
-):
-    import bob.measure
-    from scipy.spatial.distance import cosine
-
-    # Creating client models
-    models = []
-    for i in range(n_classes):
-        indexes = labels_train == i
-        models.append(numpy.mean(data_train[indexes, :], axis=0))
-
-    # Probing
-    positive_scores = numpy.zeros(shape=0)
-    negative_scores = numpy.zeros(shape=0)
-
-    for i in range(n_classes):
-        # Positive scoring
-        indexes = labels_validation == i
-        positive_data = data_validation[indexes, :]
-        p = [cosine(models[i], positive_data[j]) for j in range(positive_data.shape[0])]
-        positive_scores = numpy.hstack((positive_scores, p))
-
-        # negative scoring
-        indexes = labels_validation != i
-        negative_data = data_validation[indexes, :]
-        n = [cosine(models[i], negative_data[j]) for j in range(negative_data.shape[0])]
-        negative_scores = numpy.hstack((negative_scores, n))
-
-    # Computing performance based on EER
-    negative_scores = (-1) * negative_scores
-    positive_scores = (-1) * positive_scores
-
-    threshold = bob.measure.eer_threshold(negative_scores, positive_scores)
-    far, frr = bob.measure.farfrr(negative_scores, positive_scores, threshold)
-    eer = (far + frr) / 2.0
-
-    return eer
-
-
-def compute_accuracy(
-    data_train, labels_train, data_validation, labels_validation, n_classes
-):
-    from scipy.spatial.distance import cosine
-
-    # Creating client models
-    models = []
-    for i in range(n_classes):
-        indexes = labels_train == i
-        models.append(numpy.mean(data_train[indexes, :], axis=0))
-
-    # Probing
-    tp = 0
-    for i in range(data_validation.shape[0]):
-
-        d = data_validation[i, :]
-        l = labels_validation[i]
-
-        scores = [cosine(m, d) for m in models]
-        predict = numpy.argmax(scores)
-
-        if predict == l:
-            tp += 1
-
-    return (float(tp) / data_validation.shape[0]) * 100
-
-
-def debug_embbeding(image, architecture, embbeding_dim=2, feature_layer="fc3"):
-    """
-    """
-    import tensorflow as tf
-    from bob.learn.tensorflow.utils.session import Session
-
-    session = Session.instance(new=False).session
-    inference_graph = architecture.compute_graph(
-        architecture.inference_placeholder, feature_layer=feature_layer, training=False
-    )
-
-    embeddings = numpy.zeros(shape=(image.shape[0], embbeding_dim))
-    for i in range(image.shape[0]):
-        feed_dict = {architecture.inference_placeholder: image[i : i + 1, :, :, :]}
-        embedding = session.run(
-            [tf.nn.l2_normalize(inference_graph, 1, 1e-10)], feed_dict=feed_dict
-        )[0]
-        embedding = numpy.reshape(embedding, numpy.prod(embedding.shape[1:]))
-        embeddings[i] = embedding
-
-    return embeddings
-
-
-def pdist(A):
-    """
-    Compute a pairwise euclidean distance in the same fashion
-    as in scipy.spation.distance.pdist
-    """
-    with tf.name_scope("Pairwisedistance"):
-        ones_1 = tf.reshape(tf.cast(tf.ones_like(A), tf.float32)[:, 0], [1, -1])
-        p1 = tf.matmul(tf.expand_dims(tf.reduce_sum(tf.square(A), 1), 1), ones_1)
-
-        ones_2 = tf.reshape(tf.cast(tf.ones_like(A), tf.float32)[:, 0], [-1, 1])
-        p2 = tf.transpose(
-            tf.matmul(
-                tf.reshape(tf.reduce_sum(tf.square(A), 1), shape=[-1, 1]),
-                ones_2,
-                transpose_b=True,
-            )
-        )
-
-        return tf.sqrt(tf.add(p1, p2) - 2 * tf.matmul(A, A, transpose_b=True))
-
-
-def predict_using_tensors(embedding, labels, num=None):
-    """
-    Compute the predictions through exhaustive comparisons between
-    embeddings using tensors
-    """
-
-    # Fitting the main diagonal with infs (removing comparisons with the same
-    # sample)
-    inf = tf.cast(tf.ones_like(labels), tf.float32) * numpy.inf
-
-    distances = pdist(embedding)
-    distances = tf.matrix_set_diag(distances, inf)
-    indexes = tf.argmin(distances, axis=1)
-    return [labels[i] for i in tf.unstack(indexes, num=num)]
-
-
-def compute_embedding_accuracy_tensors(embedding, labels, num=None):
-    """
-    Compute the accuracy in a closed-set
-
-    **Parameters**
-
-    embeddings: `tf.Tensor`
-      Set of embeddings
-
-    labels: `tf.Tensor`
-      Correspondent labels
-    """
-
-    # Fitting the main diagonal with infs (removing comparisons with the same
-    # sample)
-    predictions = predict_using_tensors(embedding, labels, num=num)
-    matching = [
-        tf.equal(p, l)
-        for p, l in zip(tf.unstack(predictions, num=num), tf.unstack(labels, num=num))
-    ]
-
-    return tf.reduce_sum(tf.cast(matching, tf.uint8)) / len(predictions)
-
-
-def compute_embedding_accuracy(embedding, labels):
-    """
-    Compute the accuracy in a closed-set
-
-    **Parameters**
-
-    embeddings: :any:`numpy.array`
-      Set of embeddings
-
-    labels: :any:`numpy.array`
-      Correspondent labels
-    """
-
-    from scipy.spatial.distance import pdist, squareform
-
-    distances = squareform(pdist(embedding))
-
-    n_samples = embedding.shape[0]
-
-    # Fitting the main diagonal with infs (removing comparisons with the same
-    # sample)
-    numpy.fill_diagonal(distances, numpy.inf)
-
-    indexes = distances.argmin(axis=1)
-
-    # Computing the argmin excluding comparisons with the same samples
-    # Basically, we are excluding the main diagonal
-
-    # valid_indexes = distances[distances>0].reshape(n_samples, n_samples-1).argmin(axis=1)
-
-    # Getting the original positions of the indexes in the 1-axis
-    # corrected_indexes = [ i if i<j else i+1 for i, j in zip(valid_indexes, range(n_samples))]
-
-    matching = [labels[i] == labels[j] for i, j in zip(range(n_samples), indexes)]
-    accuracy = sum(matching) / float(n_samples)
-
-    return accuracy
-
-
-def get_available_gpus():
-    """Returns the number of GPU devices that are available.
-
-    Returns
-    -------
-    [str]
-        The names of available GPU devices.
-    """
-    local_device_protos = device_lib.list_local_devices()
-    return [x.name for x in local_device_protos if x.device_type == "GPU"]
-
-
-def to_channels_last(image):
-    """Converts the image to channel_last format. This is the same format as in
-    matplotlib, skimage, and etc.
-
-    Parameters
-    ----------
-    image : `tf.Tensor`
-        At least a 3 dimensional image. If the dimension is more than 3, the
-        last 3 dimensions are assumed to be [C, H, W].
-
-    Returns
-    -------
-    image : `tf.Tensor`
-        The image in [..., H, W, C] format.
-
-    Raises
-    ------
-    ValueError
-        If dim of image is less than 3.
-    """
-    ndim = len(image.shape)
-    if ndim < 3:
-        raise ValueError(
-            "The image needs to be at least 3 dimensional but it " "was {}".format(ndim)
-        )
-    axis_order = [1, 2, 0]
-    shift = ndim - 3
-    axis_order = list(range(ndim - 3)) + [n + shift for n in axis_order]
-    return tf.transpose(image, axis_order)
-
-
-def to_channels_first(image):
-    """Converts the image to channel_first format. This is the same format as
-    in bob.io.image and bob.io.video.
-
-    Parameters
-    ----------
-    image : `tf.Tensor`
-        At least a 3 dimensional image. If the dimension is more than 3, the
-        last 3 dimensions are assumed to be [H, W, C].
-
-    Returns
-    -------
-    image : `tf.Tensor`
-        The image in [..., C, H, W] format.
-
-    Raises
-    ------
-    ValueError
-        If dim of image is less than 3.
-    """
-    ndim = len(image.shape)
-    if ndim < 3:
-        raise ValueError(
-            "The image needs to be at least 3 dimensional but it " "was {}".format(ndim)
-        )
-    axis_order = [2, 0, 1]
-    shift = ndim - 3
-    axis_order = list(range(ndim - 3)) + [n + shift for n in axis_order]
-    return tf.transpose(image, axis_order)
-
-
-to_skimage = to_matplotlib = to_channels_last
-to_bob = to_channels_first
-
-
-def bytes2human(n, format="%(value).1f %(symbol)s", symbols="customary"):
-    """Convert n bytes into a human readable string based on format.
-    From: https://code.activestate.com/recipes/578019-bytes-to-human-human-to-
-    bytes-converter/
-    Author: Giampaolo Rodola' <g.rodola [AT] gmail [DOT] com>
-    License: MIT
-    symbols can be either "customary", "customary_ext", "iec" or "iec_ext",
-    see: http://goo.gl/kTQMs
-    """
-    SYMBOLS = {
-        "customary": ("B", "K", "M", "G", "T", "P", "E", "Z", "Y"),
-        "customary_ext": (
-            "byte",
-            "kilo",
-            "mega",
-            "giga",
-            "tera",
-            "peta",
-            "exa",
-            "zetta",
-            "iotta",
-        ),
-        "iec": ("Bi", "Ki", "Mi", "Gi", "Ti", "Pi", "Ei", "Zi", "Yi"),
-        "iec_ext": (
-            "byte",
-            "kibi",
-            "mebi",
-            "gibi",
-            "tebi",
-            "pebi",
-            "exbi",
-            "zebi",
-            "yobi",
-        ),
-    }
-    n = int(n)
-    if n < 0:
-        raise ValueError("n < 0")
-    symbols = SYMBOLS[symbols]
-    prefix = {}
-    for i, s in enumerate(symbols[1:]):
-        prefix[s] = 1 << (i + 1) * 10
-    for symbol in reversed(symbols[1:]):
-        if n >= prefix[symbol]:
-            value = float(n) / prefix[symbol]
-            return format % locals()
-    return format % dict(symbol=symbols[0], value=n)
-
-
-def random_choice_no_replacement(one_dim_input, num_indices_to_drop=3, sort=False):
-    """Similar to np.random.choice with no replacement.
-    Code from https://stackoverflow.com/a/54755281/1286165
-    """
-    input_length = tf.shape(one_dim_input)[0]
-
-    # create uniform distribution over the sequence
-    uniform_distribution = tf.random.uniform(
-        shape=[input_length],
-        minval=0,
-        maxval=None,
-        dtype=tf.float32,
-        seed=None,
-        name=None,
-    )
-
-    # grab the indices of the greatest num_words_to_drop values from the distibution
-    _, indices_to_keep = tf.nn.top_k(
-        uniform_distribution, input_length - num_indices_to_drop
-    )
-
-    # sort the indices
-    if sort:
-        sorted_indices_to_keep = tf.sort(indices_to_keep)
-    else:
-        sorted_indices_to_keep = indices_to_keep
-
-    # gather indices from the input array using the filtered actual array
-    result = tf.gather(one_dim_input, sorted_indices_to_keep)
-    return result
diff --git a/buildout.cfg b/buildout.cfg
index 26bc4af9ef91ddaab87393f1405958f8d9f88f49..3ba4b6f695b3cd05a01208af83d2f293cef002a7 100644
--- a/buildout.cfg
+++ b/buildout.cfg
@@ -11,4 +11,4 @@ verbose = true
 
 [scripts]
 recipe = bob.buildout:scripts
-dependent-scripts = true
\ No newline at end of file
+dependent-scripts = true
diff --git a/conda/meta.yaml b/conda/meta.yaml
index 0f06eafa7eb4e937c6e192af3ec1b367a428f0ab..87eacc443e98438c71d6ce9d2ed316569d7a71a0 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -20,23 +20,10 @@ requirements:
   host:
     - python {{ python }}
     - setuptools {{ setuptools }}
-    - bob.blitz
-    - bob.core
-    - bob.db.base
     - bob.extension
     - bob.io.base
     - bob.io.image
-    - bob.learn.activation
-    - bob.learn.em
-    - bob.learn.linear
-    - bob.ip.base
-    - bob.math
     - bob.measure
-    - bob.sp
-    - bob.db.mnist
-    - bob.db.atnt
-    - bob.bio.base
-    - bob.ip.color
     - click {{ click }}
     - click-plugins {{ click_plugins }}
   run:
diff --git a/doc/conf.py b/doc/conf.py
index efb56b77423f80b23671cedadca42b3c3e2ef65d..4c1a316d41c76ea2bc587a80d9c62a35d3094e7a 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,33 +1,42 @@
 #!/usr/bin/env python
 # vim: set fileencoding=utf-8 :
 
+import glob
 import os
 import sys
-import glob
+import time
+
 import pkg_resources
 
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+import sphinx_rtd_theme
+
+# For inter-documentation mapping:
+from bob.extension.utils import link_documentation
+from bob.extension.utils import load_requirements
 
 # -- General configuration -----------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
-needs_sphinx = '1.3'
+needs_sphinx = "1.3"
 
 # Add any Sphinx extension module names here, as strings. They can be extensions
 # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
 extensions = [
-    'sphinx.ext.todo',
-    'sphinx.ext.coverage',
-    'sphinx.ext.ifconfig',
-    'sphinx.ext.autodoc',
-    'sphinx.ext.autosummary',
-    'sphinx.ext.doctest',
-    'sphinx.ext.graphviz',
-    'sphinx.ext.intersphinx',
-    'sphinx.ext.napoleon',
-    'sphinx.ext.viewcode',
-    'sphinx.ext.mathjax',
-    'matplotlib.sphinxext.plot_directive'
-    ]
+    "sphinx.ext.todo",
+    "sphinx.ext.coverage",
+    "sphinx.ext.ifconfig",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.doctest",
+    "sphinx.ext.graphviz",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.mathjax",
+    "matplotlib.sphinxext.plot_directive",
+]
 
 # Be picky about warnings
 nitpicky = True
@@ -36,13 +45,13 @@ nitpicky = True
 nitpick_ignore = []
 
 # Allows the user to override warnings from a separate file
-if os.path.exists('nitpick-exceptions.txt'):
-    for line in open('nitpick-exceptions.txt'):
+if os.path.exists("nitpick-exceptions.txt"):
+    for line in open("nitpick-exceptions.txt"):
         if line.strip() == "" or line.startswith("#"):
             continue
         dtype, target = line.split(None, 1)
         target = target.strip()
-        try: # python 2.x
+        try:  # python 2.x
             target = unicode(target)
         except NameError:
             pass
@@ -58,25 +67,26 @@ autosummary_generate = True
 numfig = True
 
 # If we are on OSX, the 'dvipng' path maybe different
-dvipng_osx = '/opt/local/libexec/texlive/binaries/dvipng'
-if os.path.exists(dvipng_osx): pngmath_dvipng = dvipng_osx
+dvipng_osx = "/opt/local/libexec/texlive/binaries/dvipng"
+if os.path.exists(dvipng_osx):
+    pngmath_dvipng = dvipng_osx
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # The suffix of source filenames.
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 
 # General information about the project.
-project = u'bob.learn.tensorflow'
-import time
-copyright = u'%s, Idiap Research Institute' % time.strftime('%Y')
+project = u"bob.learn.tensorflow"
+
+copyright = u"%s, Idiap Research Institute" % time.strftime("%Y")
 
 # Grab the setup entry
 distribution = pkg_resources.require(project)[0]
@@ -92,123 +102,121 @@ release = distribution.version
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
-#language = None
+# language = None
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
-#today = ''
+# today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['links.rst']
+exclude_patterns = ["links.rst"]
 
 # The reST default role (used for this markup: `text`) to use for all documents.
-#default_role = None
+# default_role = None
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
 
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
 
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
-#show_authors = False
+# show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
 
 # A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
 
 # Some variables which are useful for generated material
-project_variable = project.replace('.', '_')
-short_description = u'bob.learn.tensorflow API'
-owner = [u'Idiap Research Institute']
+project_variable = project.replace(".", "_")
+short_description = u"bob.learn.tensorflow API"
+owner = [u"Idiap Research Institute"]
 
 
 # -- Options for HTML output ---------------------------------------------------
 
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-import sphinx_rtd_theme
-html_theme = 'sphinx_rtd_theme'
+
+html_theme = "sphinx_rtd_theme"
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-#html_theme_options = {}
+# html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
 html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
-#html_title = None
+# html_title = None
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = project_variable
+# html_short_title = project_variable
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-html_logo = 'img/logo.png'
+html_logo = "img/logo.png"
 
 # The name of an image file (within the static path) to use as favicon of the
 # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-html_favicon = 'img/favicon.ico'
+html_favicon = "img/favicon.ico"
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-#html_static_path = ['_static']
+# html_static_path = ['_static']
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
 
 # If false, no module index is generated.
-#html_domain_indices = True
+# html_domain_indices = True
 
 # If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
 
 # If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
 
 # If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+# html_show_sphinx = True
 
 # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
+# html_show_copyright = True
 
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
 
 # This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# html_file_suffix = None
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = project_variable + u'_doc'
+htmlhelp_basename = project_variable + u"_doc"
 
 
 # -- Post configuration --------------------------------------------------------
@@ -218,46 +226,47 @@ rst_epilog = """
 .. |project| replace:: Bob
 .. |version| replace:: %s
 .. |current-year| date:: %%Y
-""" % (version,)
+""" % (
+    version,
+)
 
 # Default processing flags for sphinx
-autoclass_content = 'class'
-autodoc_member_order = 'bysource'
+autoclass_content = "class"
+autodoc_member_order = "bysource"
 autodoc_default_flags = [
-  'members',
-  'undoc-members',
-  'show-inheritance',
-  ]
+    "members",
+    "undoc-members",
+    "show-inheritance",
+]
+
 
-# For inter-documentation mapping:
-from bob.extension.utils import link_documentation, load_requirements
 sphinx_requirements = "extra-intersphinx.txt"
 if os.path.exists(sphinx_requirements):
-  intersphinx_mapping = link_documentation(
-      additional_packages=['python','numpy'] + \
-          load_requirements(sphinx_requirements)
-          )
+    intersphinx_mapping = link_documentation(
+        additional_packages=["python", "numpy"] + load_requirements(sphinx_requirements)
+    )
 else:
-  intersphinx_mapping = link_documentation()
+    intersphinx_mapping = link_documentation()
 
 
 # We want to remove all private (i.e. _. or __.__) members
 # that are not in the list of accepted functions
-accepted_private_functions = ['__array__']
+accepted_private_functions = ["__array__"]
+
 
 def member_function_test(app, what, name, obj, skip, options):
-  # test if we have a private function
-  if len(name) > 1 and name[0] == '_':
-    # test if this private function should be allowed
-    if name not in accepted_private_functions:
-      # omit privat functions that are not in the list of accepted private functions
-      return skip
-    else:
-      # test if the method is documented
-      if not hasattr(obj, '__doc__') or not obj.__doc__:
-        return skip
-  return False
+    # test if we have a private function
+    if len(name) > 1 and name[0] == "_":
+        # test if this private function should be allowed
+        if name not in accepted_private_functions:
+            # omit private functions that are not in the list of accepted private functions
+            return skip
+        else:
+            # test if the method is documented
+            if not hasattr(obj, "__doc__") or not obj.__doc__:
+                return skip
+    return False
 
-def setup(app):
-  app.connect('autodoc-skip-member', member_function_test)
 
+def setup(app):
+    app.connect("autodoc-skip-member", member_function_test)
diff --git a/doc/index.rst b/doc/index.rst
index b383b757967302c56f7d487089f4b71f0fb609c3..f0bae7a089df9d3d15ed09f2e79a3fc98449681f 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -3,11 +3,12 @@
 .. _bob.learn.tensorflow:
 
 =======================
- Tensorflow Bob Bridge
+ Tensorflow Biometrics
 =======================
 
-`bob.learn.tensorflow` is a high-level library, written in Python that runs on top of TensorFlow.
-The goal here is to be able to do fast experimentation with neural networks.
+This package extends the high-level API of Tensorflow to allow biometrics
+experiments. In particular, it provides the tools to train deep models for
+biometric recognition and presentation attack detection.
 
 Users Guide
 ===========
@@ -16,9 +17,6 @@ Users Guide
    :maxdepth: 2
 
    user_guide
-   regression
-   style_transfer
-   transfer_learning
 
 Reference Manual
 ================
@@ -34,4 +32,3 @@ Indices and tables
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
-
diff --git a/doc/py_api.rst b/doc/py_api.rst
index 4c885e20f69d93208561cedc94e0680858a2415f..2fec97b9c55bcd617d16a2cef044b327c3007560 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -24,19 +24,6 @@ Estimators
 Architectures
 =============
 
-.. autosummary::
-    bob.learn.tensorflow.network.chopra
-    bob.learn.tensorflow.network.light_cnn9
-    bob.learn.tensorflow.network.dummy
-    bob.learn.tensorflow.network.mlp
-    bob.learn.tensorflow.network.mlp_with_batchnorm_and_dropout
-    bob.learn.tensorflow.network.inception_resnet_v2
-    bob.learn.tensorflow.network.inception_resnet_v1
-    bob.learn.tensorflow.network.inception_resnet_v2_batch_norm
-    bob.learn.tensorflow.network.inception_resnet_v1_batch_norm
-    bob.learn.tensorflow.network.SimpleCNN.slim_architecture
-    bob.learn.tensorflow.network.vgg_19
-    bob.learn.tensorflow.network.vgg_16
 
 
 Data
diff --git a/doc/regression.rst b/doc/regression.rst
deleted file mode 100644
index 54cd1ad51fd20705880329c439d824d8b8b996b6..0000000000000000000000000000000000000000
--- a/doc/regression.rst
+++ /dev/null
@@ -1,125 +0,0 @@
-.. vim: set fileencoding=utf-8 :
-
-
-===========
- Regression
-===========
-
-A flexible estimator for regression problems is implemented in
-:py:class:`bob.learn.tensorflow.estimators.Regressor`. You can use this
-estimator for various regression problems. The guide below (taken from
-https://www.tensorflow.org/tutorials/keras/basic_regression) outlines a basic
-regression example using the API of this package.
-
-The Boston Housing Prices dataset
-=================================
-
-.. testsetup::
-
-    import tempfile
-    model_dir = tempfile.mkdtemp()
-
-
-1. Let's do some imports:
-*************************
-
-.. doctest::
-
-    >>> import tensorflow as tf
-    >>> from tensorflow import keras
-    >>> import tensorflow.contrib.slim as slim
-    >>> from bob.learn.tensorflow.estimators import Regressor
-
-2. Download the dataset:
-************************
-
-.. doctest::
-
-    >>> boston_housing = keras.datasets.boston_housing
-    >>> print("doctest s**t"); (train_data, train_labels), (test_data, test_labels) = boston_housing.load_data() # doctest: +ELLIPSIS
-    doc...
-    >>> print("Training set: {}".format(train_data.shape))
-    Training set: (404, 13)
-    >>> print("Testing set:  {}".format(test_data.shape))
-    Testing set:  (102, 13)
-
-3. Normalize features
-*********************
-
-.. doctest::
-
-    >>> # Test data is *not* used when calculating the mean and std.
-    >>>
-    >>> mean = train_data.mean(axis=0)
-    >>> std = train_data.std(axis=0)
-    >>> train_data = (train_data - mean) / std
-    >>> test_data = (test_data - mean) / std
-
-4. Define the input functions
-*****************************
-
-.. doctest::
-
-    >>> EPOCH = 2
-    >>> def input_fn(mode):
-    ...     if mode == tf.estimator.ModeKeys.TRAIN:
-    ...         features, labels = train_data, train_labels
-    ...     else:
-    ...         features, labels, = test_data, test_labels
-    ...     dataset = tf.data.Dataset.from_tensor_slices((features, labels, [str(x) for x in labels]))
-    ...     dataset = dataset.batch(1)
-    ...     if mode == tf.estimator.ModeKeys.TRAIN:
-    ...         dataset = dataset.apply(tf.contrib.data.shuffle_and_repeat(len(labels), EPOCH))
-    ...     data, label, key = dataset.make_one_shot_iterator().get_next()
-    ...     # key is a unique string identifier of each sample.
-    ...     # Here we just use the string version of labels.
-    ...     return {'data': data, 'key': key}, label
-    ...
-    >>> def train_input_fn():
-    ...     return input_fn(tf.estimator.ModeKeys.TRAIN)
-    ...
-    >>> def eval_input_fn():
-    ...     return input_fn(tf.estimator.ModeKeys.EVAL)
-
-
-5. Create the estimator
-***********************
-
-.. doctest::
-
-    >>> def architecture(data, mode, **kwargs):
-    ...     endpoints = {}
-    ...
-    ...     with tf.variable_scope('DNN'):
-    ...
-    ...         name = 'fc1'
-    ...         net = slim.fully_connected(data, 64, scope=name)
-    ...         endpoints[name] = net
-    ...
-    ...         name = 'fc2'
-    ...         net = slim.fully_connected(net, 64, scope=name)
-    ...         endpoints[name] = net
-    ...
-    ...     return net, endpoints
-    ...
-    >>> estimator = Regressor(architecture, model_dir=model_dir)
-
-
-5. Train and evaluate the model
-*******************************
-
-.. doctest::
-
-    >>> estimator.train(train_input_fn) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE +SKIP
-    <bob.learn.tensorflow.estimators.Regressor ...
-
-    >>> 'rmse' in estimator.evaluate(eval_input_fn) # doctest: +SKIP
-    True
-
-    >>> list(estimator.predict(eval_input_fn)) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE  +SKIP
-    [...
-
-.. testcleanup::
-
-    import shutil
-    shutil.rmtree(model_dir, True)
diff --git a/doc/style_transfer.rst b/doc/style_transfer.rst
deleted file mode 100644
index 2babdeeb1e9c4219f19e663a16f4dcafee62b68b..0000000000000000000000000000000000000000
--- a/doc/style_transfer.rst
+++ /dev/null
@@ -1,81 +0,0 @@
-.. vim: set fileencoding=utf-8 :
-
-
-===============
- Style Transfer
-===============
-
-
-We have implemented the style transfer strategy from::
-
-    Gatys, Leon A., Alexander S. Ecker, and Matthias Bethge. "A neural algorithm of artistic style." arXiv preprint arXiv:1508.06576 (2015).
-
-
-Check as the usage possibilities with the command::
-
- $ bob tf style_transfer --help
-
-
-Here we have an example on how to do a style transfer using VGG 19 trained with the image net
-
-.. doctest::
-
-    >>> from bob.learn.tensorflow.network import vgg_19
-    >>> # --architecture
-    >>> architecture = vgg_19
-
-    >>> import numpy
-
-    >>> # YOU CAN DOWNLOAD THE CHECKPOINTS FROM HERE
-    >>> # https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models
-    >>> checkpoint_dir = "[DOWNLOAD_YOUR_MODEL]"
-
-    >>> # --style-end-points and -- content-end-points
-    >>> content_end_points = ['vgg_19/conv4/conv4_2', 'vgg_19/conv5/conv5_2']
-    >>> style_end_points = ['vgg_19/conv1/conv1_2',
-    ...                 'vgg_19/conv2/conv2_1',
-    ...                 'vgg_19/conv3/conv3_1',
-    ...                 'vgg_19/conv4/conv4_1',
-    ...                 'vgg_19/conv5/conv5_1'
-    ...                ]
-
-    >>> # Transfering variables
-    >>> scopes = {"vgg_19/":"vgg_19/"}
-
-    >>> # Set if images using
-    >>> style_image_paths = ["vincent_van_gogh.jpg"]
-
-    >>> # Functions used to preprocess the input signal and
-    >>> # --preprocess-fn and --un-preprocess-fn
-    >>> # Taken from VGG19
-    >>> def mean_norm(tensor):
-    ...     return tensor - numpy.array([ 123.68 ,  116.779,  103.939])
-
-    >>> def un_mean_norm(tensor):
-    ...     return tensor + numpy.array([ 123.68 ,  116.779,  103.939])
-
-    >>> preprocess_fn = mean_norm
-
-    >>> un_preprocess_fn = un_mean_norm
-
-
-Here we use an image from Angelina Jolie using Van Gogh style as an example::
-
-   $ bob tf style_transfer angelina.jpg angelina_output.jpg vgg19_example.py -i 1000.
-
-.. figure:: img/angelina.jpg
-    :width: 35%
-
-    Source (content) image
-
-.. figure:: img/vincent_van_gogh.jpg
-    :width: 27%
-
-    Style image
-
-.. figure:: img/angelina_output.jpg
-    :width: 35%
-
-    Generated image
-
-
diff --git a/doc/transfer_learning.rst b/doc/transfer_learning.rst
deleted file mode 100644
index 3558619b00555b524da426ac372fa093332f504e..0000000000000000000000000000000000000000
--- a/doc/transfer_learning.rst
+++ /dev/null
@@ -1,9 +0,0 @@
-.. vim: set fileencoding=utf-8 :
-
-
-==================
- Transfer Learning
-==================
-
-
-To be done...
diff --git a/doc/user_guide.rst b/doc/user_guide.rst
index e58b57cf77a9669208f9e4bc811469a239e5fc82..52b530db3726d5594aa7435a1cad72a1343c5a17 100644
--- a/doc/user_guide.rst
+++ b/doc/user_guide.rst
@@ -1,325 +1,126 @@
 .. vim: set fileencoding=utf-8 :
 
-
 ===========
  User guide
 ===========
 
-This package builds on top of tensorflow_. You are expected to have some
-familiarity with it before continuing. We recommend reading at least the
-following pages:
+This package builds on top of tensorflow_ (version 2.3 or later is required).
+You are expected to have some familiarity with it before continuing. The best
+way to use tensorflow_ is through its ``tf.keras`` and ``tf.data`` APIs. We
+recommend reading at least the following pages:
 
-* https://www.tensorflow.org/get_started
-* https://www.tensorflow.org/guide/
-* https://www.tensorflow.org/guide/estimators
-* https://www.tensorflow.org/guide/datasets
+* https://www.tensorflow.org/tutorials/quickstart/beginner
+* https://www.tensorflow.org/tutorials/quickstart/advanced
+* https://keras.io/getting_started/intro_to_keras_for_engineers/
+* https://keras.io/getting_started/intro_to_keras_for_researchers/
+* https://www.tensorflow.org/tutorials/load_data/images
+* https://www.tensorflow.org/guide/data
 
-The best way to use tensorflow_ is to use its ``tf.estimator`` and ``tf.data``
-API. The estimators are an abstraction API for machine learning models and the
-data API is here to help you build complex and efficient input pipelines to
-your model. Using the estimators and dataset API of tensorflow will make your
-code more complex but instead you will enjoy more efficiency and avoid code
-redundancy.
+If you are used to the Tensorflow 1 API, then reading these pages is also
+recommended:
 
+* https://www.tensorflow.org/guide/effective_tf2
+* https://www.tensorflow.org/guide/migrate
+* https://www.tensorflow.org/guide/upgrade
+* https://github.com/tensorflow/community/blob/master/sigs/testing/faq.md
 
-Face recognition example using bob.db databases
-===============================================
+In the rest of this guide, you will find a few tips and examples on how to:
 
+* Port v1 checkpoints to the v2 format.
+* Create datasets and save TFRecords.
+* Create models with custom training and evaluation logic.
+* Use mixed-precision training.
+* Use multi-GPU and multi-worker training.
 
-Let's take a look at a complete example of using a convolutional neural network
-(CNN) for recognizing faces from the ATNT database. At the end, we will explain
-the data pipeline in more detail.
+After reading this page, you may look at a complete example in:
+https://gitlab.idiap.ch/bob/bob.learn.tensorflow/-/blob/master/examples/MSCeleba_centerloss_mixed_precision_multi_worker.py
 
-1. Let's do some imports:
-*************************
 
-.. testsetup::
+Porting V1 Tensorflow checkpoints to V2
+=======================================
 
-    import tempfile
-    temp_dir = model_dir = tempfile.mkdtemp()
+Take a look at the notebook located at:
+https://gitlab.idiap.ch/bob/bob.learn.tensorflow/-/blob/master/examples/convert_v1_checkpoints_to_v2.ipynb
+for an example.
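+
+For a rough idea of what the conversion involves, here is a minimal sketch that
+uses only public Tensorflow APIs (the checkpoint path, the stand-in model, and
+the variable-name mapping are assumptions; adapt them to your architecture)::
+
+    import tensorflow as tf
+
+    # stand-in model; in practice, build the architecture matching the checkpoint
+    model = tf.keras.Sequential([tf.keras.layers.Dense(10, input_shape=(5,))])
+
+    reader = tf.train.load_checkpoint("/path/to/v1/checkpoint")
+    v1_shapes = reader.get_variable_to_shape_map()
+    for variable in model.variables:
+        # strip the ":0" suffix; adapt this mapping to the v1 graph's names
+        v1_name = variable.name.split(":")[0]
+        if v1_name in v1_shapes:
+            variable.assign(reader.get_tensor(v1_name))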
 
-.. doctest::
 
-    >>> from bob.learn.tensorflow.dataset.bio import BioGenerator
-    >>> from bob.learn.tensorflow.utils import to_channels_last
-    >>> from bob.learn.tensorflow.estimators import Logits
-    >>> import bob.db.atnt
-    >>> import tensorflow as tf
-    >>> import tensorflow.contrib.slim as slim
-
-2. Define the inputs:
-*********************
-
-.. _input_fn:
-
-.. doctest::
+Creating datasets from data
+===========================
 
-    >>> def input_fn(mode):
-    ...     db = bob.db.atnt.Database()
-    ...
-    ...     if mode == tf.estimator.ModeKeys.TRAIN:
-    ...         groups = 'world'
-    ...     elif mode == tf.estimator.ModeKeys.EVAL:
-    ...         groups = 'dev'
-    ...
-    ...     files = db.objects(groups=groups)
-    ...
-    ...     # construct integer labels for each identity in the database
-    ...     CLIENT_IDS = (str(f.client_id) for f in files)
-    ...     CLIENT_IDS = list(set(CLIENT_IDS))
-    ...     CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))
-    ...
-    ...     def biofile_to_label(f):
-    ...         return CLIENT_IDS[str(f.client_id)]
-    ...
-    ...     def load_data(database, f):
-    ...         img = f.load(database.original_directory, database.original_extension)
-    ...         # make a channels_first image (bob format) with 1 channel
-    ...         img = img.reshape(1, 112, 92)
-    ...         return img
-    ...
-    ...     generator = BioGenerator(db, files, load_data, biofile_to_label)
-    ...
-    ...     dataset = tf.data.Dataset.from_generator(
-    ...         generator, generator.output_types, generator.output_shapes)
-    ...
-    ...     def transform(image, label, key):
-    ...         # convert to channels last
-    ...         image = to_channels_last(image)
-    ...
-    ...         # per_image_standardization
-    ...         image = tf.image.per_image_standardization(image)
-    ...         return (image, label, key)
-    ...
-    ...     dataset = dataset.map(transform)
-    ...     dataset = dataset.cache(temp_dir)
-    ...     if mode == tf.estimator.ModeKeys.TRAIN:
-    ...         dataset = dataset.repeat(1)
-    ...     dataset = dataset.batch(8)
-    ...
-    ...     data, label, key = dataset.make_one_shot_iterator().get_next()
-    ...     return {'data': data, 'key': key}, label
-    ...
-    ...
-    >>> def train_input_fn():
-    ...     return input_fn(tf.estimator.ModeKeys.TRAIN)
-    ...
-    ...
-    >>> def eval_input_fn():
-    ...     return input_fn(tf.estimator.ModeKeys.EVAL)
-    ...
-    ...
-    >>> # supply this hook for debugging
-    >>> # from tensorflow.python import debug as tf_debug
-    >>> # hooks = [tf_debug.LocalCLIDebugHook()]
-    >>> hooks = None
-    ...
-    >>> train_spec = tf.estimator.TrainSpec(
-    ...     input_fn=train_input_fn, max_steps=50, hooks=hooks)
-    >>> eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
-
-3. Define the architecture:
-***************************
+If you are working with Bob databases, below is an example of converting them to
+``tf.data.Dataset`` objects using :any:`bob.learn.tensorflow.data.dataset_using_generator`:
 
-.. doctest::
+.. testsetup::
 
-    >>> def architecture(data, mode, **kwargs):
-    ...     endpoints = {}
-    ...     training = mode == tf.estimator.ModeKeys.TRAIN
-    ...
-    ...     with tf.variable_scope('CNN'):
-    ...
-    ...         name = 'conv'
-    ...         net = slim.conv2d(data, 32, kernel_size=(
-    ...             5, 5), stride=2, padding='SAME', activation_fn=tf.nn.relu, scope=name)
-    ...         endpoints[name] = net
-    ...
-    ...         name = 'pool'
-    ...         net = slim.max_pool2d(net, (2, 2),
-    ...             stride=1, padding='SAME', scope=name)
-    ...         endpoints[name] = net
-    ...
-    ...         name = 'pool-flat'
-    ...         net = slim.flatten(net, scope=name)
-    ...         endpoints[name] = net
-    ...
-    ...         name = 'dense'
-    ...         net = slim.fully_connected(net, 128, scope=name)
-    ...         endpoints[name] = net
-    ...
-    ...         name = 'dropout'
-    ...         net = slim.dropout(
-    ...             inputs=net, keep_prob=0.4, is_training=training)
-    ...         endpoints[name] = net
-    ...
-    ...     return net, endpoints
-
-
-.. important ::
-
-    Practical advice: use ``tf.contrib.slim`` to craft your CNNs. Although
-    Tensorflow's documentation recommend the usage of ``tf.layers`` and
-    ``tf.keras``, in our experience ``slim`` has better defaults and is more
-    integrated with tensorflow's framework (compared to ``tf.keras``),
-    probably because it is used more often internally at Google.
-
-
-4. Estimator:
-************************
-
-Explicitly triggering the estimator
-...................................
+    import tempfile
+    temp_dir = model_dir = tempfile.mkdtemp()
 
 .. doctest::
 
-    >>> estimator = Logits(
-    ...     architecture,
-    ...     optimizer=tf.train.GradientDescentOptimizer(1e-4),
-    ...     loss_op=tf.losses.sparse_softmax_cross_entropy,
-    ...     n_classes=20,  # the number of identities in the world set of ATNT database
-    ...     embedding_validation=True,
-    ...     validation_batch_size=8,
-    ...     model_dir=model_dir,
-    ... )
-    >>> tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec) # doctest: +SKIP
-    ({'accuracy':...
-
-
-Triggering the estimator via command line
-..........................................
-
-In the example above we explicitly triggered the training and validation via
-`tf.estimator.train`. We provide command line scripts that does that for you.
-
-Check the command bellow fro training::
-
- $ bob tf train --help
-
-and to evaluate::
-
- $ bob tf eval --help
-
-
-Data pipeline
-=============
-
-There are several ways to provide data to Tensorflow graphs. In this section we
-provide some examples on how to make the bridge between `bob.db` databases and
-tensorflow `input_fn`.
-
-The BioGenerator input pipeline
-*******************************
-
-The :any:`bob.learn.tensorflow.dataset.bio.BioGenerator` class can be used to
-convert any database of bob (not just bob.bio.base's databases) to a
-``tf.data.Dataset`` instance.
-
-While building the input pipeline, you can manipulate your data in two
-sections:
-
-* In the ``load_data`` function where everything is a numpy array.
-* In the ``transform`` function where the data are tensorflow tensors.
-
-For example, you can annotate, crop to bounding box, and scale your images in
-the ``load_data`` function and apply transformations on images (e.g. random
-crop, mean normalization, random flip, ...) in the ``transform`` function.
-
-Once these transformations are applied on your data, you can easily cache them
-to disk (using ``tf.data.Dataset.cache``) for faster reading of data in your
-training.
-
-
-Input pipeline with TFRecords
-*****************************
-
-An optimized way to provide data to Tensorflow graphs is using tfrecords. In
-this `link <http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/>`_
-you have a very nice guide on how TFRecord works.
-
-In `bob.learn.tensorflow` we provide a command line interface
-``bob tf db_to_tfrecords`` that converts ``bob.db`` databases to TFRecords.
-Type the snippet bellow for help::
+    >>> import bob.db.atnt
+    >>> from bob.learn.tensorflow.data import dataset_using_generator
+    >>> import tensorflow as tf
 
-  $ bob tf db_to_tfrecords --help
+    >>> db = bob.db.atnt.Database()
+    >>> samples = db.objects(groups="world")
 
+    >>> # construct integer labels for each identity in the database
+    >>> CLIENT_IDS = (str(f.client_id) for f in samples)
+    >>> CLIENT_IDS = list(set(CLIENT_IDS))
+    >>> CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))
 
-To generate a tfrecord for our
-`Face recognition example using bob.db databases`_ example use the following
-snippet.
+    >>> def reader(sample):
+    ...     img = sample.load(db.original_directory, db.original_extension)
+    ...     label = CLIENT_IDS[str(sample.client_id)]
+    ...     return img, label
 
-.. doctest::
+    >>> dataset = dataset_using_generator(samples, reader)
+    >>> dataset  # doctest: +ELLIPSIS
+    <...>
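+
+The result is a regular ``tf.data.Dataset``, so you can apply the usual
+transformations to it, for example::
+
+    dataset = dataset.shuffle(1000).batch(32).prefetch(tf.data.experimental.AUTOTUNE)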
 
-    >>> from bob.bio.base.utils import read_original_data
-    >>> from bob.bio.base.test.dummy.database import database # this is based on bob.db.atnt
+Create TFRecords from tf.data.Datasets
+======================================
 
-    >>> groups = 'dev'
+Use :any:`bob.learn.tensorflow.data.dataset_to_tfrecord` and
+:any:`bob.learn.tensorflow.data.dataset_from_tfrecord` to painlessly convert
+**any** ``tf.data.Dataset`` to TFRecords and create datasets back from those
+TFRecords:
 
-    >>> samples = database.all_files(groups=groups)
+    >>> from bob.learn.tensorflow.data import dataset_to_tfrecord
+    >>> from bob.learn.tensorflow.data import dataset_from_tfrecord
+    >>> path = f"{temp_dir}/my_dataset"
+    >>> dataset_to_tfrecord(dataset, path)
+    >>> dataset = dataset_from_tfrecord(path)
+    >>> dataset  # doctest: +ELLIPSIS
+    <...>
 
-    >>> CLIENT_IDS = (str(f.client_id) for f in database.objects(groups=groups))
-    >>> CLIENT_IDS = set(CLIENT_IDS)
-    >>> CLIENT_IDS = dict(zip(CLIENT_IDS, range(len(CLIENT_IDS))))
+There is also a script called ``bob tf dataset-to-tfrecord`` that wraps
+:any:`bob.learn.tensorflow.data.dataset_to_tfrecord` for easy grid job
+submission.
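+
+Type the command below for its usage information::
+
+  $ bob tf dataset-to-tfrecord --help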
 
-    >>> def file_to_label(f):
-    ...     return CLIENT_IDS[str(f.client_id)]
+Create models with custom training and evaluation logic
+=======================================================
 
-    >>> def reader(biofile):
-    ...     data = read_original_data(biofile, database.original_directory, database.original_extension)
-    ...     label = file_to_label(biofile)
-    ...     key = biofile.path
-    ...     return (data, label, key)
+Training models for biometric recognition (and metric learning in general) is
+different from typical classification problems, since the labels seen during
+training and testing are different. We found that overriding the ``compile``,
+``train_step``, and ``test_step`` methods, as explained in
+https://www.tensorflow.org/guide/keras/customizing_what_happens_in_fit, is the
+best trade-off between controlling what happens during training and evaluation
+and avoiding boilerplate code.
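+
+A minimal sketch of the pattern (``my_metric_learning_loss`` is a placeholder
+for your own loss)::
+
+    import tensorflow as tf
+
+    def my_metric_learning_loss(labels, embeddings):
+        # placeholder: plug in your metric-learning loss (triplet, center loss, ...)
+        return tf.reduce_mean(tf.reduce_sum(tf.square(embeddings), axis=1))
+
+    class MyModel(tf.keras.Model):
+        def train_step(self, data):
+            images, labels = data
+            with tf.GradientTape() as tape:
+                embeddings = self(images, training=True)
+                # labels feed the loss directly; they are not classification targets
+                loss = my_metric_learning_loss(labels, embeddings)
+            gradients = tape.gradient(loss, self.trainable_variables)
+            self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
+            return {"loss": loss}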
 
 
-After saving this snippet in a python file (let's say `tfrec.py`) run the
-following command ::
+Mixed-precision training
+========================
+When doing mixed-precision training (see
+https://www.tensorflow.org/guide/mixed_precision), it is important to scale the
+loss before computing the gradients, and to unscale the gradients before
+applying them.
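+
+A minimal sketch of the scaling logic, assuming the model is compiled with a
+``tf.keras.mixed_precision.experimental.LossScaleOptimizer``::
+
+    import tensorflow as tf
+
+    class MixedPrecisionModel(tf.keras.Model):
+        def train_step(self, data):
+            images, labels = data
+            with tf.GradientTape() as tape:
+                loss = self.compiled_loss(labels, self(images, training=True))
+                # scale the loss up before computing gradients ...
+                scaled_loss = self.optimizer.get_scaled_loss(loss)
+            scaled_gradients = tape.gradient(scaled_loss, self.trainable_variables)
+            # ... and unscale the gradients before applying them
+            gradients = self.optimizer.get_unscaled_gradients(scaled_gradients)
+            self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))
+            return {"loss": loss}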
 
-    $ bob tf db_to_tfrecords tfrec.py -o atnt.tfrecord
 
-Once this is done you can replace the `input_fn`_ defined above by the snippet
-bellow.
+Multi-GPU and multi-worker training
+===================================
 
-.. doctest::
+When training with multiple GPUs or workers, custom metrics and losses must not
+average their results by the per-replica batch size; the values should be
+averaged by the global batch size instead, see
+https://www.tensorflow.org/tutorials/distribute/custom_training. Take a look at
+the custom metrics and losses in this package for examples of correct
+implementations.
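+
+A minimal sketch of a correctly averaged loss (the names are illustrative)::
+
+    import tensorflow as tf
+
+    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(
+        from_logits=True, reduction=tf.keras.losses.Reduction.NONE
+    )
+
+    def compute_loss(labels, logits, global_batch_size):
+        # per-sample losses; do not reduce by the per-replica batch size
+        per_example_loss = loss_fn(labels, logits)
+        # average by the global batch size instead
+        return tf.nn.compute_average_loss(
+            per_example_loss, global_batch_size=global_batch_size
+        )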
 
-    >>>
-    >>> from bob.learn.tensorflow.dataset.tfrecords import shuffle_data_and_labels_image_augmentation
-    >>>
-    >>> tfrecords_filename = ['/path/to/atnt.tfrecord']
-    >>> data_shape = (112, 92 , 3)
-    >>> data_type = tf.uint8
-    >>> batch_size = 16
-    >>> epochs = 1
-    >>>
-    >>> def train_input_fn():
-    ...     return shuffle_data_and_labels_image_augmentation(
-    ...                tfrecords_filename,
-    ...                data_shape,
-    ...                data_type,
-    ...                batch_size,
-    ...                epochs=epochs)
-
-.. testcleanup::
-
-    import shutil
-    shutil.rmtree(model_dir, True)
-
-The Estimator
-=============
-
-In this package we have crafted 4 types of estimators.
-
-   - Logits: `Cross entropy loss
-     <https://www.tensorflow.org/api_docs/python/tf/nn/softmax_cross_entropy_with_logits>`_
-     in the hot-encoded layer
-     :py:class:`bob.learn.tensorflow.estimators.Logits`
-   - LogitsCenterLoss: `Cross entropy loss
-     <https://www.tensorflow.org/api_docs/python/tf/nn/softmax_cross_entropy_with_logits>`_
-     PLUS the `center loss <https://ydwen.github.io/papers/WenECCV16.pdf>`_ in
-     the hot-encoded layer
-     :py:class:`bob.learn.tensorflow.estimators.LogitsCenterLoss`
-   - Siamese: Siamese network estimator
-     :py:class:`bob.learn.tensorflow.estimators.Siamese`
-   - Triplet: Triplet network estimator
-     :py:class:`bob.learn.tensorflow.estimators.Triplet`
 
 .. _tensorflow: https://www.tensorflow.org/
-
diff --git a/examples/MSCeleba_centerloss_mixed_precision_multi_worker.py b/examples/MSCeleba_centerloss_mixed_precision_multi_worker.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd4b7e53b2fd2e6c71361784be926b542a2633c1
--- /dev/null
+++ b/examples/MSCeleba_centerloss_mixed_precision_multi_worker.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python
+
+import json
+import os
+from functools import partial
+from multiprocessing import cpu_count
+
+import tensorflow as tf
+from bob.extension import rc
+from bob.learn.tensorflow.callbacks import add_backup_callback
+from bob.learn.tensorflow.losses import CenterLoss
+from bob.learn.tensorflow.losses import CenterLossLayer
+from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2
+from bob.learn.tensorflow.utils import predict_using_tensors
+from tensorflow.keras import layers
+from tensorflow.keras.mixed_precision import experimental as mixed_precision
+
+policy = mixed_precision.Policy("mixed_float16")
+mixed_precision.set_policy(policy)
+
+
+TRAIN_TF_RECORD_PATHS = (
+    f"{rc['htface']}/databases/tfrecords/msceleba/"
+    "tfrecord_182x_hand_prunned_44/*.tfrecord"
+)
+VALIDATION_TF_RECORD_PATHS = (
+    f"{rc['htface']}/databases/tfrecords/lfw/182x/RGB/*.tfrecord"
+)
+# there are 2812 samples in the validation set
+VALIDATION_SAMPLES = 2812
+
+CHECKPOINT = f"{rc['temp']}/models/inception_v2_batchnorm_rgb_msceleba_mixed_precision"
+
+AUTOTUNE = tf.data.experimental.AUTOTUNE
+TFRECORD_PARALLEL_READ = cpu_count()
+N_CLASSES = 87662
+DATA_SHAPE = (182, 182, 3)  # size of faces
+DATA_TYPE = tf.uint8
+OUTPUT_SHAPE = (160, 160)
+
+SHUFFLE_BUFFER = int(2e4)
+
+LEARNING_RATE = 0.1
+BATCH_SIZE = 90 * 2  # ideally a multiple of 8 for mixed-precision performance
+# we want to run 35 epochs over the tfrecords. There are 959083 samples in the
+# train tfrecords; if the batch size changes, the steps per epoch and the keras
+# epoch multiplier should change accordingly
+EPOCHS = 35
+# number of training steps to do before validating the model. This also defines
+# what keras calls an epoch (not a real pass over the data). We want to evaluate
+# every 180000 (90 * 2000) samples
+STEPS_PER_EPOCH = 180000 // BATCH_SIZE
+# np.ceil(959083 / 180000) = 5.33, rounded up to 6
+KERAS_EPOCH_MULTIPLIER = 6
+
+VALIDATION_BATCH_SIZE = 38  # ideally a multiple of 8 for mixed-precision performance
+
+
+FEATURES = {
+    "data": tf.io.FixedLenFeature([], tf.string),
+    "label": tf.io.FixedLenFeature([], tf.int64),
+    "key": tf.io.FixedLenFeature([], tf.string),
+}
+LOSS_WEIGHTS = {"cross_entropy": 1.0, "center_loss": 0.01}
+
+
+def decode_tfrecords(x):
+    features = tf.io.parse_single_example(x, FEATURES)
+    image = tf.io.decode_raw(features["data"], DATA_TYPE)
+    image = tf.reshape(image, DATA_SHAPE)
+    features["data"] = image
+    return features
+
+
+def get_preprocessor():
+    preprocessor = tf.keras.Sequential(
+        [
+            # rotate before cropping
+            # 5 random degree rotation
+            layers.experimental.preprocessing.RandomRotation(5 / 360),
+            layers.experimental.preprocessing.RandomCrop(
+                height=OUTPUT_SHAPE[0], width=OUTPUT_SHAPE[1]
+            ),
+            layers.experimental.preprocessing.RandomFlip("horizontal"),
+            # FIXED_STANDARDIZATION from https://github.com/davidsandberg/facenet
+            # [-0.99609375, 0.99609375]
+            layers.experimental.preprocessing.Rescaling(
+                scale=1 / 128, offset=-127.5 / 128
+            ),
+        ]
+    )
+    return preprocessor
+
+
+def preprocess(preprocessor, features, augment=False):
+    image = features["data"]
+    label = features["label"]
+    image = preprocessor(image, training=augment)
+    return image, label
+
+
+def prepare_dataset(tf_record_paths, batch_size, shuffle=False, augment=False):
+    ds = tf.data.Dataset.list_files(tf_record_paths, shuffle=shuffle)
+    ds = tf.data.TFRecordDataset(ds, num_parallel_reads=TFRECORD_PARALLEL_READ)
+    if shuffle:
+        # ignore order and read files as soon as they come in
+        ignore_order = tf.data.Options()
+        ignore_order.experimental_deterministic = False
+        ds = ds.with_options(ignore_order)
+    ds = ds.map(decode_tfrecords).prefetch(buffer_size=AUTOTUNE)
+    if shuffle:
+        ds = ds.shuffle(SHUFFLE_BUFFER).repeat(EPOCHS)
+    preprocessor = get_preprocessor()
+    ds = ds.batch(batch_size).map(
+        partial(preprocess, preprocessor, augment=augment),
+        num_parallel_calls=AUTOTUNE,
+    )
+
+    # Use buffered prefetching on all datasets
+    return ds.prefetch(buffer_size=AUTOTUNE)
+    # return ds.apply(tf.data.experimental.prefetch_to_device(
+    #         device, buffer_size=AUTOTUNE))
+
+
+def accuracy_from_embeddings(labels, prelogits):
+    labels = tf.reshape(labels, (-1,))
+    embeddings = tf.nn.l2_normalize(prelogits, 1)
+    predictions = predict_using_tensors(embeddings, labels)
+    return tf.math.equal(labels, predictions)
+
+
+class CustomModel(tf.keras.Model):
+    def compile(
+        self,
+        cross_entropy,
+        center_loss,
+        loss_weights,
+        train_loss,
+        train_cross_entropy,
+        train_center_loss,
+        test_acc,
+        global_batch_size,
+        **kwargs,
+    ):
+        super().compile(**kwargs)
+        self.cross_entropy = cross_entropy
+        self.center_loss = center_loss
+        self.loss_weights = loss_weights
+        self.train_loss = train_loss
+        self.train_cross_entropy = train_cross_entropy
+        self.train_center_loss = train_center_loss
+        self.test_acc = test_acc
+        self.global_batch_size = global_batch_size
+
+    def train_step(self, data):
+        images, labels = data
+        with tf.GradientTape() as tape:
+            logits, prelogits = self(images, training=True)
+            loss_cross = self.cross_entropy(labels, logits)
+            loss_center = self.center_loss(labels, prelogits)
+            loss = (
+                loss_cross * self.loss_weights[self.cross_entropy.name]
+                + loss_center * self.loss_weights[self.center_loss.name]
+            )
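+            # `loss` is per-sample here (both losses use Reduction.NONE); average
+            # it by the global batch size so gradients are correct when distributed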
+            unscaled_loss = tf.nn.compute_average_loss(
+                loss, global_batch_size=self.global_batch_size
+            )
+            loss = self.optimizer.get_scaled_loss(unscaled_loss)
+
+        trainable_vars = self.trainable_variables
+        gradients = tape.gradient(loss, trainable_vars)
+        gradients = self.optimizer.get_unscaled_gradients(gradients)
+        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
+
+        self.train_loss(unscaled_loss)
+        self.train_cross_entropy(loss_cross)
+        self.train_center_loss(loss_center)
+        return {
+            m.name: m.result()
+            for m in [self.train_loss, self.train_cross_entropy, self.train_center_loss]
+        }
+
+    def test_step(self, data):
+        images, labels = data
+        logits, prelogits = self(images, training=False)
+        self.test_acc(accuracy_from_embeddings(labels, prelogits))
+        return {m.name: m.result() for m in [self.test_acc]}
+
+
+def create_model():
+
+    model = InceptionResNetV2(
+        include_top=True,
+        classes=N_CLASSES,
+        bottleneck=True,
+        input_shape=OUTPUT_SHAPE + (3,),
+    )
+    float32_layer = layers.Activation("linear", dtype="float32")
+
+    prelogits = model.get_layer("Bottleneck/BatchNorm").output
+    prelogits = CenterLossLayer(
+        n_classes=N_CLASSES, n_features=prelogits.shape[-1], name="centers"
+    )(prelogits)
+    prelogits = float32_layer(prelogits)
+    logits = float32_layer(model.get_layer("logits").output)
+    model = CustomModel(
+        inputs=model.input, outputs=[logits, prelogits], name=model.name
+    )
+    return model
+
+
+def build_and_compile_model(global_batch_size):
+    model = create_model()
+
+    cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(
+        from_logits=True, name="cross_entropy", reduction=tf.keras.losses.Reduction.NONE
+    )
+    center_loss = CenterLoss(
+        centers_layer=model.get_layer("centers"),
+        alpha=0.9,
+        name="center_loss",
+        reduction=tf.keras.losses.Reduction.NONE,
+    )
+
+    optimizer = tf.keras.optimizers.RMSprop(
+        learning_rate=LEARNING_RATE, rho=0.9, momentum=0.9, epsilon=1.0
+    )
+    optimizer = mixed_precision.LossScaleOptimizer(optimizer, loss_scale="dynamic")
+
+    train_loss = tf.keras.metrics.Mean(name="loss")
+    train_cross_entropy = tf.keras.metrics.Mean(name="cross_entropy")
+    train_center_loss = tf.keras.metrics.Mean(name="center_loss")
+
+    test_acc = tf.keras.metrics.Mean(name="accuracy")
+
+    model.compile(
+        optimizer=optimizer,
+        cross_entropy=cross_entropy,
+        center_loss=center_loss,
+        loss_weights=LOSS_WEIGHTS,
+        train_loss=train_loss,
+        train_cross_entropy=train_cross_entropy,
+        train_center_loss=train_center_loss,
+        test_acc=test_acc,
+        global_batch_size=global_batch_size,
+    )
+    return model
+
+
+def train_and_evaluate(tf_config):
+    os.environ["TF_CONFIG"] = json.dumps(tf_config)
+
+    per_worker_batch_size = BATCH_SIZE
+    # default to a single worker when no cluster spec is provided (local runs)
+    num_workers = len(tf_config.get("cluster", {}).get("worker", [])) or 1
+
+    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
+
+    global_batch_size = per_worker_batch_size * num_workers
+    val_global_batch_size = VALIDATION_BATCH_SIZE * num_workers
+
+    train_ds = prepare_dataset(
+        TRAIN_TF_RECORD_PATHS, batch_size=global_batch_size, shuffle=True, augment=True
+    )
+
+    val_ds = prepare_dataset(
+        VALIDATION_TF_RECORD_PATHS,
+        batch_size=val_global_batch_size,
+        shuffle=False,
+        augment=False,
+    )
+
+    with strategy.scope():
+        model = build_and_compile_model(global_batch_size=global_batch_size)
+
+    val_metric_name = "val_accuracy"
+
+    def scheduler(epoch, lr):
+        # 20 epochs at 0.1, 10 at 0.01, and 5 at 0.001
+        # the epoch number here is keras's, which differs from the actual epoch number
+        epoch = epoch // KERAS_EPOCH_MULTIPLIER
+        if epoch in range(20):
+            return 0.1
+        elif epoch in range(20, 30):
+            return 0.01
+        else:
+            return 0.001
+
+    callbacks = {
+        "latest": tf.keras.callbacks.ModelCheckpoint(f"{CHECKPOINT}/latest", verbose=1),
+        "best": tf.keras.callbacks.ModelCheckpoint(
+            f"{CHECKPOINT}/best",
+            monitor=val_metric_name,
+            save_best_only=True,
+            mode="max",
+            verbose=1,
+        ),
+        "tensorboard": tf.keras.callbacks.TensorBoard(
+            log_dir=f"{CHECKPOINT}/logs", update_freq=15, profile_batch="10,50"
+        ),
+        "lr": tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1),
+        # "lr": tf.keras.callbacks.ReduceLROnPlateau(
+        #     monitor=val_metric_name, factor=0.2, patience=5, min_lr=0.001
+        # ),
+        "nan": tf.keras.callbacks.TerminateOnNaN(),
+    }
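+    # wrap the callbacks with backup/restore so an interrupted run resumes
+    # from the last finished epoch with callback state intact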
+    callbacks = add_backup_callback(
+        callbacks=callbacks, backup_dir=f"{CHECKPOINT}/backup"
+    )
+
+    model.fit(
+        train_ds,
+        validation_data=val_ds,
+        epochs=EPOCHS * KERAS_EPOCH_MULTIPLIER,
+        steps_per_epoch=STEPS_PER_EPOCH,
+        validation_steps=VALIDATION_SAMPLES // val_global_batch_size,
+        callbacks=callbacks,
+        verbose=2 if os.environ.get("SGE_TASK_ID") else 1,
+    )
+
+
+if __name__ == "__main__":
+    train_and_evaluate({})
diff --git a/examples/convert_v1_checkpoints_to_v2.ipynb b/examples/convert_v1_checkpoints_to_v2.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..d76f5c491e059e2bf00de9650bb702ed79e6919f
--- /dev/null
+++ b/examples/convert_v1_checkpoints_to_v2.ipynb
@@ -0,0 +1,4063 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "import tarfile\n",
+    "\n",
+    "import tensorflow as tf\n",
+    "from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2\n",
+    "from bob.learn.tensorflow.utils import restore_model_variables_from_checkpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'2.3.1'"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# make sure you are using tensorflow 2\n",
+    "tf.__version__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "checkpoint = \"/tmp/msceleb/inception-v2_batchnorm_rgb\"\n",
+    "path = f\"{checkpoint}.tar.gz\"\n",
+    "os.makedirs(checkpoint, exist_ok=True)\n",
+    "tf.keras.utils.get_file(\n",
+    "    path,\n",
+    "    \"https://www.idiap.ch/software/bob/data/bob/bob.bio.face_ongoing/master/msceleb/inception-v2_batchnorm_rgb.tar.gz\",\n",
+    ")\n",
+    "with tarfile.open(path) as f:\n",
+    "    f.extractall(os.path.dirname(checkpoint))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash\n",
+    "# strip the folder names from saved checkpoint\n",
+    "sed -i \"s|/.*/||\" $checkpoint/checkpoint"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[('InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/beta', [192]),\n",
+       " ('InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean', [192]),\n",
+       " ('InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/weights', [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta', [192]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/beta', [224]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/weights', [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/beta', [256]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/weights', [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Block8/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Block8/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Bottleneck/BatchNorm/beta', [128]),\n",
+       " ('InceptionResnetV2/Bottleneck/BatchNorm/moving_mean', [128]),\n",
+       " ('InceptionResnetV2/Bottleneck/BatchNorm/moving_variance', [128]),\n",
+       " ('InceptionResnetV2/Bottleneck/weights', [1536, 128]),\n",
+       " ('InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/beta', [32]),\n",
+       " ('InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_mean', [32]),\n",
+       " ('InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_variance', [32]),\n",
+       " ('InceptionResnetV2/Conv2d_1a_3x3/weights', [3, 3, 3, 32]),\n",
+       " ('InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/beta', [32]),\n",
+       " ('InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_mean', [32]),\n",
+       " ('InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_variance', [32]),\n",
+       " ('InceptionResnetV2/Conv2d_2a_3x3/weights', [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/beta', [64]),\n",
+       " ('InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_mean', [64]),\n",
+       " ('InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_variance', [64]),\n",
+       " ('InceptionResnetV2/Conv2d_2b_3x3/weights', [3, 3, 32, 64]),\n",
+       " ('InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/beta', [80]),\n",
+       " ('InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_mean', [80]),\n",
+       " ('InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_variance', [80]),\n",
+       " ('InceptionResnetV2/Conv2d_3b_1x1/weights', [1, 1, 64, 80]),\n",
+       " ('InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/beta', [192]),\n",
+       " ('InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_mean', [192]),\n",
+       " ('InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_variance', [192]),\n",
+       " ('InceptionResnetV2/Conv2d_4a_3x3/weights', [3, 3, 80, 192]),\n",
+       " ('InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/beta', [1536]),\n",
+       " ('InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_mean', [1536]),\n",
+       " ('InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_variance', [1536]),\n",
+       " ('InceptionResnetV2/Conv2d_7b_1x1/weights', [1, 1, 2080, 1536]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/beta', [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/weights', [1, 1, 192, 96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/beta', [48]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 192, 48]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/beta', [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/weights', [5, 5, 48, 64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/beta', [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 192, 64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/beta', [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/weights', [3, 3, 64, 96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/beta', [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/weights', [3, 3, 96, 96]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/beta', [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/weights',\n",
+       "  [1, 1, 192, 64]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta', [384]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean',\n",
+       "  [384]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance',\n",
+       "  [384]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/weights',\n",
+       "  [3, 3, 320, 384]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta', [256]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 256]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/beta', [256]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 256, 256]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta', [384]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean',\n",
+       "  [384]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance',\n",
+       "  [384]),\n",
+       " ('InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/weights',\n",
+       "  [3, 3, 256, 384]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/beta', [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta', [384]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean',\n",
+       "  [384]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance',\n",
+       "  [384]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/weights',\n",
+       "  [3, 3, 256, 384]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta', [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta', [288]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean',\n",
+       "  [288]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance',\n",
+       "  [288]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/weights',\n",
+       "  [3, 3, 256, 288]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/beta', [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 256]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/beta', [288]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [288]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [288]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 256, 288]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/beta', [320]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_mean',\n",
+       "  [320]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_variance',\n",
+       "  [320]),\n",
+       " ('InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/weights',\n",
+       "  [3, 3, 288, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_1/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_10/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_2/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_3/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_4/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_5/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_6/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_7/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_8/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 320, 32]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/beta',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance',\n",
+       "  [48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/weights',\n",
+       "  [3, 3, 32, 48]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/beta',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance',\n",
+       "  [64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/weights',\n",
+       "  [3, 3, 48, 64]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Conv2d_1x1/biases', [320]),\n",
+       " ('InceptionResnetV2/Repeat/block35_9/Conv2d_1x1/weights', [1, 1, 128, 320]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_1/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_10/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_11/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_12/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_13/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_14/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_15/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_16/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_17/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_18/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_19/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_2/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_20/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_3/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_4/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_5/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_6/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_7/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_8/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 1088, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 1088, 128]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/beta',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance',\n",
+       "  [160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/weights',\n",
+       "  [1, 7, 128, 160]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/weights',\n",
+       "  [7, 1, 160, 192]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Conv2d_1x1/biases', [1088]),\n",
+       " ('InceptionResnetV2/Repeat_1/block17_9/Conv2d_1x1/weights',\n",
+       "  [1, 1, 384, 1088]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_1/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_2/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_3/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_4/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_5/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_6/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_7/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_8/Conv2d_1x1/weights', [1, 1, 448, 2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance',\n",
+       "  [192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/weights',\n",
+       "  [1, 1, 2080, 192]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/beta',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance',\n",
+       "  [224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/weights',\n",
+       "  [1, 3, 192, 224]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/beta',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance',\n",
+       "  [256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/weights',\n",
+       "  [3, 1, 224, 256]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Conv2d_1x1/biases', [2080]),\n",
+       " ('InceptionResnetV2/Repeat_2/block8_9/Conv2d_1x1/weights', [1, 1, 448, 2080])]"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# investigate the variable names from the checkpoint\n",
+    "# This model was saved using the following architecture code:\n",
+    "# https://gitlab.idiap.ch/bob/bob.learn.tensorflow/-/blob/3a6a2531c7978edfba09c601667694ab2ae2359e/bob/learn/tensorflow/network/InceptionResnetV2.py\n",
+    "tf.train.list_variables(tf.train.latest_checkpoint(checkpoint))"
+   ]
+  },
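+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The checkpoint only stores flat `(name, shape)` pairs. To transfer these\n",
+    "weights, we next build the same architecture with the TF2 Keras\n",
+    "implementation and compare its variable names against the checkpoint names\n",
+    "listed above."
+   ]
+  },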
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model: \"InceptionResnetV2\"\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "Conv2d_1a_3x3 (SequentialLay (None, 79, 79, 32)        960       \n",
+      "_________________________________________________________________\n",
+      "Conv2d_2a_3x3 (SequentialLay (None, 77, 77, 32)        9312      \n",
+      "_________________________________________________________________\n",
+      "Conv2d_2b_3x3 (SequentialLay (None, 77, 77, 64)        18624     \n",
+      "_________________________________________________________________\n",
+      "MaxPool_3a_3x3 (MaxPooling2D (None, 38, 38, 64)        0         \n",
+      "_________________________________________________________________\n",
+      "Conv2d_3b_1x1 (SequentialLay (None, 38, 38, 80)        5360      \n",
+      "_________________________________________________________________\n",
+      "Conv2d_4a_3x3 (SequentialLay (None, 36, 36, 192)       138816    \n",
+      "_________________________________________________________________\n",
+      "MaxPool_5a_3x3 (MaxPooling2D (None, 17, 17, 192)       0         \n",
+      "_________________________________________________________________\n",
+      "Mixed_5b (InceptionA)        (None, 17, 17, 320)       268848    \n",
+      "_________________________________________________________________\n",
+      "block35_1 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_2 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_3 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_4 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_5 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_6 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_7 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_8 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_9 (InceptionResnetBl (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "block35_10 (InceptionResnetB (None, 17, 17, 320)       123408    \n",
+      "_________________________________________________________________\n",
+      "Mixed_6a (ReductionA)        (None, 8, 8, 1088)        2666240   \n",
+      "_________________________________________________________________\n",
+      "block17_1 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_2 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_3 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_4 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_5 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_6 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_7 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_8 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_9 (InceptionResnetBl (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_10 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_11 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_12 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_13 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_14 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_15 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_16 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_17 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_18 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_19 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "block17_20 (InceptionResnetB (None, 8, 8, 1088)        1127456   \n",
+      "_________________________________________________________________\n",
+      "Mixed_7a (ReductionB)        (None, 3, 3, 2080)        3883008   \n",
+      "_________________________________________________________________\n",
+      "block8_1 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_2 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_3 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_4 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_5 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_6 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_7 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_8 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_9 (InceptionResnetBlo (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "block8_10 (InceptionResnetBl (None, 3, 3, 2080)        2036288   \n",
+      "_________________________________________________________________\n",
+      "Conv2d_7b_1x1 (SequentialLay (None, 3, 3, 1536)        3199488   \n",
+      "_________________________________________________________________\n",
+      "global_average_pooling2d (Gl (None, 1536)              0         \n",
+      "_________________________________________________________________\n",
+      "Dropout (Dropout)            (None, 1536)              0         \n",
+      "_________________________________________________________________\n",
+      "Bottleneck (Dense)           (None, 128)               196608    \n",
+      "_________________________________________________________________\n",
+      "Bottleneck/BatchNorm (BatchN (None, 128)               384       \n",
+      "=================================================================\n",
+      "Total params: 54,533,728\n",
+      "Trainable params: 54,472,928\n",
+      "Non-trainable params: 60,800\n",
+      "_________________________________________________________________\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create the model and look at its structure\n",
+    "# This is the model's code in tf2 format:\n",
+    "# https://gitlab.idiap.ch/bob/bob.learn.tensorflow/-/blob/ee013d0d47dbd43a447abcb7b665a7cd75398cce/bob/learn/tensorflow/models/inception_resnet_v2.py\n",
+    "model = InceptionResNetV2(input_shape=(160, 160, 3), include_top=False, bottleneck=True)\n",
+    "model.summary()"
+   ]
+  },
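+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next cell enumerates the name of every variable in the model. The names keep their `InceptionResnetV2/...` scope prefixes, which is useful when matching variables against a pre-existing checkpoint. A minimal sketch of the kind of expression that produces such a listing (an assumption; the actual cell holds the exact code) is:\n",
+    "\n",
+    "```python\n",
+    "# List every variable name in the model; the scoped names\n",
+    "# (e.g. InceptionResnetV2/Conv2d_1a_3x3/Conv2D/kernel:0) can be\n",
+    "# matched one-to-one against entries in an existing checkpoint.\n",
+    "[v.name for v in model.variables]\n",
+    "```"
+   ]
+  },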
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['InceptionResnetV2/Conv2d_1a_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Conv2d_2a_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Conv2d_2b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Conv2d_3b_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Conv2d_4a_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_1/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_1/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_2/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_2/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_3/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_3/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_4/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_4/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_5/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_5/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_6/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_6/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_7/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_7/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_8/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_8/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_9/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_9/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0c_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block35_10/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block35_10/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_1/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_1/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_2/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_2/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_3/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_3/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_4/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_4/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_5/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_5/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_6/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_6/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_7/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_7/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_8/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_8/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_9/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_9/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_10/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_10/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_11/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_11/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_12/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_12/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_13/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_13/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_14/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_14/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_15/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_15/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_16/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_16/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_17/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_17/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_18/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_18/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_19/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_19/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0b_1x7/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0c_7x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block17_20/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block17_20/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_1/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_1/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_2/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_2/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_3/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_3/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_4/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_4/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_5/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_5/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_6/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_6/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_7/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_7/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_8/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_8/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_9/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_9/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_0/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_0/Conv2d_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0a_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0b_1x3/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0b_1x3/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0c_3x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0c_3x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/block8_10/Conv2d_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/block8_10/Conv2d_1x1/Conv2D/bias:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/block8_10/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Conv2d_7b_1x1/Conv2D/kernel:0',\n",
+       " 'InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_variance:0',\n",
+       " 'InceptionResnetV2/Bottleneck/kernel:0',\n",
+       " 'InceptionResnetV2/Bottleneck/BatchNorm/beta:0',\n",
+       " 'InceptionResnetV2/Bottleneck/BatchNorm/moving_mean:0',\n",
+       " 'InceptionResnetV2/Bottleneck/BatchNorm/moving_variance:0']"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# investigate the variable names in the model\n",
+    "[var.name for var in model.variables]"
+   ]
+  },
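+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The next cell copies the pre-trained weights from the TF-slim checkpoint into the Keras model by translating every Keras variable name into its TF-slim counterpart. The printed mapping shows the rewrite rules: the `:0` tensor suffix is dropped, `Conv2D/kernel` becomes `weights`, `Conv2D/bias` becomes `biases`, `BatchNorm` statistics keep their names, and the repeated `block35_*` blocks are nested under a `Repeat/` scope (the `block17_*` and `block8_*` blocks presumably follow the same pattern under their own repeat scopes). Below is a minimal sketch of these rules; the helper name `keras_to_slim_name` is hypothetical, and the notebook's actual conversion code appears to live in `bob/learn/tensorflow/utils/keras.py` (see the deprecation warning in the cell output).\n",
+    "\n",
+    "```python\n",
+    "import re\n",
+    "\n",
+    "\n",
+    "def keras_to_slim_name(name):\n",
+    "    # drop the ':0' suffix that var.name reports\n",
+    "    name = name.split(\":\")[0]\n",
+    "    # slim stores conv parameters as weights/biases, not Conv2D/kernel and Conv2D/bias\n",
+    "    name = name.replace(\"Conv2D/kernel\", \"weights\")\n",
+    "    name = name.replace(\"Conv2D/bias\", \"biases\")\n",
+    "    # the repeated block35_* blocks sit under a Repeat scope in the slim checkpoint\n",
+    "    name = re.sub(r\"(block35_\\\\d+)\", r\"Repeat/\\\\1\", name)\n",
+    "    return name\n",
+    "```\n",
+    "\n",
+    "For example, `keras_to_slim_name(\"InceptionResnetV2/block35_1/Conv2d_1x1/Conv2D/bias:0\")` yields `InceptionResnetV2/Repeat/block35_1/Conv2d_1x1/biases`, matching the mapping printed below."
+   ]
+  },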
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "collapsed": true,
+    "jupyter": {
+     "outputs_hidden": true
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:From /idiap/home/amohammadi/git/bob.bio.face_ongoing/src/bob.learn.tensorflow/bob/learn/tensorflow/utils/keras.py:121: The name tf.keras.backend.get_session is deprecated. Please use tf.compat.v1.keras.backend.get_session instead.\n",
+      "\n",
+      "InceptionResnetV2/Conv2d_1a_3x3/Conv2D/kernel -> InceptionResnetV2/Conv2d_1a_3x3/weights\n",
+      "InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/beta -> InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Conv2d_1a_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Conv2d_2a_3x3/Conv2D/kernel -> InceptionResnetV2/Conv2d_2a_3x3/weights\n",
+      "InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/beta -> InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Conv2d_2a_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Conv2d_2b_3x3/Conv2D/kernel -> InceptionResnetV2/Conv2d_2b_3x3/weights\n",
+      "InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/beta -> InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Conv2d_2b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Conv2d_3b_1x1/Conv2D/kernel -> InceptionResnetV2/Conv2d_3b_1x1/weights\n",
+      "InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/beta -> InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Conv2d_3b_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Conv2d_4a_3x3/Conv2D/kernel -> InceptionResnetV2/Conv2d_4a_3x3/weights\n",
+      "InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/beta -> InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Conv2d_4a_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/Conv2D/kernel -> InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/weights\n",
+      "InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/beta -> InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/Conv2D/kernel -> InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/weights\n",
+      "InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/beta -> InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_1/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_1/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_1/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_1/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_1/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_1/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_1/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_1/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_1/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_1/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_2/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_2/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_2/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_2/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_2/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_2/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_2/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_2/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_2/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_2/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_3/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_3/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_3/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_3/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_3/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_3/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_3/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_3/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_3/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_3/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_4/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_4/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_4/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_4/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_4/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_4/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_4/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_4/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_4/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_4/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_5/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_5/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_5/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_5/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_5/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_5/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_5/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_5/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_5/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_5/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_6/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_6/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_6/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_6/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_6/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_6/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_6/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_6/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_6/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_6/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_7/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_7/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_7/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_7/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_7/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_7/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_7/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_7/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_7/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_7/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_8/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_8/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_8/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_8/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_8/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_8/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_8/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_8/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_8/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_8/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_9/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_9/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_9/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_9/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_9/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_9/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_9/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_9/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_9/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_9/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_10/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_10/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_10/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_10/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0c_3x3/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/weights\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/beta -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block35_10/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat/block35_10/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block35_10/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat/block35_10/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_10/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat/block35_10/Branch_2/Conv2d_0c_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/weights\n",
+      "InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/weights\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_1/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_1/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_1/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_1/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_1/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_1/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_1/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_2/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_2/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_2/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_2/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_2/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_2/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_2/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_3/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_3/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_3/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_3/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_3/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_3/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_3/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_4/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_4/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_4/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_4/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_4/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_4/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_4/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_5/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_5/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_5/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_5/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_5/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_5/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_5/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_6/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_6/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_6/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_6/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_6/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_6/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_6/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_7/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_7/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_7/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_7/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_7/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_7/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_7/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_8/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_8/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_8/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_8/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_8/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_8/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_8/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_9/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_9/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_9/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_9/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_9/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_9/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_9/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_10/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_10/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_10/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_10/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_10/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_10/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_10/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_11/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_11/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_11/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_11/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_11/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_11/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_11/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_11/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_12/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_12/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_12/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_12/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_12/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_12/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_12/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_12/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_13/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_13/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_13/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_13/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_13/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_13/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_13/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_13/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_14/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_14/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_14/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_14/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_14/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_14/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_14/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_14/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_15/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_15/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_15/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_15/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_15/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_15/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_15/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_15/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_16/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_16/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_16/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_16/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_16/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_16/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_16/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_16/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_17/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_17/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_17/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_17/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_17/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_17/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_17/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_17/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_18/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_18/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_18/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_18/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_18/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_18/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_18/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_18/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_19/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_19/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_19/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_19/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_19/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_19/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_19/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_19/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_20/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_20/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0b_1x7/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/weights\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0c_7x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/weights\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/beta -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block17_20/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_1/block17_20/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block17_20/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_1/block17_20/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_20/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0b_1x7/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_1/block17_20/Branch_1/Conv2d_0c_7x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/weights\n",
+      "InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/weights\n",
+      "InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/weights\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/Conv2D/kernel -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/weights\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/beta -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/beta\n",
+      "InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_mean -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_variance -> InceptionResnetV2/Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_1/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_1/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_1/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_1/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_1/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_1/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_1/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_2/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_2/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_2/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_2/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_2/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_2/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_2/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_3/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_3/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_3/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_3/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_3/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_3/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_3/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_4/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_4/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_4/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_4/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_4/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_4/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_4/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_5/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_5/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_5/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_5/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_5/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_5/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_5/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_6/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_6/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_6/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_6/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_6/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_6/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_6/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_7/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_7/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_7/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_7/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_7/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_7/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_7/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_8/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_8/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_8/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_8/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_8/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_9/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_9/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Repeat_2/block8_9/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_9/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Repeat_2/block8_9/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_9/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Repeat_2/block8_9/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_10/Branch_0/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_10/Branch_0/Conv2d_1x1/BatchNorm/beta -> InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0a_1x1/Conv2D/kernel -> InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/weights\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0a_1x1/BatchNorm/beta -> InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0b_1x3/Conv2D/kernel -> InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/weights\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0b_1x3/BatchNorm/beta -> InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0c_3x1/Conv2D/kernel -> InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/weights\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0c_3x1/BatchNorm/beta -> InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/beta\n",
+      "InceptionResnetV2/block8_10/Conv2d_1x1/Conv2D/kernel -> InceptionResnetV2/Block8/Conv2d_1x1/weights\n",
+      "InceptionResnetV2/block8_10/Conv2d_1x1/Conv2D/bias -> InceptionResnetV2/Block8/Conv2d_1x1/biases\n",
+      "InceptionResnetV2/block8_10/Branch_0/Conv2d_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_10/Branch_0/Conv2d_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Block8/Branch_0/Conv2d_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Block8/Branch_1/Conv2d_0a_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean -> InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance -> InceptionResnetV2/Block8/Branch_1/Conv2d_0b_1x3/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean -> InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/block8_10/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance -> InceptionResnetV2/Block8/Branch_1/Conv2d_0c_3x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Conv2d_7b_1x1/Conv2D/kernel -> InceptionResnetV2/Conv2d_7b_1x1/weights\n",
+      "InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/beta -> InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/beta\n",
+      "InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_mean -> InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_variance -> InceptionResnetV2/Conv2d_7b_1x1/BatchNorm/moving_variance\n",
+      "InceptionResnetV2/Bottleneck/kernel -> InceptionResnetV2/Bottleneck/weights\n",
+      "InceptionResnetV2/Bottleneck/BatchNorm/beta -> InceptionResnetV2/Bottleneck/BatchNorm/beta\n",
+      "InceptionResnetV2/Bottleneck/BatchNorm/moving_mean -> InceptionResnetV2/Bottleneck/BatchNorm/moving_mean\n",
+      "InceptionResnetV2/Bottleneck/BatchNorm/moving_variance -> InceptionResnetV2/Bottleneck/BatchNorm/moving_variance\n",
+      "WARNING:tensorflow:Saver is deprecated, please switch to tf.train.Checkpoint or tf.keras.Model.save_weights for training checkpoints. When executing eagerly variables do not necessarily have unique names, and so the variable.name-based lookups Saver performs are error-prone.\n",
+      "INFO:tensorflow:Restoring parameters from /idiap/home/amohammadi/scratch/tmp/msceleb/inception-v2_batchnorm_rgb/model.ckpt-444274\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a function that converts variable names of the model to checkpoint names\n",
+    "# If the mapped variable names are not found in the checkpoint, an error is raised\n",
+    "# To make this job easier, it's best to change the variables in the Keras model to\n",
+    "# match the names in the checkpoint and then do minor modifications here.\n",
+    "def normalizer(name):\n",
+    "    old_name = name = name.split(\":\")[0]\n",
+    "\n",
+    "    # name differences between slim layer names and keras layer names\n",
+    "    name = name.replace(\"/kernel\", \"/weights\")\n",
+    "    name = name.replace(\"/bias\", \"/biases\")\n",
+    "\n",
+    "    # for Conv2D layers\n",
+    "    name = name.replace(\"Conv2D/weights\", \"weights\")\n",
+    "    name = name.replace(\"Conv2D/biases\", \"biases\")\n",
+    "\n",
+    "    # name differences between our architecture and the checkpoint\n",
+    "    name = name.replace(\"block35\", \"Repeat/block35\")\n",
+    "    name = name.replace(\"block17\", \"Repeat_1/block17\")\n",
+    "    name = name.replace(\"block8\", \"Repeat_2/block8\")\n",
+    "    name = name.replace(\"Repeat_2/block8_10\", \"Block8\")\n",
+    "\n",
+    "    print(f\"{old_name} -> {name}\")\n",
+    "    return name\n",
+    "\n",
+    "\n",
+    "restore_model_variables_from_checkpoint(model, checkpoint, normalizer=normalizer)"
+   ]
+  },
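+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional sanity check (a sketch, not executed here): list the variable\n",
+    "# names stored in the checkpoint to debug mismatches in the normalizer.\n",
+    "for ckpt_name, shape in tf.train.list_variables(checkpoint):\n",
+    "    print(ckpt_name, shape)"
+   ]
+  },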
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "WARNING:tensorflow:From /idiap/temp/amohammadi/conda/envs/tf2/lib/python3.7/site-packages/tensorflow/python/training/tracking/tracking.py:111: Model.state_updates (from tensorflow.python.keras.engine.training) is deprecated and will be removed in a future version.\n",
+      "Instructions for updating:\n",
+      "This property should not be used in TensorFlow 2.0, as updates are applied automatically.\n",
+      "WARNING:tensorflow:From /idiap/temp/amohammadi/conda/envs/tf2/lib/python3.7/site-packages/tensorflow/python/training/tracking/tracking.py:111: Layer.updates (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.\n",
+      "Instructions for updating:\n",
+      "This property should not be used in TensorFlow 2.0, as updates are applied automatically.\n",
+      "INFO:tensorflow:Assets written to: /idiap/home/amohammadi/scratch/tmp/msceleb/inception-v2_batchnorm_rgb_v2_format/assets\n"
+     ]
+    }
+   ],
+   "source": [
+    "# now the save the model in tensorflow 2 format\n",
+    "model.save(f\"{checkpoint}_v2_format\", save_format=\"tf\")"
+   ]
+  },
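+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional (a sketch, not executed here): the saved model can be reloaded\n",
+    "# in TensorFlow 2 directly from the path used in model.save above.\n",
+    "reloaded = tf.keras.models.load_model(f\"{checkpoint}_v2_format\")"
+   ]
+  },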
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# load the old model with a conda installation of old tensorflow\n",
+    "# create a random input, give it to the network, and save both input and output\n",
+    "# the code below is an exmple, but this cell must run in another notebook with an\n",
+    "# old bob (like bob 7) installation\n",
+    "assert (\n",
+    "    False\n",
+    "), \"run the code in this cell in another notebook with a Bob 7 conda environment, do `conda create -n bob7 -c <bob-conda-channel> bob=7 bob.learn.tensorflow`\"\n",
+    "\n",
+    "import numpy as np\n",
+    "import tensorflow as tf\n",
+    "from bob.learn.tensorflow.network import inception_resnet_v2_batch_norm\n",
+    "\n",
+    "input_shape = (1, 160, 160, 3)\n",
+    "input_tensor = tf.placeholder(tf.float32, shape=input_shape)\n",
+    "\n",
+    "np.random.seed(10)\n",
+    "data = np.random.rand(1, 160, 160, 3).astype(\"float32\")\n",
+    "np.save(\"input.npy\", data)\n",
+    "\n",
+    "for mode, name in [\n",
+    "    (tf.estimator.ModeKeys.TRAIN, \"train_mode\"),\n",
+    "    (tf.estimator.ModeKeys.PREDICT, \"predict_mode\"),\n",
+    "]:\n",
+    "    prelogits = inception_resnet_v2_batch_norm(input_tensor, mode=mode)[0]\n",
+    "    checkpoint = \"/tmp/msceleb/inception-v2_batchnorm_rgb\"\n",
+    "    session = tf.compat.v1.Session()\n",
+    "    session.run(tf.compat.v1.global_variables_initializer())\n",
+    "    saver = tf.compat.v1.train.Saver()\n",
+    "    saver.restore(session, tf.train.latest_checkpoint(checkpoint))\n",
+    "    output = session.run(prelogits, feed_dict={input_tensor: data})\n",
+    "    np.save(f\"output_{name}.npy\", output)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((1, 160, 160, 3), dtype('float32'))"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# now test the input on our new model and see if it matches the old output\n",
+    "import numpy as np\n",
+    "\n",
+    "data = np.load(\"input.npy\")\n",
+    "ref_predict = np.load(\"output_predict_mode.npy\")\n",
+    "ref_train = np.load(\"output_train_mode.npy\")\n",
+    "data.shape, data.dtype"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 1.0397079 ,  0.9887184 , -1.0198569 , -0.715245  ,  1.2795925 ,\n",
+       "        -0.87935054,  0.78027606, -0.72593015, -2.5908837 ,  0.12451646,\n",
+       "        -0.902159  , -0.47172827, -0.18754381,  1.522578  , -2.1243768 ,\n",
+       "        -0.01743102, -0.7484883 ,  0.99135643, -0.20708205, -0.21483701,\n",
+       "         0.02057505,  1.129887  ,  1.3154736 , -0.6671123 ,  0.49072063,\n",
+       "        -1.5672749 , -0.89510536,  0.7542453 ,  0.9450621 , -1.4345936 ,\n",
+       "         0.7585461 , -0.07696939, -2.1004128 , -0.49109945, -1.1812558 ,\n",
+       "        -1.1648568 , -0.87621725,  0.8581683 ,  0.7901715 , -1.0273908 ,\n",
+       "         0.7262468 ,  0.9869791 , -2.0192435 ,  1.1500988 ,  1.0887905 ,\n",
+       "         2.6515627 ,  0.00478584, -1.6734318 , -0.01522779, -0.7636968 ,\n",
+       "         2.0314407 , -0.7928898 ,  0.45450714, -2.1267045 , -0.4153372 ,\n",
+       "         0.62672305,  0.10927075,  0.41156793, -1.0595654 ,  0.3506987 ,\n",
+       "         0.20369251,  0.39730582,  1.7632729 ,  0.57014084,  0.08623758,\n",
+       "         0.5078342 , -0.28096065,  1.9077072 , -1.2491908 ,  0.7497569 ,\n",
+       "        -0.8506024 ,  0.88291407, -0.69780695,  0.9216435 , -0.16078427,\n",
+       "         2.8368008 ,  1.3023876 ,  2.0996416 ,  0.37888718,  0.0656991 ,\n",
+       "        -0.9813288 , -0.3329994 ,  0.7650786 , -1.2162722 ,  2.029986  ,\n",
+       "         1.4763666 ,  1.0834064 , -0.5205603 , -0.7379463 , -2.6376038 ,\n",
+       "         0.8725066 , -0.28173503,  0.98598987, -1.1632819 ,  0.01457607,\n",
+       "         0.02376672, -0.19675732,  0.83125085,  1.4034328 , -0.32809442,\n",
+       "         2.0181274 , -1.9440424 ,  0.30915022, -0.3909355 ,  2.0706754 ,\n",
+       "        -1.5847499 ,  1.0432297 , -1.2549043 ,  0.00503191, -0.4830594 ,\n",
+       "        -1.728585  , -0.97627854, -0.05380236,  0.25582924, -0.49854404,\n",
+       "        -0.41763973,  1.4456007 , -0.29970488, -0.21606404, -0.14868468,\n",
+       "        -0.19190133, -0.8587687 , -1.212713  ,  0.9776963 ,  0.6765988 ,\n",
+       "        -1.5350266 ,  2.462872  ,  0.01092067]], dtype=float32)"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "output_predict = model(data, training=False).numpy()\n",
+    "output_predict"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AssertionError",
+     "evalue": "\nNot equal to tolerance rtol=1e-05, atol=0\n\nMismatched elements: 12 / 128 (9.38%)\nMax absolute difference: 1.66893e-06\nMax relative difference: 9.847395e-05\n x: array([[ 1.039708,  0.988718, -1.019857, -0.715245,  1.279593, -0.879351,\n         0.780276, -0.72593 , -2.590884,  0.124516, -0.902159, -0.471728,\n        -0.187544,  1.522578, -2.124377, -0.017431, -0.748488,  0.991356,...\n y: array([[ 1.039708,  0.988719, -1.019857, -0.715245,  1.279593, -0.879351,\n         0.780276, -0.725931, -2.590884,  0.124517, -0.902159, -0.471728,\n        -0.187544,  1.522577, -2.124377, -0.017431, -0.748489,  0.991358,...",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-16-a3b015d911f9>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtesting\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massert_allclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_predict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mref_predict\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrtol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1e-05\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "    \u001b[0;31m[... skipping hidden 2 frame]\u001b[0m\n",
+      "\u001b[0;31mAssertionError\u001b[0m: \nNot equal to tolerance rtol=1e-05, atol=0\n\nMismatched elements: 12 / 128 (9.38%)\nMax absolute difference: 1.66893e-06\nMax relative difference: 9.847395e-05\n x: array([[ 1.039708,  0.988718, -1.019857, -0.715245,  1.279593, -0.879351,\n         0.780276, -0.72593 , -2.590884,  0.124516, -0.902159, -0.471728,\n        -0.187544,  1.522578, -2.124377, -0.017431, -0.748488,  0.991356,...\n y: array([[ 1.039708,  0.988719, -1.019857, -0.715245,  1.279593, -0.879351,\n         0.780276, -0.725931, -2.590884,  0.124517, -0.902159, -0.471728,\n        -0.187544,  1.522577, -2.124377, -0.017431, -0.748489,  0.991358,..."
+     ]
+    }
+   ],
+   "source": [
+    "# they are close enough\n",
+    "np.testing.assert_allclose(output_predict, ref_predict, rtol=1e-05)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# they are close enough\n",
+    "np.testing.assert_allclose(output_predict, ref_predict, rtol=1e-04)"
+   ]
+  },
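+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The mismatch at `rtol=1e-05` is tiny (a maximum absolute difference of about\n",
+    "`1.7e-06` over 12 of 128 elements), which is plausible float32 noise from the\n",
+    "reimplementation. A minimal sketch (not executed here) to inspect the\n",
+    "deviations directly:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# largest absolute and relative deviations between the converted and old model,\n",
+    "# matching the |x - y| / |y| definition used by np.testing.assert_allclose\n",
+    "abs_diff = np.abs(output_predict - ref_predict)\n",
+    "rel_diff = abs_diff / np.abs(ref_predict)\n",
+    "abs_diff.max(), rel_diff.max()"
+   ]
+  },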
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([[ 1.2059948 ,  0.90457535, -1.6966419 , -0.6469312 ,  1.8390388 ,\n",
+       "        -0.82886887,  0.20409298, -1.2975912 , -2.2515354 ,  0.16945124,\n",
+       "        -0.6430161 , -0.7341013 , -0.37252808,  1.208333  , -2.1846251 ,\n",
+       "         0.5799656 , -0.9761491 ,  0.5671108 , -0.46018916, -1.0497742 ,\n",
+       "         0.36604214,  1.0983484 ,  1.6091515 , -0.4587916 ,  0.49420023,\n",
+       "        -1.3356848 , -0.7289448 ,  0.98772526,  1.3689709 , -1.3146491 ,\n",
+       "         0.69885445, -0.82313013, -1.8048697 , -0.0885191 , -0.95188236,\n",
+       "        -1.2162575 , -0.20026398,  0.84856796,  0.2631035 , -1.0577492 ,\n",
+       "         0.42345095,  1.2333517 , -1.5610876 , -0.29854918,  0.58075297,\n",
+       "         0.9959574 ,  0.18556404, -1.828722  , -0.43610954, -0.6100631 ,\n",
+       "         1.5355668 , -0.88698196, -0.24892426, -1.5401478 , -0.20694387,\n",
+       "        -0.43096346,  0.34370852,  0.42531586, -1.1192467 , -0.26605558,\n",
+       "         0.32238102, -0.09467888,  1.9468937 ,  0.1408875 ,  0.33964324,\n",
+       "         0.47477245, -0.40066433,  1.738759  , -1.5919895 ,  0.4351349 ,\n",
+       "        -0.6090865 ,  0.8377094 ,  0.55933   ,  0.1080389 , -0.04150879,\n",
+       "         2.6030731 , -0.07762146,  2.0182095 ,  0.5724497 ,  0.10365391,\n",
+       "        -0.935689  , -0.66622305,  0.71880484, -1.1720471 ,  2.35266   ,\n",
+       "         1.5547676 ,  1.0567551 , -0.5162802 , -0.3426876 , -2.376378  ,\n",
+       "         1.2193823 , -0.03678417,  1.1837311 , -0.3259096 , -0.33225632,\n",
+       "        -0.13122219, -0.68957293,  0.6326618 ,  1.4436685 , -1.070931  ,\n",
+       "         1.0629826 , -1.3427727 , -0.4593153 , -0.82589483,  1.7713051 ,\n",
+       "        -1.6192143 ,  1.014153  , -0.51861286, -0.08098346, -0.05481911,\n",
+       "        -1.7958102 , -0.27956295,  0.4140396 ,  0.53047895, -0.76364136,\n",
+       "         0.07553291,  1.597281  , -0.04399967, -0.49591756,  0.4441948 ,\n",
+       "         0.20369053,  0.01039219, -0.74036837,  0.8180666 ,  0.835022  ,\n",
+       "        -1.2091255 ,  1.5800235 , -0.41675472]], dtype=float32)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# test in train mode\n",
+    "output_train = model(data, training=True).numpy()\n",
+    "output_train"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "AssertionError",
+     "evalue": "\nNot equal to tolerance rtol=1e-05, atol=0\n\nMismatched elements: 1 / 128 (0.781%)\nMax absolute difference: 7.748604e-07\nMax relative difference: 4.5886292e-05\n x: array([[ 1.205995,  0.904575, -1.696642, -0.646931,  1.839039, -0.828869,\n         0.204093, -1.297591, -2.251535,  0.169451, -0.643016, -0.734101,\n        -0.372528,  1.208333, -2.184625,  0.579966, -0.976149,  0.567111,...\n y: array([[ 1.205995,  0.904575, -1.696642, -0.646931,  1.839038, -0.828869,\n         0.204093, -1.297591, -2.251535,  0.169451, -0.643016, -0.734101,\n        -0.372528,  1.208333, -2.184625,  0.579965, -0.976149,  0.567111,...",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-21-e2ac78cb0d24>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# they are close enough\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtesting\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massert_allclose\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mref_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrtol\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1e-05\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "    \u001b[0;31m[... skipping hidden 2 frame]\u001b[0m\n",
+      "\u001b[0;31mAssertionError\u001b[0m: \nNot equal to tolerance rtol=1e-05, atol=0\n\nMismatched elements: 1 / 128 (0.781%)\nMax absolute difference: 7.748604e-07\nMax relative difference: 4.5886292e-05\n x: array([[ 1.205995,  0.904575, -1.696642, -0.646931,  1.839039, -0.828869,\n         0.204093, -1.297591, -2.251535,  0.169451, -0.643016, -0.734101,\n        -0.372528,  1.208333, -2.184625,  0.579966, -0.976149,  0.567111,...\n y: array([[ 1.205995,  0.904575, -1.696642, -0.646931,  1.839038, -0.828869,\n         0.204093, -1.297591, -2.251535,  0.169451, -0.643016, -0.734101,\n        -0.372528,  1.208333, -2.184625,  0.579965, -0.976149,  0.567111,..."
+     ]
+    }
+   ],
+   "source": [
+    "# they are close enough\n",
+    "np.testing.assert_allclose(output_train, ref_train, rtol=1e-05)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# they are close enough\n",
+    "np.testing.assert_allclose(output_train, ref_train, rtol=1e-04)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:tf2]",
+   "language": "python",
+   "name": "conda-env-tf2-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/requirements.txt b/requirements.txt
index 08102dedde30eea810e0e99d7f175f81392c4f05..d4073f98022ae3c0d249fe10c5072dca5c20739d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,22 +1,8 @@
 setuptools
-docopt
-bob.blitz
-bob.core
-bob.db.base
 bob.extension
 bob.io.base
 bob.io.image
-bob.learn.activation
-bob.learn.em
-bob.learn.linear
-bob.ip.base
-bob.math
 bob.measure
-bob.sp
-bob.db.mnist
-bob.db.atnt
-gridtk
-bob.bio.base
 numpy
-scipy
 click >= 7
+scipy
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 2b1997fbdcb0bf7d8bbe19fa0280a3f20d01f0e1..a2ce901099acc2d2c5f608b0f317b5bf22a6d17f 100644
--- a/setup.py
+++ b/setup.py
@@ -3,36 +3,35 @@
 # Andre Anjos <andre.anjos@idiap.ch>
 # Mon 16 Apr 08:18:08 2012 CEST
 
+from setuptools import dist
 from setuptools import setup
 
-from setuptools import setup, dist
-dist.Distribution(dict(setup_requires=['bob.extension']))
+from bob.extension.utils import find_packages
+from bob.extension.utils import load_requirements
+
+dist.Distribution(dict(setup_requires=["bob.extension"]))
+
 
-from bob.extension.utils import load_requirements, find_packages
 install_requires = load_requirements()
 
 # The only thing we do in this file is to call the setup() function with all
 # parameters that define our package.
 setup(
-
     # This is the basic information about your project. Modify all this
     # information before releasing code publicly.
-    name='bob.learn.tensorflow',
+    name="bob.learn.tensorflow",
     version=open("version.txt").read().rstrip(),
-    description='Bob bindings for tensorflow',
-    url='',
-    license='BSD',
-    author='Tiago de Freitas Pereira',
-    author_email='tiago.pereira@idiap.ch',
-    keywords='tensorflow',
-
+    description="Bob bindings for tensorflow",
+    url="",
+    license="BSD",
+    author="Tiago de Freitas Pereira",
+    author_email="tiago.pereira@idiap.ch",
+    keywords="tensorflow",
     # If you have a better, long description of your package, place it on the
     # 'doc' directory and then hook it here
-    long_description=open('README.rst').read(),
-
+    long_description=open("README.rst").read(),
     # This line is required for any distutils based packaging.
     include_package_data=True,
-
     # This line defines which packages should be installed when you "install"
     # this package. All packages that are mentioned here, but are not installed
     # on the current system will be installed locally and only visible to the
@@ -42,46 +41,43 @@ setup(
     packages=find_packages(),
     zip_safe=False,
     entry_points={
-
         # main entry for bob tf cli
-        'bob.cli': [
-            'tf = bob.learn.tensorflow.script.tf:tf',
-            'keras = bob.learn.tensorflow.script.keras:keras',
+        "bob.cli": [
+            "tf = bob.learn.tensorflow.script.tf:tf",
+            "keras = bob.learn.tensorflow.script.keras:keras",
         ],
-
         # bob tf scripts
-        'bob.learn.tensorflow.cli': [
-            'cache-dataset = bob.learn.tensorflow.script.cache_dataset:cache_dataset',
-            'compute-statistics = bob.learn.tensorflow.script.compute_statistics:compute_statistics',
-            'dataset-to-hdf5 = bob.learn.tensorflow.script.db_to_tfrecords:dataset_to_hdf5',
-            'datasets-to-tfrecords = bob.learn.tensorflow.script.db_to_tfrecords:datasets_to_tfrecords',
-            'db-to-tfrecords = bob.learn.tensorflow.script.db_to_tfrecords:db_to_tfrecords',
-            'describe-tfrecord = bob.learn.tensorflow.script.db_to_tfrecords:describe_tfrecord',
-            'distance-matrix = bob.learn.tensorflow.script.cgm:distance_matrix',
-            'eval = bob.learn.tensorflow.script.eval:eval',
-            'predict = bob.learn.tensorflow.script.predict_bio:predict',
-            'predict-bio = bob.learn.tensorflow.script.predict_bio:predict_bio',
-            'style-transfer = bob.learn.tensorflow.script.style_transfer:style_transfer',
-            'train = bob.learn.tensorflow.script.train:train',
-            'train-and-evaluate = bob.learn.tensorflow.script.train_and_evaluate:train_and_evaluate',
-            'trim = bob.learn.tensorflow.script.trim:trim',
+        "bob.learn.tensorflow.cli": [
+            "cache-dataset = bob.learn.tensorflow.script.cache_dataset:cache_dataset",
+            "compute-statistics = bob.learn.tensorflow.script.compute_statistics:compute_statistics",
+            "dataset-to-hdf5 = bob.learn.tensorflow.script.db_to_tfrecords:dataset_to_hdf5",
+            "datasets-to-tfrecords = bob.learn.tensorflow.script.db_to_tfrecords:datasets_to_tfrecords",
+            "db-to-tfrecords = bob.learn.tensorflow.script.db_to_tfrecords:db_to_tfrecords",
+            "describe-tfrecord = bob.learn.tensorflow.script.db_to_tfrecords:describe_tfrecord",
+            "distance-matrix = bob.learn.tensorflow.script.cgm:distance_matrix",
+            "eval = bob.learn.tensorflow.script.eval:eval",
+            "predict = bob.learn.tensorflow.script.predict_bio:predict",
+            "predict-bio = bob.learn.tensorflow.script.predict_bio:predict_bio",
+            "style-transfer = bob.learn.tensorflow.script.style_transfer:style_transfer",
+            "train = bob.learn.tensorflow.script.train:train",
+            "train-and-evaluate = bob.learn.tensorflow.script.train_and_evaluate:train_and_evaluate",
+            "trim = bob.learn.tensorflow.script.trim:trim",
         ],
         # bob keras scripts
-        'bob.learn.tensorflow.keras_cli': [
-            'fit = bob.learn.tensorflow.script.fit:fit',
+        "bob.learn.tensorflow.keras_cli": [
+            "fit = bob.learn.tensorflow.script.fit:fit",
         ],
     },
-
     # Classifiers are important if you plan to distribute this package through
     # PyPI. You can find the complete list of classifiers that are valid and
     # useful here (http://pypi.python.org/pypi?%3Aaction=list_classifiers).
     classifiers=[
-        'Framework :: Bob',
-        'Development Status :: 3 - Alpha',
-        'Intended Audience :: Developers',
-        'License :: OSI Approved :: BSD License',
-        'Natural Language :: English',
-        'Programming Language :: Python',
-        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+        "Framework :: Bob",
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "License :: OSI Approved :: BSD License",
+        "Natural Language :: English",
+        "Programming Language :: Python",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
     ],
 )