From 8f81468ccb0db59f1b277efb041e51ad043647f4 Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Tue, 27 Apr 2021 08:44:46 +0200 Subject: [PATCH] Ported IJBC database --- cnn_training/msceleb_to_tfrecord.py | 291 +++++++++++++ cnn_training/vgg2_2_tfrecords.py | 189 ++++++++ cnn_training/vgg2_2_tfrecords_loose.py | 209 +++++++++ cnn_training/webface360_to_tfrecord.py | 190 ++++++++ .../._centerloss_mixed_precision.py | Bin 0 -> 4096 bytes cnn_training_cpy/arcface.py | 409 ++++++++++++++++++ cnn_training_cpy/centerloss.py | 251 +++++++++++ .../centerloss_mixed_precision.py | 279 ++++++++++++ .../facecrop-checkpoint.ipynb | 70 +++ .../.ipynb_checkpoints/facecrop-checkpoint.py | 54 +++ doc/faq/facecrop.ipynb | 109 +++++ 11 files changed, 2051 insertions(+) create mode 100644 cnn_training/msceleb_to_tfrecord.py create mode 100644 cnn_training/vgg2_2_tfrecords.py create mode 100644 cnn_training/vgg2_2_tfrecords_loose.py create mode 100644 cnn_training/webface360_to_tfrecord.py create mode 100644 cnn_training_cpy/._centerloss_mixed_precision.py create mode 100644 cnn_training_cpy/arcface.py create mode 100644 cnn_training_cpy/centerloss.py create mode 100644 cnn_training_cpy/centerloss_mixed_precision.py create mode 100644 doc/faq/.ipynb_checkpoints/facecrop-checkpoint.ipynb create mode 100644 doc/faq/.ipynb_checkpoints/facecrop-checkpoint.py create mode 100644 doc/faq/facecrop.ipynb diff --git a/cnn_training/msceleb_to_tfrecord.py b/cnn_training/msceleb_to_tfrecord.py new file mode 100644 index 00000000..7138d7cc --- /dev/null +++ b/cnn_training/msceleb_to_tfrecord.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Converts the MSCeleb annotated version to TF RECORD + +Usage: + msceleb_to_tfrecord.py <msceleb-path> <output-path> [--keys=<arg> --image-size=<arg> --use-eyes] + msceleb_to_tfrecord.py -h | --help + +Options: + -h --help Show this screen. + --keys=<arg> Pickle with the keys + --image-size=<arg> Final image size [default: 126] + --use-eyes Use eyes annotations. 
If not set, it will use the face crop only + +""" + + +from docopt import docopt +import numpy as np +import os +import bob.io.image +import bob.io.base +import tensorflow as tf +import sys +from datetime import datetime +import pickle +import numpy +from bob.bio.face.preprocessor import FaceCrop + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def detect_mtcnn_margin_face_crop(annotations, image, margin=44, final_size=126): + """ + Face crop using bounding box + """ + + annotations["topleft"] = [annotations["topleft"][0], annotations["topleft"][1]] + annotations["bottomright"] = [ + annotations["bottomright"][0], + annotations["bottomright"][1], + ] + + annotations["topleft"][0] = numpy.uint( + numpy.maximum(annotations["topleft"][0] - margin / 2, 0) + ) + annotations["topleft"][1] = numpy.uint( + numpy.maximum(annotations["topleft"][1] - margin / 2, 0) + ) + + annotations["bottomright"][0] = numpy.uint( + numpy.minimum(annotations["bottomright"][0] + margin / 2, image.shape[1]) + ) + annotations["bottomright"][1] = numpy.uint( + numpy.minimum(annotations["bottomright"][1] + margin / 2, image.shape[2]) + ) + + cropped_positions = {"topleft": (0, 0), "bottomright": (final_size, final_size)} + cropper = FaceCrop( + cropped_image_size=(final_size, final_size), + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=None, + annotator=None, + ) + + detected = cropper.transform([image], [annotations])[0] + + return detected.astype("uint8") + + +def detect_mtcnn_margin_eyes(annotations, image, margin=44, final_size=126): + + # final image position w.r.t the image size + RIGHT_EYE_POS = (final_size / 3.44, final_size / 3.02) + LEFT_EYE_POS = (final_size / 3.44, final_size / 1.49) + + # RIGHT_EYE_POS = (final_size / 3.34, + # final_size / 3.02) + # LEFT_EYE_POS = (final_size / 3.44, + # final_size / 1.59) + + cropped_positions = {"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS} + + cropper = FaceCrop( + cropped_image_size=(final_size, final_size), + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=None, + annotator=None, + ) + + detected = cropper.transform([image], [annotations])[0] + + return detected.astype("uint8") + + +def generate_tfrecord( + chunk_path, + output_tf_record_path, + detector, + keys, + final_size=126, + margin=44, + use_eyes=False, +): + def write_single_line_tfrecord(writer, image, offset, user_id): + + # Serializing + serialized_img = image.tobytes() + + # Writing + feature = { + "data": _bytes_feature(serialized_img), + "label": _int64_feature(offset), + "key": _bytes_feature(str.encode(user_id)), + } + + example = tf.train.Example(features=tf.train.Features(feature=feature)) + writer.write(example.SerializeToString()) + + os.makedirs(os.path.dirname(output_tf_record_path), exist_ok=True) + + with tf.io.TFRecordWriter(output_tf_record_path) as tf_writer: + + for identity in os.listdir(chunk_path): + # Discarting the one we've discarted already + if identity not in keys: + continue + + identity_path = os.path.join(chunk_path, identity) + if not os.path.isdir(identity_path): + continue + + sys.stdout.write(f"Processing {identity} \n") + sys.stdout.flush() + + for image_path in os.listdir(identity_path): + image_path = os.path.join(identity_path, image_path) + if os.path.splitext(image_path)[-1] != ".png": + continue + image = bob.io.image.load(image_path) + 
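+                # The MTCNN detector is assumed to return a list of annotation dicts
+                # (one per detected face) with keys such as "topleft", "bottomright",
+                # "leye" and "reye"; images with no detection are skipped and only
+                # the first detection is used below.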
annotations = detector(image) + + if len(annotations) == 0: + continue + else: + # Getting the first annotation + annotations = annotations[0] + + if use_eyes: + detected_image = detect_mtcnn_margin_eyes( + annotations, image, margin=margin, final_size=final_size + ) + else: + + detected_image = detect_mtcnn_margin_face_crop( + annotations, image, margin=margin, final_size=final_size + ) + # Converting H x W x C + detected_image = bob.io.image.to_matplotlib(detected_image) + + write_single_line_tfrecord( + tf_writer, detected_image, keys[identity], identity + ) + + +def get_keys(base_path, all_chunks): + """ + Read the file structure from `annotations.csv` to get the samples properly annotated + """ + + def decide(annotations_path): + """ + Decide if we should consider an identity or not. + The annotation has the following format. + + ``` + 0,3.png,4.png,1 + 1,4.png,40.png,1 + 2,40.png,46.png,1 + 3,46.png,47.png,1 + 4,47.png,55.png,1 + 5,55.png,56.png,1 + 6,56.png,71.png,1 + 7,71.png,79.png,1 + 8,79.png,99.png,1 + 9,99.png,100.png,1 + 10,100.png,3.png,1 + ``` + + The last collumn can assume the values: + - `0`: The pair of images are NOT from the same identity + - `1`: The pair of images ARE from the same identity + - `2`: The annotator doesn't know what to say + + + Here I'm considering the identity if you have more than 75% `1` and `2`. + + """ + + with open(annotations_path) as f: + lines = 0 + lines_with_zero = 0 + for l in f.readlines(): + lines += 1 + if l.split(",")[-1] == "\n": + lines_with_zero += 1 + continue + + decision = int(l.split(",")[-1]) + if decision == 0: + lines_with_zero += 1 + + # Discarting identities with more than 50% of the pairs not + # considered from the same identity + # This is the first cut + return True if lines_with_zero / lines < 0.50 else False + + offset = 0 + keys = dict() + for chunk in all_chunks: + path = os.path.join(base_path, chunk) + for identity in os.listdir(path): + if not os.path.isdir(os.path.join(path, identity)): + continue + + statistics = os.path.join(path, identity, "annotations.csv") + + if decide(statistics): + keys[identity] = offset + offset += 1 + else: + print(f"Rejected {identity}") + return keys + + +if __name__ == "__main__": + args = docopt(__doc__) + + MSCELEB_PATH = args["<msceleb-path>"] + output_path = args["<output-path>"] + image_size = int(args["--image-size"]) + use_eyes = args["--use-eyes"] + margin = 0 + + if "SGE_TASK_LAST" in os.environ: + TOTAL_CHUNKS = int(os.environ["SGE_TASK_LAST"]) + CURRENT_CHUNK = int(os.environ["SGE_TASK_ID"]) - 1 + else: + TOTAL_CHUNKS = 1 + CURRENT_CHUNK = 0 + + all_chunks = [f"chunk_{i}" for i in range(43)] + + if args["--keys"] is None: + keys = get_keys(MSCELEB_PATH, all_chunks) + with open("keys.pickle", "wb") as f: + f.write(pickle.dumps(keys)) + else: + keys = pickle.loads(open(args["--keys"], "rb").read()) + + chunk_id = all_chunks[CURRENT_CHUNK] + + from bob.ip.facedetect.mtcnn import MTCNN + + detector = MTCNN() + + output_tf_record_path = os.path.join(output_path, chunk_id + ".tfrecords") + + generate_tfrecord( + os.path.join(MSCELEB_PATH, chunk_id), + output_tf_record_path, + detector, + keys, + final_size=image_size, + margin=margin, + use_eyes=use_eyes, + ) + + sys.stdout.write("Done \n") + sys.stdout.flush() + diff --git a/cnn_training/vgg2_2_tfrecords.py b/cnn_training/vgg2_2_tfrecords.py new file mode 100644 index 00000000..8beeb95a --- /dev/null +++ b/cnn_training/vgg2_2_tfrecords.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Trains some face 
recognition baselines using ARC based models + +Usage: + vgg2_2_tfrecords.py <vgg-path> <output-path> + vgg2_2_tfrecords.py -h | --help + +Options: + -h --help Show this screen. + +""" + + +from docopt import docopt +import numpy as np +import os +import bob.io.image +import bob.io.base +import tensorflow as tf +import sys +from datetime import datetime + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def search_landmark(landmark_path, img_path): + with open(landmark_path) as f: + next(f) + for line in f: + line = line.split(",") + if img_path in line[0]: + return np.array( + [[float(line[i + 1]), float(line[i + 2])] for i in [0, 2, 4, 6, 8]] + ) + else: + return None + + +from bob.bio.face.preprocessor import FaceCrop + + +def align(image, annotations, cropped_image_size=(126, 126)): + + cropped_image_height, cropped_image_width = cropped_image_size + + # RIGHT_EYE_POS = (40, 46) + # LEFT_EYE_POS = (40, 80) + # cropped_positions = {"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS} + # cropped_positions = {"leye": (49, 72), "reye": (49, 38)} + cropped_positions = {"leye": (55, 81), "reye": (55, 42)} + + cropper = FaceCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=None, + annotator=None, + ) + return bob.io.image.to_matplotlib( + cropper.transform([image], [annotations])[0].astype("uint8") + ) + + +def get_id_by_line(line): + return line.split("/")[0] + + +def generate_tfrecord( + base_path, landmark_path, file_list, output_tf_record_path, indexes +): + def write_single_line_tfrecord(writer, image, offset, user_id): + + # Serializing + serialized_img = image.tobytes() + + # Writing + feature = { + "data": _bytes_feature(serialized_img), + "label": _int64_feature(offset), + "key": _bytes_feature(str.encode(user_id)), + } + + example = tf.train.Example(features=tf.train.Features(feature=feature)) + writer.write(example.SerializeToString()) + + with tf.io.TFRecordWriter(output_tf_record_path) as tf_writer: + + current_id = None + with open(file_list) as f: + for file_name in f.readlines(): + + user_id = get_id_by_line(file_name) + if user_id in indexes: + + img = bob.io.base.load( + os.path.join(base_path, file_name).rstrip("\n") + ) + l_name = file_name.rstrip(".jpg\n") + + if current_id != user_id: + current_id = user_id + sys.stdout.write( + f"Writing user {current_id}. 
{str(datetime.now())} \n" + ) + sys.stdout.flush() + + landmarks = search_landmark(landmark_path, l_name) + if landmarks[0][0] > landmarks[1][0]: + annotations = { + "reye": (landmarks[1][1], landmarks[1][0]), + "leye": (landmarks[0][1], landmarks[0][0]), + } + else: + annotations = { + "reye": (landmarks[0][1], landmarks[0][0]), + "leye": (landmarks[1][1], landmarks[1][0]), + } + if landmarks is None: + raise ValueError(f"Landmark for {file_name} not found!") + + aligned_image = align(img, annotations) + write_single_line_tfrecord( + tf_writer, aligned_image, int(indexes[user_id]), user_id + ) + + +def map_indexes(image_path, n_chunks): + """ + Create a dictionary mapping the ID to VGG2-ID, like: + + {0: 'n000001'], + 1: 'n000002']} + + """ + + indexes = sorted(list(set([l.split("/")[0] for l in open(image_path).readlines()]))) + + identities_map = {indexes[i]: i for i in range(len(indexes))} + + # SPLIT THE DICTIONARY IN TOTAL_CHUNKS + indexes_as_list = list(identities_map.items()) + dict_as_list = np.array_split(indexes_as_list, n_chunks) + dicts = [dict(d) for d in dict_as_list] + + return dicts + + +if __name__ == "__main__": + args = docopt(__doc__) + + VGG2_PATH = args["<vgg-path>"] + LANDMARK_PATH = os.path.join(VGG2_PATH, "bb_landmark", "loose_landmark_train.csv") + + if "SGE_TASK_LAST" in os.environ: + TOTAL_CHUNKS = int(os.environ["SGE_TASK_LAST"]) + CURRENT_CHUNK = int(os.environ["SGE_TASK_ID"]) - 1 + else: + TOTAL_CHUNKS = 1 + CURRENT_CHUNK = 0 + + # TOTAL_CHUNKS = 140 + # CURRENT_CHUNK = 0 + + TRAINING_LIST = os.path.join(VGG2_PATH, "train_list.txt") + # TEST_LIST = os.path.join(VGG2_PATH, "test_list.txt") + + # MAP ALL INDEXES + + indexes = map_indexes(TRAINING_LIST, TOTAL_CHUNKS) + + generate_tfrecord( + os.path.join(VGG2_PATH, "train"), + LANDMARK_PATH, + TRAINING_LIST, + os.path.join( + args["<output-path>"], f"train_vgg2_chunk{CURRENT_CHUNK}.tfrecords" + ), + indexes[CURRENT_CHUNK], + ) + diff --git a/cnn_training/vgg2_2_tfrecords_loose.py b/cnn_training/vgg2_2_tfrecords_loose.py new file mode 100644 index 00000000..1fb8d6c8 --- /dev/null +++ b/cnn_training/vgg2_2_tfrecords_loose.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Crop VGG2 with loose crop based on bounding box + +Usage: + vgg2_2_tfrecords.py <vgg-path> <output-path> [--factor=<kn>] + vgg2_2_tfrecords.py -h | --help + +Options: + -h --help Show this screen. 
+ --factor=<kn> Crop Factor [default: 0.3] + +""" + + +from docopt import docopt +import numpy as np +import os +import bob.io.image +import bob.io.base +import tensorflow as tf +import sys +from datetime import datetime + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def search_landmark(landmark_path, img_path): + with open(landmark_path) as f: + next(f) + for line in f: + line = line.split(",") + if img_path in line[0]: + landmarks = np.array([float(line[i]) for i in [1, 2, 3, 4]]) + return { + "topleft": (landmarks[1], landmarks[0]), + "dimensions": (landmarks[3], landmarks[2]), + "bottomright": ( + landmarks[1] + landmarks[3], + landmarks[0] + landmarks[2], + ), + } + + else: + return None + + +def extend_annotations(annotations, img_bottom_right, factor=0.3): + width = annotations["dimensions"][1] + height = annotations["dimensions"][0] + + new_annotations = {"topleft": [0, 0], "bottomright": [0, 0]} + + new_annotations["topleft"][0] = max(0, annotations["topleft"][0] - height * factor) + new_annotations["topleft"][1] = max(0, annotations["topleft"][1] - width * factor) + + new_annotations["bottomright"][0] = min( + img_bottom_right[1], annotations["bottomright"][0] + height * factor + ) + new_annotations["bottomright"][1] = min( + img_bottom_right[0], annotations["bottomright"][1] + width * factor + ) + + return new_annotations + + +from bob.bio.face.preprocessor import FaceCrop + + +def align(image, annotations, cropped_image_size=(126, 126), factor=0.3): + + cropped_image_height, cropped_image_width = cropped_image_size + + img_bottom_right = (image.shape[1], image.shape[2]) + new_annotations = extend_annotations(annotations, img_bottom_right, factor=factor) + + cropped_positions = {"topleft": (0, 0), "bottomright": cropped_image_size} + cropper = FaceCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=None, + annotator=None, + ) + return bob.io.image.to_matplotlib( + cropper.transform([image], [new_annotations])[0] + ).astype("uint8") + + +def get_id_by_line(line): + return line.split("/")[0] + + +def generate_tfrecord( + base_path, landmark_path, file_list, output_tf_record_path, indexes, factor=0.3 +): + def write_single_line_tfrecord(writer, image, offset, user_id): + + # Serializing + serialized_img = image.tobytes() + + # Writing + feature = { + "data": _bytes_feature(serialized_img), + "label": _int64_feature(offset), + "key": _bytes_feature(str.encode(user_id)), + } + + example = tf.train.Example(features=tf.train.Features(feature=feature)) + writer.write(example.SerializeToString()) + + with tf.io.TFRecordWriter(output_tf_record_path) as tf_writer: + + current_id = None + with open(file_list) as f: + for file_name in f.readlines(): + + user_id = get_id_by_line(file_name) + if user_id in indexes: + + img = bob.io.base.load( + os.path.join(base_path, file_name).rstrip("\n") + ) + l_name = file_name.rstrip(".jpg\n") + + if current_id != user_id: + current_id = user_id + sys.stdout.write( + f"Writing user {current_id}. 
{str(datetime.now())} \n" + ) + sys.stdout.flush() + + landmarks = search_landmark(landmark_path, l_name) + if landmarks is None: + raise ValueError(f"Landmark for {file_name} not found!") + + # aligned_image = align(img, annotations) + aligned_image = align( + img, landmarks, factor=factor, cropped_image_size=(126, 126) + ) + bob.io.base.save(bob.io.image.to_bob(aligned_image), "xuucu.png") + write_single_line_tfrecord( + tf_writer, aligned_image, int(indexes[user_id]), user_id + ) + + +def map_indexes(image_path, n_chunks): + """ + Create a dictionary mapping the ID to VGG2-ID, like: + + {0: 'n000001'], + 1: 'n000002']} + + """ + + indexes = sorted(list(set([l.split("/")[0] for l in open(image_path).readlines()]))) + + identities_map = {indexes[i]: i for i in range(len(indexes))} + + # SPLIT THE DICTIONARY IN TOTAL_CHUNKS + indexes_as_list = list(identities_map.items()) + dict_as_list = np.array_split(indexes_as_list, n_chunks) + dicts = [dict(d) for d in dict_as_list] + + return dicts + + +if __name__ == "__main__": + args = docopt(__doc__) + + VGG2_PATH = args["<vgg-path>"] + LANDMARK_PATH = os.path.join(VGG2_PATH, "bb_landmark", "loose_bb_train.csv") + + if "SGE_TASK_LAST" in os.environ: + TOTAL_CHUNKS = int(os.environ["SGE_TASK_LAST"]) + CURRENT_CHUNK = int(os.environ["SGE_TASK_ID"]) - 1 + else: + TOTAL_CHUNKS = 1 + CURRENT_CHUNK = 0 + + # TOTAL_CHUNKS = 140 + # CURRENT_CHUNK = 0 + + TRAINING_LIST = os.path.join(VGG2_PATH, "train_list.txt") + # TEST_LIST = os.path.join(VGG2_PATH, "test_list.txt") + + # MAP ALL INDEXES + + indexes = map_indexes(TRAINING_LIST, TOTAL_CHUNKS) + + generate_tfrecord( + os.path.join(VGG2_PATH, "train"), + LANDMARK_PATH, + TRAINING_LIST, + os.path.join( + args["<output-path>"], f"train_vgg2_chunk{CURRENT_CHUNK}.tfrecords" + ), + indexes[CURRENT_CHUNK], + factor=float(args["--factor"]), + ) + diff --git a/cnn_training/webface360_to_tfrecord.py b/cnn_training/webface360_to_tfrecord.py new file mode 100644 index 00000000..e0c40a1d --- /dev/null +++ b/cnn_training/webface360_to_tfrecord.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Converts WEB360 to TF RECORD + +Usage: + webface360_to_tfrecords.py <web360-path> <output-path> + webface360_to_tfrecords.py -h | --help + +Options: + -h --help Show this screen. 
+ +""" + + +from docopt import docopt +import numpy as np +import os +import bob.io.image +import bob.io.base +import tensorflow as tf +import sys +from datetime import datetime + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def get_web360dirs(): + """ + Here I'm hardcoding the paths so we get consistent tfrecords, + just in case the IT decides to reestructure the web360 directory + """ + + return [ + "0_0_000", + "0_0_001", + "0_0_002", + "0_1_003", + "0_1_004", + "0_1_005", + "0_2_006", + "0_2_007", + "0_2_008", + "0_3_009", + "0_3_010", + "0_3_011", + "0_4_012", + "0_4_013", + "0_4_014", + "0_5_015", + "0_5_016", + "0_5_017", + "0_6_018", + "0_6_019", + "0_6_020", + "1_0_000", + "1_0_001", + "1_0_002", + "1_1_003", + "1_1_004", + "1_1_005", + "1_2_006", + "1_2_007", + "1_2_008", + "1_3_009", + "1_3_010", + "1_3_011", + "1_4_012", + "1_4_013", + "1_4_014", + "1_5_015", + "1_5_016", + "1_5_017", + "1_6_018", + "1_6_019", + "1_6_020", + "2_0_000", + "2_0_001", + "2_0_002", + "2_1_003", + "2_1_004", + "2_1_005", + "2_2_006", + "2_2_007", + "2_2_008", + "2_3_009", + "2_3_010", + "2_3_011", + "2_4_012", + "2_4_013", + "2_4_014", + "2_5_015", + "2_5_016", + "2_5_017", + "2_6_018", + "2_6_019", + "2_6_020", + ] + + +def get_keys(base_path): + + root_dirs = get_web360dirs() + keys = dict() + offset = 0 + for r in root_dirs: + + identities_dir = os.path.join(base_path, r) + for i in os.listdir(identities_dir): + id_dir = os.path.join(identities_dir, i) + if os.path.isdir(id_dir): + keys[i] = offset + offset += 1 + return keys + + +def generate_tfrecord( + chunk_path, output_tf_record_path, keys, +): + def write_single_line_tfrecord(writer, image, offset, user_id): + + # Serializing + serialized_img = image.tobytes() + + # Writing + feature = { + "data": _bytes_feature(serialized_img), + "label": _int64_feature(offset), + "key": _bytes_feature(str.encode(user_id)), + } + + example = tf.train.Example(features=tf.train.Features(feature=feature)) + writer.write(example.SerializeToString()) + + os.makedirs(os.path.dirname(output_tf_record_path), exist_ok=True) + + with tf.io.TFRecordWriter(output_tf_record_path) as tf_writer: + + for identity in os.listdir(chunk_path): + # Discarting the one we've discarted already + if identity not in keys: + continue + + identity_path = os.path.join(chunk_path, identity) + if not os.path.isdir(identity_path): + continue + + sys.stdout.write(f"Processing {identity} \n") + sys.stdout.flush() + + for image_path in os.listdir(identity_path): + image_path = os.path.join(identity_path, image_path) + if os.path.splitext(image_path)[-1] != ".jpg": + continue + image = bob.io.image.to_matplotlib(bob.io.image.load(image_path)) + + write_single_line_tfrecord(tf_writer, image, keys[identity], identity) + + +if __name__ == "__main__": + args = docopt(__doc__) + + WEB360_PATH = args["<web360-path>"] + output_path = args["<output-path>"] + + if "SGE_TASK_LAST" in os.environ: + TOTAL_CHUNKS = int(os.environ["SGE_TASK_LAST"]) + CURRENT_CHUNK = int(os.environ["SGE_TASK_ID"]) - 1 + else: + TOTAL_CHUNKS = 1 + CURRENT_CHUNK = 0 + + # keys = get_keys(WEB360_PATH) + import pickle + + keys = pickle.loads(open("keys-web360.pickle", "rb").read()) + + root_dirs = get_web360dirs() + output_tf_record_path = os.path.join(output_path, f"chunk_{CURRENT_CHUNK}.tfrecord") + chunk_path = os.path.join(WEB360_PATH, 
root_dirs[CURRENT_CHUNK]) + + generate_tfrecord(chunk_path, output_tf_record_path, keys) + diff --git a/cnn_training_cpy/._centerloss_mixed_precision.py b/cnn_training_cpy/._centerloss_mixed_precision.py new file mode 100644 index 0000000000000000000000000000000000000000..881da051d61cfeb4e02036b9ea9c9df85b2fefdf GIT binary patch literal 4096 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDJkFz{^v(m+1nBL)UWIUt(=a103v0xDSr z(ZR3)$QD4;!U*I;#if$-bM+Dn3UX5QauSP6N{drdQW8s2l>>r7dM5YIiU-k_pJPF; z83m&uFd71*Aut*OqaiRF0;3@?8UmvsFd71*Aut*OqalDZ1VEil5C#Lekc`Y?g`(8r z{L-T2RE4zsqHKlCypq(slFa<P#GIT;g`Cv15{0Ck#Jp@^Ul^)uNK>f(hkHeaLGJ&5 E0JF3zQ~&?~ literal 0 HcmV?d00001 diff --git a/cnn_training_cpy/arcface.py b/cnn_training_cpy/arcface.py new file mode 100644 index 00000000..fb593509 --- /dev/null +++ b/cnn_training_cpy/arcface.py @@ -0,0 +1,409 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Trains some face recognition baselines using ARC based models + +# ARCFACE PARAMETERS from eq.4 +# FROM https://github.com/deepinsight/insightface/blob/master/recognition/ArcFace/sample_config.py#L153 + M1 = 1.0 + M2 = 0.3 + M3 = 0.2 + + +# ARCFACE PARAMETERS from eq.3 +M = 0.5 # ArcFace Margin #CHECK SECTION 3.1 +SCALE = 64.0 # Scale +# ORIGINAL = False # Original implementation + + +The config file has the following format to train an ARCFACE model: + +```yml +# VGG2 params +batch-size: 90 +face-size: 182 +face-output-size: 160 +n-classes: 87662 + + +## Backbone +backbone: 'mobilenet-v2' +head: 'arcface' +s: 10 +bottleneck: 512 +m: 0.5 + +# Training parameters +#solver: "rmsprop" +solver: "sgd" +lr: 0.1 +dropout-rate: 0.5 +epochs: 310 +lerning-rate-schedule: 'cosine-decay-restarts' + + + +train-tf-record-path: "/path/*.tfrecord" +validation-tf-record-path: "/path/lfw_pairs.tfrecord" +``` + + + + +Usage: + arcface.py <config-yaml> <checkpoint_path> + arcface.py -h | --help + +Options: + -h --help Show this screen. 
+ arcface.py arcface -h | help + +""" + +import os +from functools import partial + +import pkg_resources +import tensorflow as tf +from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2 +from bob.learn.tensorflow.metrics import predict_using_tensors +from tensorflow.keras import layers +from bob.learn.tensorflow.callbacks import add_backup_callback +from bob.learn.tensorflow.metrics.embedding_accuracy import accuracy_from_embeddings +from bob.extension import rc +from bob.bio.face.tensorflow.preprocessing import prepare_dataset +import yaml + +from bob.learn.tensorflow.layers import ( + add_bottleneck, + add_top, + SphereFaceLayer, + ModifiedSoftMaxLayer, +) + +from bob.learn.tensorflow.models import ( + EmbeddingValidation, + ArcFaceLayer, + ArcFaceModel, + ArcFaceLayer3Penalties, +) + + +############################## +# CNN Backbones +# Add your NN backbone here +############################## +BACKBONES = dict() +BACKBONES["inception-resnet-v2"] = InceptionResNetV2 +BACKBONES["efficientnet-B0"] = tf.keras.applications.EfficientNetB0 +BACKBONES["resnet50"] = tf.keras.applications.ResNet50 +BACKBONES["mobilenet-v2"] = tf.keras.applications.MobileNetV2 + +############################## +# SOLVER SPECIFICATIONS +############################## + +SOLVERS = dict() +# Parameters taken from https://github.com/davidsandberg/facenet/blob/master/src/facenet.py#L181 +# Fixing the start learning rate +learning_rate = 0.1 +SOLVERS["rmsprop"] = partial( + tf.keras.optimizers.RMSprop, + learning_rate=learning_rate, + rho=0.9, + momentum=0.9, + epsilon=1.0, +) +SOLVERS["adam"] = partial(tf.keras.optimizers.Adam, learning_rate=learning_rate) +SOLVERS["adagrad"] = partial(tf.keras.optimizers.Adagrad, learning_rate=learning_rate) +SOLVERS["sgd"] = partial( + tf.keras.optimizers.SGD, learning_rate=learning_rate, momentum=0.9, nesterov=True +) + + +################################ +# DATA SPECIFICATION +############################### +DATA_SHAPES = dict() + +# Inputs with 182x182 are cropped to 160x160 +DATA_SHAPES[182] = 160 +DATA_SHAPES[112] = 98 +DATA_SHAPES[126] = 112 + + +# SHAPES EXPECTED FROM THE DATASET USING THIS BACKBONE +# DATA_SHAPE = (182, 182, 3) # size of faces +DATA_TYPE = tf.uint8 +# OUTPUT_SHAPE = (160, 160) + +AUTOTUNE = tf.data.experimental.AUTOTUNE + +# HERE WE VALIDATE WITH LFW RUNNING A +# INFORMATION ABOUT THE VALIDATION SET +# VALIDATION_TF_RECORD_PATHS = rc["bob.bio.face.cnn.lfw_tfrecord_path"] + +# there are 2812 samples in the validation set +VALIDATION_SAMPLES = 2812 +VALIDATION_BATCH_SIZE = 38 + + +def create_model( + n_classes, model_spec, backbone, bottleneck, dropout_rate, input_shape +): + + if backbone == "inception-resnet-v2": + pre_model = BACKBONES[backbone]( + include_top=False, bottleneck=False, input_shape=input_shape, + ) + else: + pre_model = BACKBONES[backbone]( + include_top=False, input_shape=input_shape, weights=None, + ) + + # Adding the bottleneck + pre_model = add_bottleneck( + pre_model, bottleneck_size=bottleneck, dropout_rate=dropout_rate + ) + pre_model = add_top(pre_model, n_classes=n_classes) + + float32_layer = layers.Activation("linear", dtype="float32") + + embeddings = tf.nn.l2_normalize( + pre_model.get_layer("embeddings/BatchNorm").output, axis=1 + ) + + logits_premodel = float32_layer(pre_model.get_layer("logits").output) + + # Wrapping the embedding validation + pre_model = EmbeddingValidation( + pre_model.input, outputs=[logits_premodel, embeddings], name=pre_model.name + ) + + ################################ + ## 
Creating the specific models + if "arcface" in model_spec: + labels = tf.keras.layers.Input([], name="label") + logits_arcface = ArcFaceLayer( + n_classes, s=model_spec["arcface"]["s"], m=model_spec["arcface"]["m"] + )(embeddings, labels) + arc_model = ArcFaceModel( + inputs=(pre_model.input, labels), outputs=[logits_arcface, embeddings] + ) + elif "arcface-3p" in model_spec: + labels = tf.keras.layers.Input([], name="label") + logits_arcface = ArcFaceLayer3Penalties( + n_classes, + s=model_spec["arcface-3p"]["s"], + m1=model_spec["arcface-3p"]["m1"], + m2=model_spec["arcface-3p"]["m2"], + m3=model_spec["arcface-3p"]["m3"], + )(embeddings, labels) + arc_model = ArcFaceModel( + inputs=(pre_model.input, labels), outputs=[logits_arcface, embeddings] + ) + elif "sphereface" in model_spec: + logits_arcface = SphereFaceLayer(n_classes, m=model_spec["sphereface"]["m"],)( + embeddings + ) + arc_model = EmbeddingValidation( + pre_model.input, outputs=[logits_arcface, embeddings] + ) + + elif "modified-softmax" in model_spec: + logits_modified_softmax = ModifiedSoftMaxLayer(n_classes)(embeddings) + arc_model = EmbeddingValidation( + pre_model.input, outputs=[logits_modified_softmax, embeddings] + ) + + return pre_model, arc_model + + +def build_and_compile_models( + n_classes, optimizer, model_spec, backbone, bottleneck, dropout_rate, input_shape +): + pre_model, arc_model = create_model( + n_classes, model_spec, backbone, bottleneck, dropout_rate, input_shape + ) + + cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, name="cross_entropy" + ) + + pre_model.compile(optimizer=optimizer, loss=cross_entropy, metrics=["accuracy"]) + + arc_model.compile(optimizer=optimizer, loss=cross_entropy, metrics=["accuracy"]) + + return pre_model, arc_model + + +def train_and_evaluate( + tf_record_paths, + checkpoint_path, + n_classes, + batch_size, + epochs, + model_spec, + backbone, + optimizer, + bottleneck, + dropout_rate, + face_size, + validation_path, + lerning_rate_schedule, +): + + # number of training steps to do before validating a model. This also defines an epoch + # for keras which is not really true. We want to evaluate every 180000 (90 * 2000) + # samples + # STEPS_PER_EPOCH = 180000 // batch_size + # KERAS_EPOCH_MULTIPLIER = 6 + STEPS_PER_EPOCH = 2000 + + DATA_SHAPE = (face_size, face_size, 3) + OUTPUT_SHAPE = (DATA_SHAPES[face_size], DATA_SHAPES[face_size]) + + if validation_path is None: + validation_path = rc["bob.bio.face.cnn.lfw_tfrecord_path"] + if validation_path is None: + raise ValueError( + "No validation set was set. 
Please, do `bob config set bob.bio.face.cnn.lfw_tfrecord_path [PATH]`" + ) + + train_ds = prepare_dataset( + tf_record_paths, + batch_size, + epochs, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + shuffle=True, + augment=True, + ) + + val_ds = prepare_dataset( + validation_path, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + epochs=epochs, + batch_size=VALIDATION_BATCH_SIZE, + shuffle=False, + augment=False, + ) + val_metric_name = "val_accuracy" + + pre_model, arc_model = build_and_compile_models( + n_classes, + optimizer, + model_spec, + backbone, + bottleneck=bottleneck, + dropout_rate=dropout_rate, + input_shape=OUTPUT_SHAPE + (3,), + ) + + def scheduler(epoch, lr): + # 200 epochs at 0.1, 10 at 0.01 and 5 0.001 + # The epoch number here is Keras's which is different from actual epoch number + # epoch = epoch // KERAS_EPOCH_MULTIPLIER + + # Tracking in the tensorboard + tf.summary.scalar("learning rate", data=lr, step=epoch) + + if epoch in range(200): + return 1 * lr + else: + return lr * tf.math.exp(-0.01) + + if lerning_rate_schedule == "cosine-decay-restarts": + decay_steps = 50 + lr_decayed_fn = tf.keras.callbacks.LearningRateScheduler( + tf.keras.experimental.CosineDecayRestarts( + 0.1, decay_steps, t_mul=2.0, m_mul=0.8, alpha=0.1 + ), + verbose=1, + ) + + else: + lr_decayed_fn = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1) + + callbacks = { + "latest": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/latest", verbose=1 + ), + "tensorboard": tf.keras.callbacks.TensorBoard( + log_dir=f"{checkpoint_path}/logs", update_freq=15, profile_batch=0 + ), + "lr": lr_decayed_fn, + "nan": tf.keras.callbacks.TerminateOnNaN(), + } + + callbacks = add_backup_callback(callbacks, backup_dir=f"{checkpoint_path}/backup") + # STEPS_PER_EPOCH + pre_model.fit( + train_ds, + epochs=2, + validation_data=val_ds, + steps_per_epoch=STEPS_PER_EPOCH, + validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE, + callbacks=callbacks, + verbose=2, + ) + + # STEPS_PER_EPOCH + # epochs=epochs * KERAS_EPOCH_MULTIPLIER, + arc_model.fit( + train_ds, + validation_data=val_ds, + epochs=epochs, + steps_per_epoch=STEPS_PER_EPOCH, + validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE, + callbacks=callbacks, + verbose=2, + ) + + +from docopt import docopt + +if __name__ == "__main__": + args = docopt(__doc__) + + config = yaml.full_load(open(args["<config-yaml>"])) + + model_spec = dict() + if config["head"] == "arcface": + model_spec["arcface"] = dict() + model_spec["arcface"]["m"] = float(config["m"]) + model_spec["arcface"]["s"] = int(config["s"]) + + if config["head"] == "arcface-3p": + model_spec["arcface-3p"] = dict() + model_spec["arcface-3p"]["m1"] = float(config["m1"]) + model_spec["arcface-3p"]["m2"] = float(config["m2"]) + model_spec["arcface-3p"]["m3"] = float(config["m3"]) + model_spec["arcface-3p"]["s"] = int(config["s"]) + + if config["head"] == "sphereface": + model_spec["sphereface"] = dict() + model_spec["sphereface"]["m"] = float(config["m"]) + + if config["head"] == "modified-softmax": + # There's no hyper parameter here + model_spec["modified-softmax"] = dict() + + train_and_evaluate( + config["train-tf-record-path"], + args["<checkpoint_path>"], + int(config["n-classes"]), + int(config["batch-size"]), + int(config["epochs"]), + model_spec, + config["backbone"], + optimizer=SOLVERS[config["solver"]](learning_rate=float(config["lr"])), + bottleneck=int(config["bottleneck"]), + dropout_rate=float(config["dropout-rate"]), + 
face_size=int(config["face-size"]), + validation_path=config["validation-tf-record-path"], + lerning_rate_schedule=config["lerning-rate-schedule"], + ) + diff --git a/cnn_training_cpy/centerloss.py b/cnn_training_cpy/centerloss.py new file mode 100644 index 00000000..bac26eb5 --- /dev/null +++ b/cnn_training_cpy/centerloss.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Trains a face recognition CNN using the strategy from the paper + +"A Discriminative Feature Learning Approach +for Deep Face Recognition" https://ydwen.github.io/papers/WenECCV16.pdf + +The default backbone is the InceptionResnetv2 + +Do `./bin/python centerloss.py --help` for more information + +""" + +import os +from functools import partial +import click +import pkg_resources +import tensorflow as tf +from bob.learn.tensorflow.losses import CenterLoss, CenterLossLayer +from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2 +from bob.learn.tensorflow.metrics import predict_using_tensors +from tensorflow.keras import layers +from bob.learn.tensorflow.callbacks import add_backup_callback +from bob.learn.tensorflow.metrics.embedding_accuracy import accuracy_from_embeddings +from bob.extension import rc +from bob.bio.face.tensorflow.preprocessing import prepare_dataset + +# CNN Backbone +# Change your NN backbone here +BACKBONE = InceptionResNetV2 + +# SHAPES EXPECTED FROM THE DATASET USING THIS BACKBONE +DATA_SHAPE = (182, 182, 3) # size of faces +DATA_TYPE = tf.uint8 +OUTPUT_SHAPE = (160, 160) + +AUTOTUNE = tf.data.experimental.AUTOTUNE + +# HERE WE VALIDATE WITH LFW RUNNING A +# INFORMATION ABOUT THE VALIDATION SET +VALIDATION_TF_RECORD_PATHS = rc["bob.bio.face.cnn.lfw_tfrecord_path"] + +# there are 2812 samples in the validation set +VALIDATION_SAMPLES = 2812 +VALIDATION_BATCH_SIZE = 38 + +# WEIGHTS BEWTWEEN the two losses +LOSS_WEIGHTS = {"cross_entropy": 1.0, "center_loss": 0.01} + + +class CenterLossModel(tf.keras.Model): + def compile( + self, + cross_entropy, + center_loss, + loss_weights, + train_loss, + train_cross_entropy, + train_center_loss, + test_acc, + **kwargs, + ): + super().compile(**kwargs) + self.cross_entropy = cross_entropy + self.center_loss = center_loss + self.loss_weights = loss_weights + self.train_loss = train_loss + self.train_cross_entropy = train_cross_entropy + self.train_center_loss = train_center_loss + self.test_acc = test_acc + + def train_step(self, data): + images, labels = data + with tf.GradientTape() as tape: + logits, prelogits = self(images, training=True) + loss_cross = self.cross_entropy(labels, logits) + loss_center = self.center_loss(labels, prelogits) + loss = ( + loss_cross * self.loss_weights[self.cross_entropy.name] + + loss_center * self.loss_weights[self.center_loss.name] + ) + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + + self.train_loss(loss) + self.train_cross_entropy(loss_cross) + self.train_center_loss(loss_center) + return { + m.name: m.result() + for m in [self.train_loss, self.train_cross_entropy, self.train_center_loss] + } + + def test_step(self, data): + images, labels = data + logits, prelogits = self(images, training=False) + self.test_acc(accuracy_from_embeddings(labels, prelogits)) + return {m.name: m.result() for m in [self.test_acc]} + + +def create_model(n_classes): + + model = BACKBONE( + include_top=True, + classes=n_classes, + bottleneck=True, + input_shape=OUTPUT_SHAPE + (3,), + ) + + 
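+    # The backbone is built with include_top=True and bottleneck=True so that the
+    # "Bottleneck/BatchNorm" (pre-logit embedding) and "logits" layers referenced
+    # below exist; the CenterLossLayer is attached to that embedding output.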
prelogits = model.get_layer("Bottleneck/BatchNorm").output + prelogits = CenterLossLayer( + n_classes=n_classes, n_features=prelogits.shape[-1], name="centers" + )(prelogits) + + logits = model.get_layer("logits").output + model = CenterLossModel( + inputs=model.input, outputs=[logits, prelogits], name=model.name + ) + return model + + +def build_and_compile_model(n_classes, learning_rate): + model = create_model(n_classes) + + cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, name="cross_entropy" + ) + center_loss = CenterLoss( + centers_layer=model.get_layer("centers"), alpha=0.9, name="center_loss", + ) + + optimizer = tf.keras.optimizers.RMSprop( + learning_rate=learning_rate, rho=0.9, momentum=0.9, epsilon=1.0 + ) + + train_loss = tf.keras.metrics.Mean(name="loss") + train_cross_entropy = tf.keras.metrics.Mean(name="cross_entropy") + train_center_loss = tf.keras.metrics.Mean(name="center_loss") + + test_acc = tf.keras.metrics.Mean(name="accuracy") + + model.compile( + optimizer=optimizer, + cross_entropy=cross_entropy, + center_loss=center_loss, + loss_weights=LOSS_WEIGHTS, + train_loss=train_loss, + train_cross_entropy=train_cross_entropy, + train_center_loss=train_center_loss, + test_acc=test_acc, + ) + return model + + +@click.command() +@click.argument("tf-record-paths") +@click.argument("checkpoint-path") +@click.option( + "-n", + "--n-classes", + default=87662, + help="Number of classes in the classification problem. Default to `87662`, which is the number of identities in our pruned MSCeleb", +) +@click.option( + "-b", + "--batch-size", + default=90, + help="Batch size. Be aware that we are using single precision. Batch size should be high.", +) +@click.option( + "-e", "--epochs", default=35, help="Number of epochs", +) +def train_and_evaluate(tf_record_paths, checkpoint_path, n_classes, batch_size, epochs): + # number of training steps to do before validating a model. This also defines an epoch + # for keras which is not really true. We want to evaluate every 180000 (90 * 2000) + # samples + STEPS_PER_EPOCH = 180000 // batch_size + learning_rate = 0.1 + KERAS_EPOCH_MULTIPLIER = 6 + train_ds = prepare_dataset( + tf_record_paths, + batch_size, + epochs, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + shuffle=True, + augment=True, + ) + + if VALIDATION_TF_RECORD_PATHS is None: + raise ValueError( + "No validation set was set. 
Please, do `bob config set bob.bio.face.cnn.lfw_tfrecord_path [PATH]`" + ) + + val_ds = prepare_dataset( + VALIDATION_TF_RECORD_PATHS, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + epochs=epochs, + batch_size=VALIDATION_BATCH_SIZE, + shuffle=False, + augment=False, + ) + val_metric_name = "val_accuracy" + + model = build_and_compile_model(n_classes, learning_rate) + + def scheduler(epoch, lr): + # 20 epochs at 0.1, 10 at 0.01 and 5 0.001 + # The epoch number here is Keras's which is different from actual epoch number + epoch = epoch // KERAS_EPOCH_MULTIPLIER + if epoch in range(20): + return 0.1 + elif epoch in range(20, 30): + return 0.01 + else: + return 0.001 + + callbacks = { + "latest": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/latest", verbose=1 + ), + "best": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/best", + monitor=val_metric_name, + save_best_only=True, + mode="max", + verbose=1, + ), + "tensorboard": tf.keras.callbacks.TensorBoard( + log_dir=f"{checkpoint_path}/logs", update_freq=15, profile_batch=0 + ), + "lr": tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1), + "nan": tf.keras.callbacks.TerminateOnNaN(), + } + callbacks = add_backup_callback(callbacks, backup_dir=f"{checkpoint_path}/backup") + model.fit( + train_ds, + validation_data=val_ds, + epochs=epochs * KERAS_EPOCH_MULTIPLIER, + steps_per_epoch=STEPS_PER_EPOCH, + validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE, + callbacks=callbacks, + verbose=2, + ) + + +if __name__ == "__main__": + train_and_evaluate() diff --git a/cnn_training_cpy/centerloss_mixed_precision.py b/cnn_training_cpy/centerloss_mixed_precision.py new file mode 100644 index 00000000..e14069f5 --- /dev/null +++ b/cnn_training_cpy/centerloss_mixed_precision.py @@ -0,0 +1,279 @@ +w #!/usr/bin/env python +# coding: utf-8 + +""" +Trains a face recognition CNN using the strategy from the paper + +"A Discriminative Feature Learning Approach +for Deep Face Recognition" https://ydwen.github.io/papers/WenECCV16.pdf + +######### +# THIS ONE USES FLOAT16 TO COMPUTE THE GRADIENTS +# CHECKE HERE FOR MORE INFO: # https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/Policy +######## + +The default backbone is the InceptionResnetv2 + +Do `./bin/python centerloss_mixed_precision.py --help` for more information + +""" + +import os +from functools import partial +import click +import pkg_resources +import tensorflow as tf +from bob.learn.tensorflow.losses import CenterLoss, CenterLossLayer +from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2 +from bob.learn.tensorflow.metrics import predict_using_tensors +from tensorflow.keras import layers +from tensorflow.keras.mixed_precision import experimental as mixed_precision +from bob.learn.tensorflow.callbacks import add_backup_callback +from bob.learn.tensorflow.metrics.embedding_accuracy import accuracy_from_embeddings +from bob.extension import rc +from bob.bio.face.tensorflow.preprocessing import prepare_dataset + +# Setting mixed precision policy +# https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/Policy +policy = mixed_precision.Policy("mixed_float16") +mixed_precision.set_policy(policy) + +# CNN Backbone +# Change your NN backbone here +BACKBONE = InceptionResNetV2 + +# SHAPES EXPECTED FROM THE DATASET USING THIS BACKBONE +DATA_SHAPE = (182, 182, 3) # size of faces +DATA_TYPE = tf.uint8 +OUTPUT_SHAPE = (160, 160) + +AUTOTUNE = tf.data.experimental.AUTOTUNE + +# HERE WE 
VALIDATE WITH LFW RUNNING A +# INFORMATION ABOUT THE VALIDATION SET +VALIDATION_TF_RECORD_PATHS = rc["bob.bio.face.cnn.lfw_tfrecord_path"] + +# there are 2812 samples in the validation set +VALIDATION_SAMPLES = 2812 +VALIDATION_BATCH_SIZE = 38 + +# WEIGHTS BEWTWEEN the two losses +LOSS_WEIGHTS = {"cross_entropy": 1.0, "center_loss": 0.01} + + +class CenterLossModel(tf.keras.Model): + def compile( + self, + cross_entropy, + center_loss, + loss_weights, + train_loss, + train_cross_entropy, + train_center_loss, + test_acc, + global_batch_size, + **kwargs, + ): + super().compile(**kwargs) + self.cross_entropy = cross_entropy + self.center_loss = center_loss + self.loss_weights = loss_weights + self.train_loss = train_loss + self.train_cross_entropy = train_cross_entropy + self.train_center_loss = train_center_loss + self.test_acc = test_acc + self.global_batch_size = global_batch_size + + def train_step(self, data): + images, labels = data + with tf.GradientTape() as tape: + logits, prelogits = self(images, training=True) + loss_cross = self.cross_entropy(labels, logits) + loss_center = self.center_loss(labels, prelogits) + loss = ( + loss_cross * self.loss_weights[self.cross_entropy.name] + + loss_center * self.loss_weights[self.center_loss.name] + ) + unscaled_loss = tf.nn.compute_average_loss( + loss, global_batch_size=self.global_batch_size + ) + loss = self.optimizer.get_scaled_loss(unscaled_loss) + + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + gradients = self.optimizer.get_unscaled_gradients(gradients) + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + + self.train_loss(unscaled_loss) + self.train_cross_entropy(loss_cross) + self.train_center_loss(loss_center) + return { + m.name: m.result() + for m in [self.train_loss, self.train_cross_entropy, self.train_center_loss] + } + + def test_step(self, data): + images, labels = data + logits, prelogits = self(images, training=False) + self.test_acc(accuracy_from_embeddings(labels, prelogits)) + return {m.name: m.result() for m in [self.test_acc]} + + +def create_model(n_classes): + + model = BACKBONE( + include_top=True, + classes=n_classes, + bottleneck=True, + input_shape=OUTPUT_SHAPE + (3,), + kernel_regularizer=tf.keras.regularizers.L2(5e-5), + ) + float32_layer = layers.Activation("linear", dtype="float32") + + prelogits = model.get_layer("Bottleneck/BatchNorm").output + prelogits = CenterLossLayer( + n_classes=n_classes, n_features=prelogits.shape[-1], name="centers" + )(prelogits) + prelogits = float32_layer(prelogits) + logits = float32_layer(model.get_layer("logits").output) + model = CenterLossModel( + inputs=model.input, outputs=[logits, prelogits], name=model.name + ) + return model + + +def build_and_compile_model(n_classes, learning_rate, global_batch_size): + model = create_model(n_classes) + + cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, name="cross_entropy", reduction=tf.keras.losses.Reduction.NONE + ) + center_loss = CenterLoss( + centers_layer=model.get_layer("centers"), + alpha=0.9, + name="center_loss", + reduction=tf.keras.losses.Reduction.NONE, + ) + + optimizer = tf.keras.optimizers.RMSprop( + learning_rate=learning_rate, rho=0.9, momentum=0.9, epsilon=1.0 + ) + optimizer = mixed_precision.LossScaleOptimizer(optimizer, loss_scale="dynamic") + + train_loss = tf.keras.metrics.Mean(name="loss") + train_cross_entropy = tf.keras.metrics.Mean(name="cross_entropy") + train_center_loss = 
tf.keras.metrics.Mean(name="center_loss") + + test_acc = tf.keras.metrics.Mean(name="accuracy") + + model.compile( + optimizer=optimizer, + cross_entropy=cross_entropy, + center_loss=center_loss, + loss_weights=LOSS_WEIGHTS, + train_loss=train_loss, + train_cross_entropy=train_cross_entropy, + train_center_loss=train_center_loss, + test_acc=test_acc, + global_batch_size=global_batch_size, + ) + return model + + +@click.command() +@click.argument("tf-record-paths") +@click.argument("checkpoint-path") +@click.option( + "-n", + "--n-classes", + default=87662, + help="Number of classes in the classification problem. Default to `87662`, which is the number of identities in our pruned MSCeleb", +) +@click.option( + "-b", + "--batch-size", + default=90 * 2, + help="Batch size. Be aware that we are using single precision. Batch size should be high.", +) +@click.option( + "-e", "--epochs", default=35, help="Number of epochs", +) +def train_and_evaluate(tf_record_paths, checkpoint_path, n_classes, batch_size, epochs): + # number of training steps to do before validating a model. This also defines an epoch + # for keras which is not really true. We want to evaluate every 180000 (90 * 2000) + # samples + STEPS_PER_EPOCH = 180000 // batch_size + learning_rate = 0.1 + KERAS_EPOCH_MULTIPLIER = 6 + train_ds = prepare_dataset( + tf_record_paths, + batch_size, + epochs, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + shuffle=True, + augment=True, + ) + + if VALIDATION_TF_RECORD_PATHS is None: + raise ValueError( + "No validation set was set. Please, do `bob config set bob.bio.face.cnn.lfw_tfrecord_path [PATH]`" + ) + + val_ds = prepare_dataset( + VALIDATION_TF_RECORD_PATHS, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + epochs=epochs, + batch_size=VALIDATION_BATCH_SIZE, + shuffle=False, + augment=False, + ) + val_metric_name = "val_accuracy" + + model = build_and_compile_model( + n_classes, learning_rate, global_batch_size=batch_size + ) + + def scheduler(epoch, lr): + # 20 epochs at 0.1, 10 at 0.01 and 5 0.001 + # The epoch number here is Keras's which is different from actual epoch number + epoch = epoch // KERAS_EPOCH_MULTIPLIER + if epoch in range(20): + return 0.1 + elif epoch in range(20, 30): + return 0.01 + else: + return 0.001 + + callbacks = { + "latest": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/latest", verbose=1 + ), + "best": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/best", + monitor=val_metric_name, + save_best_only=True, + mode="max", + verbose=1, + ), + "tensorboard": tf.keras.callbacks.TensorBoard( + log_dir=f"{checkpoint_path}/logs", update_freq=15, profile_batch="10,50" + ), + "lr": tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1), + "nan": tf.keras.callbacks.TerminateOnNaN(), + } + callbacks = add_backup_callback(callbacks, backup_dir=f"{checkpoint_path}/backup") + model.fit( + train_ds, + validation_data=val_ds, + epochs=epochs * KERAS_EPOCH_MULTIPLIER, + steps_per_epoch=STEPS_PER_EPOCH, + validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE, + callbacks=callbacks, + verbose=2, + ) + + +if __name__ == "__main__": + train_and_evaluate() diff --git a/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.ipynb b/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.ipynb new file mode 100644 index 00000000..8ab56222 --- /dev/null +++ b/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.ipynb @@ -0,0 +1,70 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to crop a face" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "CROPPED_IMAGE_HEIGHT = 128\n", + "CROPPED_IMAGE_WIDTH = 128\n", + "\n", + "EYE_CENTER_POS = (40, 64)\n", + "MOUTH_CENTER_POS = (88, 64)\n", + "\n", + "\n", + "mouth_center=(int((annotations['mouthleft'][0]+annotations['mouthright'][0])/2.0), int((annotations['mouthleft'][1]+annotations['mouthright'][1])/2.0))\n", + "\n", + "eye_center=(int((annotations['leye'][0]+annotations['reye'][0])/2.0), int((annotations['leye'][1]+annotations['reye'][1])/2.0))\n", + "\n", + "annotations['eye_center'] =eye_center\n", + "\n", + "annotations['mouth_center']=mouth_center\n", + "\n", + "light_cnn_face_cropper=bob.bio.face.preprocessor.FaceCrop(\n", + " cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH),\n", + " cropped_positions={'eye_center': EYE_CENTER_POS, 'mouth_center': MOUTH_CENTER_POS})\n", + "\n", + "\n", + "normalized_image = light_cnn_face_cropper.crop_face( image, annotations=annotations)" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:light", + "text_representation": { + "extension": ".py", + "format_name": "light", + "format_version": "1.5", + "jupytext_version": "1.7.1" + } + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.py b/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.py new file mode 100644 index 00000000..ac6b78f4 --- /dev/null +++ b/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.py @@ -0,0 +1,54 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:light +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.11.1 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# ## How to crop a face + +# + +import bob.bio.face +import bob.io.image + +# Loading Ada's images +image = bob.io.image.load("./img/838_ada.jpg") + +# Setting Ada's eyes +annotations = dict() +annotations['reye'] = (265, 203) +annotations['leye'] = (278, 294) + +# Final cropped size +cropped_image_size = (224, 224) + +# Defining where we want the eyes to be located after the crop +cropped_positions = {"leye": (65, 150), "reye": (65, 77)} + + +face_cropper=bob.bio.face.preprocessor.FaceCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel="rgb") + +# Crops always a batch of images +cropped_image = face_cropper.transform([image], annotations=[annotations]) + + +# + +import matplotlib.pyplot as plt + +figure = plt.figure() +plt.subplot(121) +bob.io.image.imshow(image) +plt.subplot(122) +bob.io.image.imshow(cropped_image[0].astype("uint8")) +figure.show() diff --git a/doc/faq/facecrop.ipynb b/doc/faq/facecrop.ipynb new file mode 100644 index 00000000..03044580 --- /dev/null +++ b/doc/faq/facecrop.ipynb @@ -0,0 +1,109 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to crop a face" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "import bob.bio.face\n", + "import bob.io.image\n", + "\n", + "# Loading Ada's images\n", + 
"image = bob.io.image.load(\"./img/838_ada.jpg\")\n", + "\n", + "# Setting Ada's eyes\n", + "annotations = dict()\n", + "annotations['reye'] = (265, 203)\n", + "annotations['leye'] = (278, 294)\n", + "\n", + "# Final cropped size\n", + "cropped_image_size = (224, 224)\n", + "\n", + "# Defining where we want the eyes to be located after the crop\n", + "cropped_positions = {\"leye\": (65, 150), \"reye\": (65, 77)}\n", + "\n", + "\n", + "face_cropper=bob.bio.face.preprocessor.FaceCrop(\n", + " cropped_image_size=cropped_image_size,\n", + " cropped_positions=cropped_positions,\n", + " color_channel=\"rgb\")\n", + "\n", + "# Crops always a batch of images\n", + "cropped_image = face_cropper.transform([image], annotations=[annotations])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "62f5e3a70d3247e4beeed7318775d33a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib widget\n", + "import matplotlib.pyplot as plt\n", + "\n", + "figure = plt.figure()\n", + "plt.subplot(121)\n", + "bob.io.image.imshow(image)\n", + "plt.subplot(122)\n", + "bob.io.image.imshow(cropped_image[0].astype(\"uint8\"))\n", + "figure.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:light" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- GitLab