From 8f81468ccb0db59f1b277efb041e51ad043647f4 Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Tue, 27 Apr 2021 08:44:46 +0200 Subject: [PATCH] Ported IJBC database --- cnn_training/msceleb_to_tfrecord.py | 291 +++++++++++++ cnn_training/vgg2_2_tfrecords.py | 189 ++++++++ cnn_training/vgg2_2_tfrecords_loose.py | 209 +++++++++ cnn_training/webface360_to_tfrecord.py | 190 ++++++++ .../._centerloss_mixed_precision.py | Bin 0 -> 4096 bytes cnn_training_cpy/arcface.py | 409 ++++++++++++++++++ cnn_training_cpy/centerloss.py | 251 +++++++++++ .../centerloss_mixed_precision.py | 279 ++++++++++++ .../facecrop-checkpoint.ipynb | 70 +++ .../.ipynb_checkpoints/facecrop-checkpoint.py | 54 +++ doc/faq/facecrop.ipynb | 109 +++++ 11 files changed, 2051 insertions(+) create mode 100644 cnn_training/msceleb_to_tfrecord.py create mode 100644 cnn_training/vgg2_2_tfrecords.py create mode 100644 cnn_training/vgg2_2_tfrecords_loose.py create mode 100644 cnn_training/webface360_to_tfrecord.py create mode 100644 cnn_training_cpy/._centerloss_mixed_precision.py create mode 100644 cnn_training_cpy/arcface.py create mode 100644 cnn_training_cpy/centerloss.py create mode 100644 cnn_training_cpy/centerloss_mixed_precision.py create mode 100644 doc/faq/.ipynb_checkpoints/facecrop-checkpoint.ipynb create mode 100644 doc/faq/.ipynb_checkpoints/facecrop-checkpoint.py create mode 100644 doc/faq/facecrop.ipynb diff --git a/cnn_training/msceleb_to_tfrecord.py b/cnn_training/msceleb_to_tfrecord.py new file mode 100644 index 00000000..7138d7cc --- /dev/null +++ b/cnn_training/msceleb_to_tfrecord.py @@ -0,0 +1,291 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Converts the MSCeleb annotated version to TF RECORD + +Usage: + msceleb_to_tfrecord.py <msceleb-path> <output-path> [--keys=<arg> --image-size=<arg> --use-eyes] + msceleb_to_tfrecord.py -h | --help + +Options: + -h --help Show this screen. + --keys=<arg> Pickle with the keys + --image-size=<arg> Final image size [default: 126] + --use-eyes Use eyes annotations. 
If not set, it will use the face crop only + +""" + + +from docopt import docopt +import numpy as np +import os +import bob.io.image +import bob.io.base +import tensorflow as tf +import sys +from datetime import datetime +import pickle +import numpy +from bob.bio.face.preprocessor import FaceCrop + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def detect_mtcnn_margin_face_crop(annotations, image, margin=44, final_size=126): + """ + Face crop using bounding box + """ + + annotations["topleft"] = [annotations["topleft"][0], annotations["topleft"][1]] + annotations["bottomright"] = [ + annotations["bottomright"][0], + annotations["bottomright"][1], + ] + + annotations["topleft"][0] = numpy.uint( + numpy.maximum(annotations["topleft"][0] - margin / 2, 0) + ) + annotations["topleft"][1] = numpy.uint( + numpy.maximum(annotations["topleft"][1] - margin / 2, 0) + ) + + annotations["bottomright"][0] = numpy.uint( + numpy.minimum(annotations["bottomright"][0] + margin / 2, image.shape[1]) + ) + annotations["bottomright"][1] = numpy.uint( + numpy.minimum(annotations["bottomright"][1] + margin / 2, image.shape[2]) + ) + + cropped_positions = {"topleft": (0, 0), "bottomright": (final_size, final_size)} + cropper = FaceCrop( + cropped_image_size=(final_size, final_size), + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=None, + annotator=None, + ) + + detected = cropper.transform([image], [annotations])[0] + + return detected.astype("uint8") + + +def detect_mtcnn_margin_eyes(annotations, image, margin=44, final_size=126): + + # final image position w.r.t the image size + RIGHT_EYE_POS = (final_size / 3.44, final_size / 3.02) + LEFT_EYE_POS = (final_size / 3.44, final_size / 1.49) + + # RIGHT_EYE_POS = (final_size / 3.34, + # final_size / 3.02) + # LEFT_EYE_POS = (final_size / 3.44, + # final_size / 1.59) + + cropped_positions = {"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS} + + cropper = FaceCrop( + cropped_image_size=(final_size, final_size), + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=None, + annotator=None, + ) + + detected = cropper.transform([image], [annotations])[0] + + return detected.astype("uint8") + + +def generate_tfrecord( + chunk_path, + output_tf_record_path, + detector, + keys, + final_size=126, + margin=44, + use_eyes=False, +): + def write_single_line_tfrecord(writer, image, offset, user_id): + + # Serializing + serialized_img = image.tobytes() + + # Writing + feature = { + "data": _bytes_feature(serialized_img), + "label": _int64_feature(offset), + "key": _bytes_feature(str.encode(user_id)), + } + + example = tf.train.Example(features=tf.train.Features(feature=feature)) + writer.write(example.SerializeToString()) + + os.makedirs(os.path.dirname(output_tf_record_path), exist_ok=True) + + with tf.io.TFRecordWriter(output_tf_record_path) as tf_writer: + + for identity in os.listdir(chunk_path): + # Discarting the one we've discarted already + if identity not in keys: + continue + + identity_path = os.path.join(chunk_path, identity) + if not os.path.isdir(identity_path): + continue + + sys.stdout.write(f"Processing {identity} \n") + sys.stdout.flush() + + for image_path in os.listdir(identity_path): + image_path = os.path.join(identity_path, image_path) + if os.path.splitext(image_path)[-1] != ".png": + continue + image = bob.io.image.load(image_path) + 
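+                # The MTCNN detector is assumed to return a list of annotation dicts
+                # (one per detected face) with keys such as "topleft", "bottomright",
+                # "leye" and "reye"; images with no detection are skipped and only
+                # the first detection is used below.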
annotations = detector(image) + + if len(annotations) == 0: + continue + else: + # Getting the first annotation + annotations = annotations[0] + + if use_eyes: + detected_image = detect_mtcnn_margin_eyes( + annotations, image, margin=margin, final_size=final_size + ) + else: + + detected_image = detect_mtcnn_margin_face_crop( + annotations, image, margin=margin, final_size=final_size + ) + # Converting H x W x C + detected_image = bob.io.image.to_matplotlib(detected_image) + + write_single_line_tfrecord( + tf_writer, detected_image, keys[identity], identity + ) + + +def get_keys(base_path, all_chunks): + """ + Read the file structure from `annotations.csv` to get the samples properly annotated + """ + + def decide(annotations_path): + """ + Decide if we should consider an identity or not. + The annotation has the following format. + + ``` + 0,3.png,4.png,1 + 1,4.png,40.png,1 + 2,40.png,46.png,1 + 3,46.png,47.png,1 + 4,47.png,55.png,1 + 5,55.png,56.png,1 + 6,56.png,71.png,1 + 7,71.png,79.png,1 + 8,79.png,99.png,1 + 9,99.png,100.png,1 + 10,100.png,3.png,1 + ``` + + The last collumn can assume the values: + - `0`: The pair of images are NOT from the same identity + - `1`: The pair of images ARE from the same identity + - `2`: The annotator doesn't know what to say + + + Here I'm considering the identity if you have more than 75% `1` and `2`. + + """ + + with open(annotations_path) as f: + lines = 0 + lines_with_zero = 0 + for l in f.readlines(): + lines += 1 + if l.split(",")[-1] == "\n": + lines_with_zero += 1 + continue + + decision = int(l.split(",")[-1]) + if decision == 0: + lines_with_zero += 1 + + # Discarting identities with more than 50% of the pairs not + # considered from the same identity + # This is the first cut + return True if lines_with_zero / lines < 0.50 else False + + offset = 0 + keys = dict() + for chunk in all_chunks: + path = os.path.join(base_path, chunk) + for identity in os.listdir(path): + if not os.path.isdir(os.path.join(path, identity)): + continue + + statistics = os.path.join(path, identity, "annotations.csv") + + if decide(statistics): + keys[identity] = offset + offset += 1 + else: + print(f"Rejected {identity}") + return keys + + +if __name__ == "__main__": + args = docopt(__doc__) + + MSCELEB_PATH = args["<msceleb-path>"] + output_path = args["<output-path>"] + image_size = int(args["--image-size"]) + use_eyes = args["--use-eyes"] + margin = 0 + + if "SGE_TASK_LAST" in os.environ: + TOTAL_CHUNKS = int(os.environ["SGE_TASK_LAST"]) + CURRENT_CHUNK = int(os.environ["SGE_TASK_ID"]) - 1 + else: + TOTAL_CHUNKS = 1 + CURRENT_CHUNK = 0 + + all_chunks = [f"chunk_{i}" for i in range(43)] + + if args["--keys"] is None: + keys = get_keys(MSCELEB_PATH, all_chunks) + with open("keys.pickle", "wb") as f: + f.write(pickle.dumps(keys)) + else: + keys = pickle.loads(open(args["--keys"], "rb").read()) + + chunk_id = all_chunks[CURRENT_CHUNK] + + from bob.ip.facedetect.mtcnn import MTCNN + + detector = MTCNN() + + output_tf_record_path = os.path.join(output_path, chunk_id + ".tfrecords") + + generate_tfrecord( + os.path.join(MSCELEB_PATH, chunk_id), + output_tf_record_path, + detector, + keys, + final_size=image_size, + margin=margin, + use_eyes=use_eyes, + ) + + sys.stdout.write("Done \n") + sys.stdout.flush() + diff --git a/cnn_training/vgg2_2_tfrecords.py b/cnn_training/vgg2_2_tfrecords.py new file mode 100644 index 00000000..8beeb95a --- /dev/null +++ b/cnn_training/vgg2_2_tfrecords.py @@ -0,0 +1,189 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Trains some face 
recognition baselines using ARC based models + +Usage: + vgg2_2_tfrecords.py <vgg-path> <output-path> + vgg2_2_tfrecords.py -h | --help + +Options: + -h --help Show this screen. + +""" + + +from docopt import docopt +import numpy as np +import os +import bob.io.image +import bob.io.base +import tensorflow as tf +import sys +from datetime import datetime + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def search_landmark(landmark_path, img_path): + with open(landmark_path) as f: + next(f) + for line in f: + line = line.split(",") + if img_path in line[0]: + return np.array( + [[float(line[i + 1]), float(line[i + 2])] for i in [0, 2, 4, 6, 8]] + ) + else: + return None + + +from bob.bio.face.preprocessor import FaceCrop + + +def align(image, annotations, cropped_image_size=(126, 126)): + + cropped_image_height, cropped_image_width = cropped_image_size + + # RIGHT_EYE_POS = (40, 46) + # LEFT_EYE_POS = (40, 80) + # cropped_positions = {"leye": LEFT_EYE_POS, "reye": RIGHT_EYE_POS} + # cropped_positions = {"leye": (49, 72), "reye": (49, 38)} + cropped_positions = {"leye": (55, 81), "reye": (55, 42)} + + cropper = FaceCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=None, + annotator=None, + ) + return bob.io.image.to_matplotlib( + cropper.transform([image], [annotations])[0].astype("uint8") + ) + + +def get_id_by_line(line): + return line.split("/")[0] + + +def generate_tfrecord( + base_path, landmark_path, file_list, output_tf_record_path, indexes +): + def write_single_line_tfrecord(writer, image, offset, user_id): + + # Serializing + serialized_img = image.tobytes() + + # Writing + feature = { + "data": _bytes_feature(serialized_img), + "label": _int64_feature(offset), + "key": _bytes_feature(str.encode(user_id)), + } + + example = tf.train.Example(features=tf.train.Features(feature=feature)) + writer.write(example.SerializeToString()) + + with tf.io.TFRecordWriter(output_tf_record_path) as tf_writer: + + current_id = None + with open(file_list) as f: + for file_name in f.readlines(): + + user_id = get_id_by_line(file_name) + if user_id in indexes: + + img = bob.io.base.load( + os.path.join(base_path, file_name).rstrip("\n") + ) + l_name = file_name.rstrip(".jpg\n") + + if current_id != user_id: + current_id = user_id + sys.stdout.write( + f"Writing user {current_id}. 
{str(datetime.now())} \n" + ) + sys.stdout.flush() + + landmarks = search_landmark(landmark_path, l_name) + if landmarks[0][0] > landmarks[1][0]: + annotations = { + "reye": (landmarks[1][1], landmarks[1][0]), + "leye": (landmarks[0][1], landmarks[0][0]), + } + else: + annotations = { + "reye": (landmarks[0][1], landmarks[0][0]), + "leye": (landmarks[1][1], landmarks[1][0]), + } + if landmarks is None: + raise ValueError(f"Landmark for {file_name} not found!") + + aligned_image = align(img, annotations) + write_single_line_tfrecord( + tf_writer, aligned_image, int(indexes[user_id]), user_id + ) + + +def map_indexes(image_path, n_chunks): + """ + Create a dictionary mapping the ID to VGG2-ID, like: + + {0: 'n000001'], + 1: 'n000002']} + + """ + + indexes = sorted(list(set([l.split("/")[0] for l in open(image_path).readlines()]))) + + identities_map = {indexes[i]: i for i in range(len(indexes))} + + # SPLIT THE DICTIONARY IN TOTAL_CHUNKS + indexes_as_list = list(identities_map.items()) + dict_as_list = np.array_split(indexes_as_list, n_chunks) + dicts = [dict(d) for d in dict_as_list] + + return dicts + + +if __name__ == "__main__": + args = docopt(__doc__) + + VGG2_PATH = args["<vgg-path>"] + LANDMARK_PATH = os.path.join(VGG2_PATH, "bb_landmark", "loose_landmark_train.csv") + + if "SGE_TASK_LAST" in os.environ: + TOTAL_CHUNKS = int(os.environ["SGE_TASK_LAST"]) + CURRENT_CHUNK = int(os.environ["SGE_TASK_ID"]) - 1 + else: + TOTAL_CHUNKS = 1 + CURRENT_CHUNK = 0 + + # TOTAL_CHUNKS = 140 + # CURRENT_CHUNK = 0 + + TRAINING_LIST = os.path.join(VGG2_PATH, "train_list.txt") + # TEST_LIST = os.path.join(VGG2_PATH, "test_list.txt") + + # MAP ALL INDEXES + + indexes = map_indexes(TRAINING_LIST, TOTAL_CHUNKS) + + generate_tfrecord( + os.path.join(VGG2_PATH, "train"), + LANDMARK_PATH, + TRAINING_LIST, + os.path.join( + args["<output-path>"], f"train_vgg2_chunk{CURRENT_CHUNK}.tfrecords" + ), + indexes[CURRENT_CHUNK], + ) + diff --git a/cnn_training/vgg2_2_tfrecords_loose.py b/cnn_training/vgg2_2_tfrecords_loose.py new file mode 100644 index 00000000..1fb8d6c8 --- /dev/null +++ b/cnn_training/vgg2_2_tfrecords_loose.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Crop VGG2 with loose crop based on bounding box + +Usage: + vgg2_2_tfrecords.py <vgg-path> <output-path> [--factor=<kn>] + vgg2_2_tfrecords.py -h | --help + +Options: + -h --help Show this screen. 
+ --factor=<kn> Crop Factor [default: 0.3] + +""" + + +from docopt import docopt +import numpy as np +import os +import bob.io.image +import bob.io.base +import tensorflow as tf +import sys +from datetime import datetime + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def search_landmark(landmark_path, img_path): + with open(landmark_path) as f: + next(f) + for line in f: + line = line.split(",") + if img_path in line[0]: + landmarks = np.array([float(line[i]) for i in [1, 2, 3, 4]]) + return { + "topleft": (landmarks[1], landmarks[0]), + "dimensions": (landmarks[3], landmarks[2]), + "bottomright": ( + landmarks[1] + landmarks[3], + landmarks[0] + landmarks[2], + ), + } + + else: + return None + + +def extend_annotations(annotations, img_bottom_right, factor=0.3): + width = annotations["dimensions"][1] + height = annotations["dimensions"][0] + + new_annotations = {"topleft": [0, 0], "bottomright": [0, 0]} + + new_annotations["topleft"][0] = max(0, annotations["topleft"][0] - height * factor) + new_annotations["topleft"][1] = max(0, annotations["topleft"][1] - width * factor) + + new_annotations["bottomright"][0] = min( + img_bottom_right[1], annotations["bottomright"][0] + height * factor + ) + new_annotations["bottomright"][1] = min( + img_bottom_right[0], annotations["bottomright"][1] + width * factor + ) + + return new_annotations + + +from bob.bio.face.preprocessor import FaceCrop + + +def align(image, annotations, cropped_image_size=(126, 126), factor=0.3): + + cropped_image_height, cropped_image_width = cropped_image_size + + img_bottom_right = (image.shape[1], image.shape[2]) + new_annotations = extend_annotations(annotations, img_bottom_right, factor=factor) + + cropped_positions = {"topleft": (0, 0), "bottomright": cropped_image_size} + cropper = FaceCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel="rgb", + fixed_positions=None, + annotator=None, + ) + return bob.io.image.to_matplotlib( + cropper.transform([image], [new_annotations])[0] + ).astype("uint8") + + +def get_id_by_line(line): + return line.split("/")[0] + + +def generate_tfrecord( + base_path, landmark_path, file_list, output_tf_record_path, indexes, factor=0.3 +): + def write_single_line_tfrecord(writer, image, offset, user_id): + + # Serializing + serialized_img = image.tobytes() + + # Writing + feature = { + "data": _bytes_feature(serialized_img), + "label": _int64_feature(offset), + "key": _bytes_feature(str.encode(user_id)), + } + + example = tf.train.Example(features=tf.train.Features(feature=feature)) + writer.write(example.SerializeToString()) + + with tf.io.TFRecordWriter(output_tf_record_path) as tf_writer: + + current_id = None + with open(file_list) as f: + for file_name in f.readlines(): + + user_id = get_id_by_line(file_name) + if user_id in indexes: + + img = bob.io.base.load( + os.path.join(base_path, file_name).rstrip("\n") + ) + l_name = file_name.rstrip(".jpg\n") + + if current_id != user_id: + current_id = user_id + sys.stdout.write( + f"Writing user {current_id}. 
{str(datetime.now())} \n" + ) + sys.stdout.flush() + + landmarks = search_landmark(landmark_path, l_name) + if landmarks is None: + raise ValueError(f"Landmark for {file_name} not found!") + + # aligned_image = align(img, annotations) + aligned_image = align( + img, landmarks, factor=factor, cropped_image_size=(126, 126) + ) + bob.io.base.save(bob.io.image.to_bob(aligned_image), "xuucu.png") + write_single_line_tfrecord( + tf_writer, aligned_image, int(indexes[user_id]), user_id + ) + + +def map_indexes(image_path, n_chunks): + """ + Create a dictionary mapping the ID to VGG2-ID, like: + + {0: 'n000001'], + 1: 'n000002']} + + """ + + indexes = sorted(list(set([l.split("/")[0] for l in open(image_path).readlines()]))) + + identities_map = {indexes[i]: i for i in range(len(indexes))} + + # SPLIT THE DICTIONARY IN TOTAL_CHUNKS + indexes_as_list = list(identities_map.items()) + dict_as_list = np.array_split(indexes_as_list, n_chunks) + dicts = [dict(d) for d in dict_as_list] + + return dicts + + +if __name__ == "__main__": + args = docopt(__doc__) + + VGG2_PATH = args["<vgg-path>"] + LANDMARK_PATH = os.path.join(VGG2_PATH, "bb_landmark", "loose_bb_train.csv") + + if "SGE_TASK_LAST" in os.environ: + TOTAL_CHUNKS = int(os.environ["SGE_TASK_LAST"]) + CURRENT_CHUNK = int(os.environ["SGE_TASK_ID"]) - 1 + else: + TOTAL_CHUNKS = 1 + CURRENT_CHUNK = 0 + + # TOTAL_CHUNKS = 140 + # CURRENT_CHUNK = 0 + + TRAINING_LIST = os.path.join(VGG2_PATH, "train_list.txt") + # TEST_LIST = os.path.join(VGG2_PATH, "test_list.txt") + + # MAP ALL INDEXES + + indexes = map_indexes(TRAINING_LIST, TOTAL_CHUNKS) + + generate_tfrecord( + os.path.join(VGG2_PATH, "train"), + LANDMARK_PATH, + TRAINING_LIST, + os.path.join( + args["<output-path>"], f"train_vgg2_chunk{CURRENT_CHUNK}.tfrecords" + ), + indexes[CURRENT_CHUNK], + factor=float(args["--factor"]), + ) + diff --git a/cnn_training/webface360_to_tfrecord.py b/cnn_training/webface360_to_tfrecord.py new file mode 100644 index 00000000..e0c40a1d --- /dev/null +++ b/cnn_training/webface360_to_tfrecord.py @@ -0,0 +1,190 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Converts WEB360 to TF RECORD + +Usage: + webface360_to_tfrecords.py <web360-path> <output-path> + webface360_to_tfrecords.py -h | --help + +Options: + -h --help Show this screen. 
+ +""" + + +from docopt import docopt +import numpy as np +import os +import bob.io.image +import bob.io.base +import tensorflow as tf +import sys +from datetime import datetime + + +def _bytes_feature(value): + return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) + + +def _int64_feature(value): + return tf.train.Feature(int64_list=tf.train.Int64List(value=[value])) + + +def get_web360dirs(): + """ + Here I'm hardcoding the paths so we get consistent tfrecords, + just in case the IT decides to reestructure the web360 directory + """ + + return [ + "0_0_000", + "0_0_001", + "0_0_002", + "0_1_003", + "0_1_004", + "0_1_005", + "0_2_006", + "0_2_007", + "0_2_008", + "0_3_009", + "0_3_010", + "0_3_011", + "0_4_012", + "0_4_013", + "0_4_014", + "0_5_015", + "0_5_016", + "0_5_017", + "0_6_018", + "0_6_019", + "0_6_020", + "1_0_000", + "1_0_001", + "1_0_002", + "1_1_003", + "1_1_004", + "1_1_005", + "1_2_006", + "1_2_007", + "1_2_008", + "1_3_009", + "1_3_010", + "1_3_011", + "1_4_012", + "1_4_013", + "1_4_014", + "1_5_015", + "1_5_016", + "1_5_017", + "1_6_018", + "1_6_019", + "1_6_020", + "2_0_000", + "2_0_001", + "2_0_002", + "2_1_003", + "2_1_004", + "2_1_005", + "2_2_006", + "2_2_007", + "2_2_008", + "2_3_009", + "2_3_010", + "2_3_011", + "2_4_012", + "2_4_013", + "2_4_014", + "2_5_015", + "2_5_016", + "2_5_017", + "2_6_018", + "2_6_019", + "2_6_020", + ] + + +def get_keys(base_path): + + root_dirs = get_web360dirs() + keys = dict() + offset = 0 + for r in root_dirs: + + identities_dir = os.path.join(base_path, r) + for i in os.listdir(identities_dir): + id_dir = os.path.join(identities_dir, i) + if os.path.isdir(id_dir): + keys[i] = offset + offset += 1 + return keys + + +def generate_tfrecord( + chunk_path, output_tf_record_path, keys, +): + def write_single_line_tfrecord(writer, image, offset, user_id): + + # Serializing + serialized_img = image.tobytes() + + # Writing + feature = { + "data": _bytes_feature(serialized_img), + "label": _int64_feature(offset), + "key": _bytes_feature(str.encode(user_id)), + } + + example = tf.train.Example(features=tf.train.Features(feature=feature)) + writer.write(example.SerializeToString()) + + os.makedirs(os.path.dirname(output_tf_record_path), exist_ok=True) + + with tf.io.TFRecordWriter(output_tf_record_path) as tf_writer: + + for identity in os.listdir(chunk_path): + # Discarting the one we've discarted already + if identity not in keys: + continue + + identity_path = os.path.join(chunk_path, identity) + if not os.path.isdir(identity_path): + continue + + sys.stdout.write(f"Processing {identity} \n") + sys.stdout.flush() + + for image_path in os.listdir(identity_path): + image_path = os.path.join(identity_path, image_path) + if os.path.splitext(image_path)[-1] != ".jpg": + continue + image = bob.io.image.to_matplotlib(bob.io.image.load(image_path)) + + write_single_line_tfrecord(tf_writer, image, keys[identity], identity) + + +if __name__ == "__main__": + args = docopt(__doc__) + + WEB360_PATH = args["<web360-path>"] + output_path = args["<output-path>"] + + if "SGE_TASK_LAST" in os.environ: + TOTAL_CHUNKS = int(os.environ["SGE_TASK_LAST"]) + CURRENT_CHUNK = int(os.environ["SGE_TASK_ID"]) - 1 + else: + TOTAL_CHUNKS = 1 + CURRENT_CHUNK = 0 + + # keys = get_keys(WEB360_PATH) + import pickle + + keys = pickle.loads(open("keys-web360.pickle", "rb").read()) + + root_dirs = get_web360dirs() + output_tf_record_path = os.path.join(output_path, f"chunk_{CURRENT_CHUNK}.tfrecord") + chunk_path = os.path.join(WEB360_PATH, 
root_dirs[CURRENT_CHUNK]) + + generate_tfrecord(chunk_path, output_tf_record_path, keys) + diff --git a/cnn_training_cpy/._centerloss_mixed_precision.py b/cnn_training_cpy/._centerloss_mixed_precision.py new file mode 100644 index 0000000000000000000000000000000000000000..881da051d61cfeb4e02036b9ea9c9df85b2fefdf GIT binary patch literal 4096 zcmZQz6=P>$Vqox1Ojhs@R)|o50+1L3ClDJkFz{^v(m+1nBL)UWIUt(=a103v0xDSr z(ZR3)$QD4;!U*I;#if$-bM+Dn3UX5QauSP6N{drdQW8s2l>>r7dM5YIiU-k_pJPF; z83m&uFd71*Aut*OqaiRF0;3@?8UmvsFd71*Aut*OqalDZ1VEil5C#Lekc`Y?g`(8r z{L-T2RE4zsqHKlCypq(slFa<P#GIT;g`Cv15{0Ck#Jp@^Ul^)uNK>f(hkHeaLGJ&5 E0JF3zQ~&?~ literal 0 HcmV?d00001 diff --git a/cnn_training_cpy/arcface.py b/cnn_training_cpy/arcface.py new file mode 100644 index 00000000..fb593509 --- /dev/null +++ b/cnn_training_cpy/arcface.py @@ -0,0 +1,409 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Trains some face recognition baselines using ARC based models + +# ARCFACE PARAMETERS from eq.4 +# FROM https://github.com/deepinsight/insightface/blob/master/recognition/ArcFace/sample_config.py#L153 + M1 = 1.0 + M2 = 0.3 + M3 = 0.2 + + +# ARCFACE PARAMETERS from eq.3 +M = 0.5 # ArcFace Margin #CHECK SECTION 3.1 +SCALE = 64.0 # Scale +# ORIGINAL = False # Original implementation + + +The config file has the following format to train an ARCFACE model: + +```yml +# VGG2 params +batch-size: 90 +face-size: 182 +face-output-size: 160 +n-classes: 87662 + + +## Backbone +backbone: 'mobilenet-v2' +head: 'arcface' +s: 10 +bottleneck: 512 +m: 0.5 + +# Training parameters +#solver: "rmsprop" +solver: "sgd" +lr: 0.1 +dropout-rate: 0.5 +epochs: 310 +lerning-rate-schedule: 'cosine-decay-restarts' + + + +train-tf-record-path: "/path/*.tfrecord" +validation-tf-record-path: "/path/lfw_pairs.tfrecord" +``` + + + + +Usage: + arcface.py <config-yaml> <checkpoint_path> + arcface.py -h | --help + +Options: + -h --help Show this screen. 
+ arcface.py arcface -h | help + +""" + +import os +from functools import partial + +import pkg_resources +import tensorflow as tf +from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2 +from bob.learn.tensorflow.metrics import predict_using_tensors +from tensorflow.keras import layers +from bob.learn.tensorflow.callbacks import add_backup_callback +from bob.learn.tensorflow.metrics.embedding_accuracy import accuracy_from_embeddings +from bob.extension import rc +from bob.bio.face.tensorflow.preprocessing import prepare_dataset +import yaml + +from bob.learn.tensorflow.layers import ( + add_bottleneck, + add_top, + SphereFaceLayer, + ModifiedSoftMaxLayer, +) + +from bob.learn.tensorflow.models import ( + EmbeddingValidation, + ArcFaceLayer, + ArcFaceModel, + ArcFaceLayer3Penalties, +) + + +############################## +# CNN Backbones +# Add your NN backbone here +############################## +BACKBONES = dict() +BACKBONES["inception-resnet-v2"] = InceptionResNetV2 +BACKBONES["efficientnet-B0"] = tf.keras.applications.EfficientNetB0 +BACKBONES["resnet50"] = tf.keras.applications.ResNet50 +BACKBONES["mobilenet-v2"] = tf.keras.applications.MobileNetV2 + +############################## +# SOLVER SPECIFICATIONS +############################## + +SOLVERS = dict() +# Parameters taken from https://github.com/davidsandberg/facenet/blob/master/src/facenet.py#L181 +# Fixing the start learning rate +learning_rate = 0.1 +SOLVERS["rmsprop"] = partial( + tf.keras.optimizers.RMSprop, + learning_rate=learning_rate, + rho=0.9, + momentum=0.9, + epsilon=1.0, +) +SOLVERS["adam"] = partial(tf.keras.optimizers.Adam, learning_rate=learning_rate) +SOLVERS["adagrad"] = partial(tf.keras.optimizers.Adagrad, learning_rate=learning_rate) +SOLVERS["sgd"] = partial( + tf.keras.optimizers.SGD, learning_rate=learning_rate, momentum=0.9, nesterov=True +) + + +################################ +# DATA SPECIFICATION +############################### +DATA_SHAPES = dict() + +# Inputs with 182x182 are cropped to 160x160 +DATA_SHAPES[182] = 160 +DATA_SHAPES[112] = 98 +DATA_SHAPES[126] = 112 + + +# SHAPES EXPECTED FROM THE DATASET USING THIS BACKBONE +# DATA_SHAPE = (182, 182, 3) # size of faces +DATA_TYPE = tf.uint8 +# OUTPUT_SHAPE = (160, 160) + +AUTOTUNE = tf.data.experimental.AUTOTUNE + +# HERE WE VALIDATE WITH LFW RUNNING A +# INFORMATION ABOUT THE VALIDATION SET +# VALIDATION_TF_RECORD_PATHS = rc["bob.bio.face.cnn.lfw_tfrecord_path"] + +# there are 2812 samples in the validation set +VALIDATION_SAMPLES = 2812 +VALIDATION_BATCH_SIZE = 38 + + +def create_model( + n_classes, model_spec, backbone, bottleneck, dropout_rate, input_shape +): + + if backbone == "inception-resnet-v2": + pre_model = BACKBONES[backbone]( + include_top=False, bottleneck=False, input_shape=input_shape, + ) + else: + pre_model = BACKBONES[backbone]( + include_top=False, input_shape=input_shape, weights=None, + ) + + # Adding the bottleneck + pre_model = add_bottleneck( + pre_model, bottleneck_size=bottleneck, dropout_rate=dropout_rate + ) + pre_model = add_top(pre_model, n_classes=n_classes) + + float32_layer = layers.Activation("linear", dtype="float32") + + embeddings = tf.nn.l2_normalize( + pre_model.get_layer("embeddings/BatchNorm").output, axis=1 + ) + + logits_premodel = float32_layer(pre_model.get_layer("logits").output) + + # Wrapping the embedding validation + pre_model = EmbeddingValidation( + pre_model.input, outputs=[logits_premodel, embeddings], name=pre_model.name + ) + + ################################ + ## 
Creating the specific models + if "arcface" in model_spec: + labels = tf.keras.layers.Input([], name="label") + logits_arcface = ArcFaceLayer( + n_classes, s=model_spec["arcface"]["s"], m=model_spec["arcface"]["m"] + )(embeddings, labels) + arc_model = ArcFaceModel( + inputs=(pre_model.input, labels), outputs=[logits_arcface, embeddings] + ) + elif "arcface-3p" in model_spec: + labels = tf.keras.layers.Input([], name="label") + logits_arcface = ArcFaceLayer3Penalties( + n_classes, + s=model_spec["arcface-3p"]["s"], + m1=model_spec["arcface-3p"]["m1"], + m2=model_spec["arcface-3p"]["m2"], + m3=model_spec["arcface-3p"]["m3"], + )(embeddings, labels) + arc_model = ArcFaceModel( + inputs=(pre_model.input, labels), outputs=[logits_arcface, embeddings] + ) + elif "sphereface" in model_spec: + logits_arcface = SphereFaceLayer(n_classes, m=model_spec["sphereface"]["m"],)( + embeddings + ) + arc_model = EmbeddingValidation( + pre_model.input, outputs=[logits_arcface, embeddings] + ) + + elif "modified-softmax" in model_spec: + logits_modified_softmax = ModifiedSoftMaxLayer(n_classes)(embeddings) + arc_model = EmbeddingValidation( + pre_model.input, outputs=[logits_modified_softmax, embeddings] + ) + + return pre_model, arc_model + + +def build_and_compile_models( + n_classes, optimizer, model_spec, backbone, bottleneck, dropout_rate, input_shape +): + pre_model, arc_model = create_model( + n_classes, model_spec, backbone, bottleneck, dropout_rate, input_shape + ) + + cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, name="cross_entropy" + ) + + pre_model.compile(optimizer=optimizer, loss=cross_entropy, metrics=["accuracy"]) + + arc_model.compile(optimizer=optimizer, loss=cross_entropy, metrics=["accuracy"]) + + return pre_model, arc_model + + +def train_and_evaluate( + tf_record_paths, + checkpoint_path, + n_classes, + batch_size, + epochs, + model_spec, + backbone, + optimizer, + bottleneck, + dropout_rate, + face_size, + validation_path, + lerning_rate_schedule, +): + + # number of training steps to do before validating a model. This also defines an epoch + # for keras which is not really true. We want to evaluate every 180000 (90 * 2000) + # samples + # STEPS_PER_EPOCH = 180000 // batch_size + # KERAS_EPOCH_MULTIPLIER = 6 + STEPS_PER_EPOCH = 2000 + + DATA_SHAPE = (face_size, face_size, 3) + OUTPUT_SHAPE = (DATA_SHAPES[face_size], DATA_SHAPES[face_size]) + + if validation_path is None: + validation_path = rc["bob.bio.face.cnn.lfw_tfrecord_path"] + if validation_path is None: + raise ValueError( + "No validation set was set. 
Please, do `bob config set bob.bio.face.cnn.lfw_tfrecord_path [PATH]`" + ) + + train_ds = prepare_dataset( + tf_record_paths, + batch_size, + epochs, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + shuffle=True, + augment=True, + ) + + val_ds = prepare_dataset( + validation_path, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + epochs=epochs, + batch_size=VALIDATION_BATCH_SIZE, + shuffle=False, + augment=False, + ) + val_metric_name = "val_accuracy" + + pre_model, arc_model = build_and_compile_models( + n_classes, + optimizer, + model_spec, + backbone, + bottleneck=bottleneck, + dropout_rate=dropout_rate, + input_shape=OUTPUT_SHAPE + (3,), + ) + + def scheduler(epoch, lr): + # 200 epochs at 0.1, 10 at 0.01 and 5 0.001 + # The epoch number here is Keras's which is different from actual epoch number + # epoch = epoch // KERAS_EPOCH_MULTIPLIER + + # Tracking in the tensorboard + tf.summary.scalar("learning rate", data=lr, step=epoch) + + if epoch in range(200): + return 1 * lr + else: + return lr * tf.math.exp(-0.01) + + if lerning_rate_schedule == "cosine-decay-restarts": + decay_steps = 50 + lr_decayed_fn = tf.keras.callbacks.LearningRateScheduler( + tf.keras.experimental.CosineDecayRestarts( + 0.1, decay_steps, t_mul=2.0, m_mul=0.8, alpha=0.1 + ), + verbose=1, + ) + + else: + lr_decayed_fn = tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1) + + callbacks = { + "latest": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/latest", verbose=1 + ), + "tensorboard": tf.keras.callbacks.TensorBoard( + log_dir=f"{checkpoint_path}/logs", update_freq=15, profile_batch=0 + ), + "lr": lr_decayed_fn, + "nan": tf.keras.callbacks.TerminateOnNaN(), + } + + callbacks = add_backup_callback(callbacks, backup_dir=f"{checkpoint_path}/backup") + # STEPS_PER_EPOCH + pre_model.fit( + train_ds, + epochs=2, + validation_data=val_ds, + steps_per_epoch=STEPS_PER_EPOCH, + validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE, + callbacks=callbacks, + verbose=2, + ) + + # STEPS_PER_EPOCH + # epochs=epochs * KERAS_EPOCH_MULTIPLIER, + arc_model.fit( + train_ds, + validation_data=val_ds, + epochs=epochs, + steps_per_epoch=STEPS_PER_EPOCH, + validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE, + callbacks=callbacks, + verbose=2, + ) + + +from docopt import docopt + +if __name__ == "__main__": + args = docopt(__doc__) + + config = yaml.full_load(open(args["<config-yaml>"])) + + model_spec = dict() + if config["head"] == "arcface": + model_spec["arcface"] = dict() + model_spec["arcface"]["m"] = float(config["m"]) + model_spec["arcface"]["s"] = int(config["s"]) + + if config["head"] == "arcface-3p": + model_spec["arcface-3p"] = dict() + model_spec["arcface-3p"]["m1"] = float(config["m1"]) + model_spec["arcface-3p"]["m2"] = float(config["m2"]) + model_spec["arcface-3p"]["m3"] = float(config["m3"]) + model_spec["arcface-3p"]["s"] = int(config["s"]) + + if config["head"] == "sphereface": + model_spec["sphereface"] = dict() + model_spec["sphereface"]["m"] = float(config["m"]) + + if config["head"] == "modified-softmax": + # There's no hyper parameter here + model_spec["modified-softmax"] = dict() + + train_and_evaluate( + config["train-tf-record-path"], + args["<checkpoint_path>"], + int(config["n-classes"]), + int(config["batch-size"]), + int(config["epochs"]), + model_spec, + config["backbone"], + optimizer=SOLVERS[config["solver"]](learning_rate=float(config["lr"])), + bottleneck=int(config["bottleneck"]), + dropout_rate=float(config["dropout-rate"]), + 
face_size=int(config["face-size"]), + validation_path=config["validation-tf-record-path"], + lerning_rate_schedule=config["lerning-rate-schedule"], + ) + diff --git a/cnn_training_cpy/centerloss.py b/cnn_training_cpy/centerloss.py new file mode 100644 index 00000000..bac26eb5 --- /dev/null +++ b/cnn_training_cpy/centerloss.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python +# coding: utf-8 + +""" +Trains a face recognition CNN using the strategy from the paper + +"A Discriminative Feature Learning Approach +for Deep Face Recognition" https://ydwen.github.io/papers/WenECCV16.pdf + +The default backbone is the InceptionResnetv2 + +Do `./bin/python centerloss.py --help` for more information + +""" + +import os +from functools import partial +import click +import pkg_resources +import tensorflow as tf +from bob.learn.tensorflow.losses import CenterLoss, CenterLossLayer +from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2 +from bob.learn.tensorflow.metrics import predict_using_tensors +from tensorflow.keras import layers +from bob.learn.tensorflow.callbacks import add_backup_callback +from bob.learn.tensorflow.metrics.embedding_accuracy import accuracy_from_embeddings +from bob.extension import rc +from bob.bio.face.tensorflow.preprocessing import prepare_dataset + +# CNN Backbone +# Change your NN backbone here +BACKBONE = InceptionResNetV2 + +# SHAPES EXPECTED FROM THE DATASET USING THIS BACKBONE +DATA_SHAPE = (182, 182, 3) # size of faces +DATA_TYPE = tf.uint8 +OUTPUT_SHAPE = (160, 160) + +AUTOTUNE = tf.data.experimental.AUTOTUNE + +# HERE WE VALIDATE WITH LFW RUNNING A +# INFORMATION ABOUT THE VALIDATION SET +VALIDATION_TF_RECORD_PATHS = rc["bob.bio.face.cnn.lfw_tfrecord_path"] + +# there are 2812 samples in the validation set +VALIDATION_SAMPLES = 2812 +VALIDATION_BATCH_SIZE = 38 + +# WEIGHTS BEWTWEEN the two losses +LOSS_WEIGHTS = {"cross_entropy": 1.0, "center_loss": 0.01} + + +class CenterLossModel(tf.keras.Model): + def compile( + self, + cross_entropy, + center_loss, + loss_weights, + train_loss, + train_cross_entropy, + train_center_loss, + test_acc, + **kwargs, + ): + super().compile(**kwargs) + self.cross_entropy = cross_entropy + self.center_loss = center_loss + self.loss_weights = loss_weights + self.train_loss = train_loss + self.train_cross_entropy = train_cross_entropy + self.train_center_loss = train_center_loss + self.test_acc = test_acc + + def train_step(self, data): + images, labels = data + with tf.GradientTape() as tape: + logits, prelogits = self(images, training=True) + loss_cross = self.cross_entropy(labels, logits) + loss_center = self.center_loss(labels, prelogits) + loss = ( + loss_cross * self.loss_weights[self.cross_entropy.name] + + loss_center * self.loss_weights[self.center_loss.name] + ) + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + + self.train_loss(loss) + self.train_cross_entropy(loss_cross) + self.train_center_loss(loss_center) + return { + m.name: m.result() + for m in [self.train_loss, self.train_cross_entropy, self.train_center_loss] + } + + def test_step(self, data): + images, labels = data + logits, prelogits = self(images, training=False) + self.test_acc(accuracy_from_embeddings(labels, prelogits)) + return {m.name: m.result() for m in [self.test_acc]} + + +def create_model(n_classes): + + model = BACKBONE( + include_top=True, + classes=n_classes, + bottleneck=True, + input_shape=OUTPUT_SHAPE + (3,), + ) + + 
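+    # The backbone is built with include_top=True and bottleneck=True so that the
+    # "Bottleneck/BatchNorm" (pre-logit embedding) and "logits" layers referenced
+    # below exist; the CenterLossLayer is attached to that embedding output.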
prelogits = model.get_layer("Bottleneck/BatchNorm").output + prelogits = CenterLossLayer( + n_classes=n_classes, n_features=prelogits.shape[-1], name="centers" + )(prelogits) + + logits = model.get_layer("logits").output + model = CenterLossModel( + inputs=model.input, outputs=[logits, prelogits], name=model.name + ) + return model + + +def build_and_compile_model(n_classes, learning_rate): + model = create_model(n_classes) + + cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, name="cross_entropy" + ) + center_loss = CenterLoss( + centers_layer=model.get_layer("centers"), alpha=0.9, name="center_loss", + ) + + optimizer = tf.keras.optimizers.RMSprop( + learning_rate=learning_rate, rho=0.9, momentum=0.9, epsilon=1.0 + ) + + train_loss = tf.keras.metrics.Mean(name="loss") + train_cross_entropy = tf.keras.metrics.Mean(name="cross_entropy") + train_center_loss = tf.keras.metrics.Mean(name="center_loss") + + test_acc = tf.keras.metrics.Mean(name="accuracy") + + model.compile( + optimizer=optimizer, + cross_entropy=cross_entropy, + center_loss=center_loss, + loss_weights=LOSS_WEIGHTS, + train_loss=train_loss, + train_cross_entropy=train_cross_entropy, + train_center_loss=train_center_loss, + test_acc=test_acc, + ) + return model + + +@click.command() +@click.argument("tf-record-paths") +@click.argument("checkpoint-path") +@click.option( + "-n", + "--n-classes", + default=87662, + help="Number of classes in the classification problem. Default to `87662`, which is the number of identities in our pruned MSCeleb", +) +@click.option( + "-b", + "--batch-size", + default=90, + help="Batch size. Be aware that we are using single precision. Batch size should be high.", +) +@click.option( + "-e", "--epochs", default=35, help="Number of epochs", +) +def train_and_evaluate(tf_record_paths, checkpoint_path, n_classes, batch_size, epochs): + # number of training steps to do before validating a model. This also defines an epoch + # for keras which is not really true. We want to evaluate every 180000 (90 * 2000) + # samples + STEPS_PER_EPOCH = 180000 // batch_size + learning_rate = 0.1 + KERAS_EPOCH_MULTIPLIER = 6 + train_ds = prepare_dataset( + tf_record_paths, + batch_size, + epochs, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + shuffle=True, + augment=True, + ) + + if VALIDATION_TF_RECORD_PATHS is None: + raise ValueError( + "No validation set was set. 
Please, do `bob config set bob.bio.face.cnn.lfw_tfrecord_path [PATH]`" + ) + + val_ds = prepare_dataset( + VALIDATION_TF_RECORD_PATHS, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + epochs=epochs, + batch_size=VALIDATION_BATCH_SIZE, + shuffle=False, + augment=False, + ) + val_metric_name = "val_accuracy" + + model = build_and_compile_model(n_classes, learning_rate) + + def scheduler(epoch, lr): + # 20 epochs at 0.1, 10 at 0.01 and 5 0.001 + # The epoch number here is Keras's which is different from actual epoch number + epoch = epoch // KERAS_EPOCH_MULTIPLIER + if epoch in range(20): + return 0.1 + elif epoch in range(20, 30): + return 0.01 + else: + return 0.001 + + callbacks = { + "latest": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/latest", verbose=1 + ), + "best": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/best", + monitor=val_metric_name, + save_best_only=True, + mode="max", + verbose=1, + ), + "tensorboard": tf.keras.callbacks.TensorBoard( + log_dir=f"{checkpoint_path}/logs", update_freq=15, profile_batch=0 + ), + "lr": tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1), + "nan": tf.keras.callbacks.TerminateOnNaN(), + } + callbacks = add_backup_callback(callbacks, backup_dir=f"{checkpoint_path}/backup") + model.fit( + train_ds, + validation_data=val_ds, + epochs=epochs * KERAS_EPOCH_MULTIPLIER, + steps_per_epoch=STEPS_PER_EPOCH, + validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE, + callbacks=callbacks, + verbose=2, + ) + + +if __name__ == "__main__": + train_and_evaluate() diff --git a/cnn_training_cpy/centerloss_mixed_precision.py b/cnn_training_cpy/centerloss_mixed_precision.py new file mode 100644 index 00000000..e14069f5 --- /dev/null +++ b/cnn_training_cpy/centerloss_mixed_precision.py @@ -0,0 +1,279 @@ +w #!/usr/bin/env python +# coding: utf-8 + +""" +Trains a face recognition CNN using the strategy from the paper + +"A Discriminative Feature Learning Approach +for Deep Face Recognition" https://ydwen.github.io/papers/WenECCV16.pdf + +######### +# THIS ONE USES FLOAT16 TO COMPUTE THE GRADIENTS +# CHECKE HERE FOR MORE INFO: # https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/Policy +######## + +The default backbone is the InceptionResnetv2 + +Do `./bin/python centerloss_mixed_precision.py --help` for more information + +""" + +import os +from functools import partial +import click +import pkg_resources +import tensorflow as tf +from bob.learn.tensorflow.losses import CenterLoss, CenterLossLayer +from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2 +from bob.learn.tensorflow.metrics import predict_using_tensors +from tensorflow.keras import layers +from tensorflow.keras.mixed_precision import experimental as mixed_precision +from bob.learn.tensorflow.callbacks import add_backup_callback +from bob.learn.tensorflow.metrics.embedding_accuracy import accuracy_from_embeddings +from bob.extension import rc +from bob.bio.face.tensorflow.preprocessing import prepare_dataset + +# Setting mixed precision policy +# https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/Policy +policy = mixed_precision.Policy("mixed_float16") +mixed_precision.set_policy(policy) + +# CNN Backbone +# Change your NN backbone here +BACKBONE = InceptionResNetV2 + +# SHAPES EXPECTED FROM THE DATASET USING THIS BACKBONE +DATA_SHAPE = (182, 182, 3) # size of faces +DATA_TYPE = tf.uint8 +OUTPUT_SHAPE = (160, 160) + +AUTOTUNE = tf.data.experimental.AUTOTUNE + +# HERE WE 
VALIDATE WITH LFW RUNNING A +# INFORMATION ABOUT THE VALIDATION SET +VALIDATION_TF_RECORD_PATHS = rc["bob.bio.face.cnn.lfw_tfrecord_path"] + +# there are 2812 samples in the validation set +VALIDATION_SAMPLES = 2812 +VALIDATION_BATCH_SIZE = 38 + +# WEIGHTS BEWTWEEN the two losses +LOSS_WEIGHTS = {"cross_entropy": 1.0, "center_loss": 0.01} + + +class CenterLossModel(tf.keras.Model): + def compile( + self, + cross_entropy, + center_loss, + loss_weights, + train_loss, + train_cross_entropy, + train_center_loss, + test_acc, + global_batch_size, + **kwargs, + ): + super().compile(**kwargs) + self.cross_entropy = cross_entropy + self.center_loss = center_loss + self.loss_weights = loss_weights + self.train_loss = train_loss + self.train_cross_entropy = train_cross_entropy + self.train_center_loss = train_center_loss + self.test_acc = test_acc + self.global_batch_size = global_batch_size + + def train_step(self, data): + images, labels = data + with tf.GradientTape() as tape: + logits, prelogits = self(images, training=True) + loss_cross = self.cross_entropy(labels, logits) + loss_center = self.center_loss(labels, prelogits) + loss = ( + loss_cross * self.loss_weights[self.cross_entropy.name] + + loss_center * self.loss_weights[self.center_loss.name] + ) + unscaled_loss = tf.nn.compute_average_loss( + loss, global_batch_size=self.global_batch_size + ) + loss = self.optimizer.get_scaled_loss(unscaled_loss) + + trainable_vars = self.trainable_variables + gradients = tape.gradient(loss, trainable_vars) + gradients = self.optimizer.get_unscaled_gradients(gradients) + self.optimizer.apply_gradients(zip(gradients, trainable_vars)) + + self.train_loss(unscaled_loss) + self.train_cross_entropy(loss_cross) + self.train_center_loss(loss_center) + return { + m.name: m.result() + for m in [self.train_loss, self.train_cross_entropy, self.train_center_loss] + } + + def test_step(self, data): + images, labels = data + logits, prelogits = self(images, training=False) + self.test_acc(accuracy_from_embeddings(labels, prelogits)) + return {m.name: m.result() for m in [self.test_acc]} + + +def create_model(n_classes): + + model = BACKBONE( + include_top=True, + classes=n_classes, + bottleneck=True, + input_shape=OUTPUT_SHAPE + (3,), + kernel_regularizer=tf.keras.regularizers.L2(5e-5), + ) + float32_layer = layers.Activation("linear", dtype="float32") + + prelogits = model.get_layer("Bottleneck/BatchNorm").output + prelogits = CenterLossLayer( + n_classes=n_classes, n_features=prelogits.shape[-1], name="centers" + )(prelogits) + prelogits = float32_layer(prelogits) + logits = float32_layer(model.get_layer("logits").output) + model = CenterLossModel( + inputs=model.input, outputs=[logits, prelogits], name=model.name + ) + return model + + +def build_and_compile_model(n_classes, learning_rate, global_batch_size): + model = create_model(n_classes) + + cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy( + from_logits=True, name="cross_entropy", reduction=tf.keras.losses.Reduction.NONE + ) + center_loss = CenterLoss( + centers_layer=model.get_layer("centers"), + alpha=0.9, + name="center_loss", + reduction=tf.keras.losses.Reduction.NONE, + ) + + optimizer = tf.keras.optimizers.RMSprop( + learning_rate=learning_rate, rho=0.9, momentum=0.9, epsilon=1.0 + ) + optimizer = mixed_precision.LossScaleOptimizer(optimizer, loss_scale="dynamic") + + train_loss = tf.keras.metrics.Mean(name="loss") + train_cross_entropy = tf.keras.metrics.Mean(name="cross_entropy") + train_center_loss = 
tf.keras.metrics.Mean(name="center_loss") + + test_acc = tf.keras.metrics.Mean(name="accuracy") + + model.compile( + optimizer=optimizer, + cross_entropy=cross_entropy, + center_loss=center_loss, + loss_weights=LOSS_WEIGHTS, + train_loss=train_loss, + train_cross_entropy=train_cross_entropy, + train_center_loss=train_center_loss, + test_acc=test_acc, + global_batch_size=global_batch_size, + ) + return model + + +@click.command() +@click.argument("tf-record-paths") +@click.argument("checkpoint-path") +@click.option( + "-n", + "--n-classes", + default=87662, + help="Number of classes in the classification problem. Default to `87662`, which is the number of identities in our pruned MSCeleb", +) +@click.option( + "-b", + "--batch-size", + default=90 * 2, + help="Batch size. Be aware that we are using single precision. Batch size should be high.", +) +@click.option( + "-e", "--epochs", default=35, help="Number of epochs", +) +def train_and_evaluate(tf_record_paths, checkpoint_path, n_classes, batch_size, epochs): + # number of training steps to do before validating a model. This also defines an epoch + # for keras which is not really true. We want to evaluate every 180000 (90 * 2000) + # samples + STEPS_PER_EPOCH = 180000 // batch_size + learning_rate = 0.1 + KERAS_EPOCH_MULTIPLIER = 6 + train_ds = prepare_dataset( + tf_record_paths, + batch_size, + epochs, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + shuffle=True, + augment=True, + ) + + if VALIDATION_TF_RECORD_PATHS is None: + raise ValueError( + "No validation set was set. Please, do `bob config set bob.bio.face.cnn.lfw_tfrecord_path [PATH]`" + ) + + val_ds = prepare_dataset( + VALIDATION_TF_RECORD_PATHS, + data_shape=DATA_SHAPE, + output_shape=OUTPUT_SHAPE, + epochs=epochs, + batch_size=VALIDATION_BATCH_SIZE, + shuffle=False, + augment=False, + ) + val_metric_name = "val_accuracy" + + model = build_and_compile_model( + n_classes, learning_rate, global_batch_size=batch_size + ) + + def scheduler(epoch, lr): + # 20 epochs at 0.1, 10 at 0.01 and 5 0.001 + # The epoch number here is Keras's which is different from actual epoch number + epoch = epoch // KERAS_EPOCH_MULTIPLIER + if epoch in range(20): + return 0.1 + elif epoch in range(20, 30): + return 0.01 + else: + return 0.001 + + callbacks = { + "latest": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/latest", verbose=1 + ), + "best": tf.keras.callbacks.ModelCheckpoint( + f"{checkpoint_path}/best", + monitor=val_metric_name, + save_best_only=True, + mode="max", + verbose=1, + ), + "tensorboard": tf.keras.callbacks.TensorBoard( + log_dir=f"{checkpoint_path}/logs", update_freq=15, profile_batch="10,50" + ), + "lr": tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1), + "nan": tf.keras.callbacks.TerminateOnNaN(), + } + callbacks = add_backup_callback(callbacks, backup_dir=f"{checkpoint_path}/backup") + model.fit( + train_ds, + validation_data=val_ds, + epochs=epochs * KERAS_EPOCH_MULTIPLIER, + steps_per_epoch=STEPS_PER_EPOCH, + validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE, + callbacks=callbacks, + verbose=2, + ) + + +if __name__ == "__main__": + train_and_evaluate() diff --git a/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.ipynb b/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.ipynb new file mode 100644 index 00000000..8ab56222 --- /dev/null +++ b/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.ipynb @@ -0,0 +1,70 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to crop a face" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "CROPPED_IMAGE_HEIGHT = 128\n", + "CROPPED_IMAGE_WIDTH = 128\n", + "\n", + "EYE_CENTER_POS = (40, 64)\n", + "MOUTH_CENTER_POS = (88, 64)\n", + "\n", + "\n", + "mouth_center=(int((annotations['mouthleft'][0]+annotations['mouthright'][0])/2.0), int((annotations['mouthleft'][1]+annotations['mouthright'][1])/2.0))\n", + "\n", + "eye_center=(int((annotations['leye'][0]+annotations['reye'][0])/2.0), int((annotations['leye'][1]+annotations['reye'][1])/2.0))\n", + "\n", + "annotations['eye_center'] =eye_center\n", + "\n", + "annotations['mouth_center']=mouth_center\n", + "\n", + "light_cnn_face_cropper=bob.bio.face.preprocessor.FaceCrop(\n", + " cropped_image_size=(CROPPED_IMAGE_HEIGHT, CROPPED_IMAGE_WIDTH),\n", + " cropped_positions={'eye_center': EYE_CENTER_POS, 'mouth_center': MOUTH_CENTER_POS})\n", + "\n", + "\n", + "normalized_image = light_cnn_face_cropper.crop_face( image, annotations=annotations)" + ] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:light", + "text_representation": { + "extension": ".py", + "format_name": "light", + "format_version": "1.5", + "jupytext_version": "1.7.1" + } + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.py b/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.py new file mode 100644 index 00000000..ac6b78f4 --- /dev/null +++ b/doc/faq/.ipynb_checkpoints/facecrop-checkpoint.py @@ -0,0 +1,54 @@ +# --- +# jupyter: +# jupytext: +# formats: ipynb,py:light +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.11.1 +# kernelspec: +# display_name: Python 3 +# language: python +# name: python3 +# --- + +# ## How to crop a face + +# + +import bob.bio.face +import bob.io.image + +# Loading Ada's images +image = bob.io.image.load("./img/838_ada.jpg") + +# Setting Ada's eyes +annotations = dict() +annotations['reye'] = (265, 203) +annotations['leye'] = (278, 294) + +# Final cropped size +cropped_image_size = (224, 224) + +# Defining where we want the eyes to be located after the crop +cropped_positions = {"leye": (65, 150), "reye": (65, 77)} + + +face_cropper=bob.bio.face.preprocessor.FaceCrop( + cropped_image_size=cropped_image_size, + cropped_positions=cropped_positions, + color_channel="rgb") + +# Crops always a batch of images +cropped_image = face_cropper.transform([image], annotations=[annotations]) + + +# + +import matplotlib.pyplot as plt + +figure = plt.figure() +plt.subplot(121) +bob.io.image.imshow(image) +plt.subplot(122) +bob.io.image.imshow(cropped_image[0].astype("uint8")) +figure.show() diff --git a/doc/faq/facecrop.ipynb b/doc/faq/facecrop.ipynb new file mode 100644 index 00000000..03044580 --- /dev/null +++ b/doc/faq/facecrop.ipynb @@ -0,0 +1,109 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## How to crop a face" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "import bob.bio.face\n", + "import bob.io.image\n", + "\n", + "# Loading Ada's images\n", + 
"image = bob.io.image.load(\"./img/838_ada.jpg\")\n", + "\n", + "# Setting Ada's eyes\n", + "annotations = dict()\n", + "annotations['reye'] = (265, 203)\n", + "annotations['leye'] = (278, 294)\n", + "\n", + "# Final cropped size\n", + "cropped_image_size = (224, 224)\n", + "\n", + "# Defining where we want the eyes to be located after the crop\n", + "cropped_positions = {\"leye\": (65, 150), \"reye\": (65, 77)}\n", + "\n", + "\n", + "face_cropper=bob.bio.face.preprocessor.FaceCrop(\n", + " cropped_image_size=cropped_image_size,\n", + " cropped_positions=cropped_positions,\n", + " color_channel=\"rgb\")\n", + "\n", + "# Crops always a batch of images\n", + "cropped_image = face_cropper.transform([image], annotations=[annotations])" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "62f5e3a70d3247e4beeed7318775d33a", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%matplotlib widget\n", + "import matplotlib.pyplot as plt\n", + "\n", + "figure = plt.figure()\n", + "plt.subplot(121)\n", + "bob.io.image.imshow(image)\n", + "plt.subplot(122)\n", + "bob.io.image.imshow(cropped_image[0].astype(\"uint8\"))\n", + "figure.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "jupytext": { + "formats": "ipynb,py:light" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.7" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} -- GitLab