Commit d4021c32 authored by Tiago de Freitas Pereira

Merge branch 'tf-training' into 'master'

TF2 training

See merge request !96
parents 1a99ee82 163c02d1
#!/usr/bin/env python
# coding: utf-8
"""
Tensor pre-processing for some face recognition CNNs
"""
import os
from functools import partial
from multiprocessing import cpu_count
import tensorflow as tf
from tensorflow.keras import layers
# STANDARD FEATURES FROM OUR TF-RECORDS
FEATURES = {
"data": tf.io.FixedLenFeature([], tf.string),
"label": tf.io.FixedLenFeature([], tf.int64),
"key": tf.io.FixedLenFeature([], tf.string),
}
def decode_tfrecords(x, data_shape, data_type=tf.uint8):
features = tf.io.parse_single_example(x, FEATURES)
image = tf.io.decode_raw(features["data"], data_type)
image = tf.reshape(image, data_shape)
features["data"] = image
return features
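# A minimal sketch (added for illustration, not part of the original module) of how a
# record matching FEATURES above could be written. `_example_serialize_record` is a
# hypothetical helper; `image_bytes` is expected to be the raw uint8 buffer
# (e.g. ``numpy_image.tobytes()``), since decode_tfrecords uses tf.io.decode_raw.
def _example_serialize_record(image_bytes, label, key):
    def _bytes(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                "data": _bytes(image_bytes),
                "label": _int64(label),
                "key": _bytes(key),
            }
        )
    )
    return example.SerializeToString()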
def get_preprocessor(output_shape):
"""
"""
preprocessor = tf.keras.Sequential(
[
            # rotate before cropping
            # random rotation of up to 5 degrees
layers.experimental.preprocessing.RandomRotation(5 / 360),
layers.experimental.preprocessing.RandomCrop(
height=output_shape[0], width=output_shape[1]
),
layers.experimental.preprocessing.RandomFlip("horizontal"),
# FIXED_STANDARDIZATION from https://github.com/davidsandberg/facenet
# [-0.99609375, 0.99609375]
layers.experimental.preprocessing.Rescaling(
scale=1 / 128, offset=-127.5 / 128
),
]
)
return preprocessor
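# Quick illustrative check (a sketch, not used by the pipeline): the Rescaling layer
# with scale=1/128 and offset=-127.5/128 maps uint8 pixel values [0, 255] to
# [-127.5/128, 127.5/128] = [-0.99609375, 0.99609375], the FIXED_STANDARDIZATION
# range quoted above.
def _example_rescaling_range():
    rescale = layers.experimental.preprocessing.Rescaling(
        scale=1 / 128, offset=-127.5 / 128
    )
    return rescale(tf.constant([0.0, 255.0])).numpy()  # [-0.99609375, 0.99609375]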
def preprocess(preprocessor, features, augment=False):
image = features["data"]
label = features["label"]
image = preprocessor(image, training=augment)
return image, label
def prepare_dataset(
tf_record_paths,
batch_size,
epochs,
data_shape,
output_shape,
shuffle=False,
augment=False,
autotune=tf.data.experimental.AUTOTUNE,
n_cpus=cpu_count(),
shuffle_buffer=int(2e4),
):
"""
Create batches from a list of TF-Records
Parameters
----------
tf_record_paths: list
List of paths of the TF-Records
batch_size: int
epochs: int
shuffle: bool
augment: bool
autotune: int
n_cpus: int
shuffle_buffer: int
"""
ds = tf.data.Dataset.list_files(tf_record_paths, shuffle=shuffle)
ds = tf.data.TFRecordDataset(ds, num_parallel_reads=n_cpus)
if shuffle:
# ignore order and read files as soon as they come in
ignore_order = tf.data.Options()
ignore_order.experimental_deterministic = False
ds = ds.with_options(ignore_order)
ds = ds.map(partial(decode_tfrecords, data_shape=data_shape)).prefetch(buffer_size=autotune)
if shuffle:
ds = ds.shuffle(shuffle_buffer).repeat(epochs)
preprocessor = get_preprocessor(output_shape)
ds = ds.batch(batch_size).map(
partial(preprocess, preprocessor, augment=augment), num_parallel_calls=autotune,
)
    # Use buffered prefetching on all datasets
return ds.prefetch(buffer_size=autotune)
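# Illustrative usage sketch only; the glob pattern, shapes, and batch size below are
# placeholder assumptions rather than values prescribed by this module.
def _example_prepare_dataset():
    ds = prepare_dataset(
        tf_record_paths="/path/to/tfrecords/*.tfrecord",  # hypothetical location
        batch_size=90,
        epochs=1,
        data_shape=(182, 182, 3),
        output_shape=(160, 160),
        shuffle=True,
        augment=True,
    )
    images, labels = next(iter(ds))  # images: (90, 160, 160, 3) float32, labels: (90,)
    return images, labels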
#!/usr/bin/env python
# coding: utf-8
"""
Trains a face recognition CNN using the strategy from the paper
"A Discriminative Feature Learning Approach
for Deep Face Recognition" https://ydwen.github.io/papers/WenECCV16.pdf
The default backbone is the InceptionResnetv2
Do `./bin/python centerloss.py --help` for more information
"""
import os
from functools import partial
import click
import pkg_resources
import tensorflow as tf
from bob.learn.tensorflow.losses import CenterLoss, CenterLossLayer
from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2
from bob.learn.tensorflow.metrics import predict_using_tensors
from tensorflow.keras import layers
from bob.learn.tensorflow.callbacks import add_backup_callback
from bob.learn.tensorflow.metrics.embedding_accuracy import accuracy_from_embeddings
from bob.extension import rc
from bob.bio.face.tensorflow.preprocessing import prepare_dataset
# CNN Backbone
# Change your NN backbone here
BACKBONE = InceptionResNetV2
# SHAPES EXPECTED FROM THE DATASET USING THIS BACKBONE
DATA_SHAPE = (182, 182, 3) # size of faces
DATA_TYPE = tf.uint8
OUTPUT_SHAPE = (160, 160)
AUTOTUNE = tf.data.experimental.AUTOTUNE
# HERE WE VALIDATE WITH LFW
# INFORMATION ABOUT THE VALIDATION SET
VALIDATION_TF_RECORD_PATHS = rc["bob.bio.face.cnn.lfw_tfrecord_path"]
# there are 2812 samples in the validation set
VALIDATION_SAMPLES = 2812
VALIDATION_BATCH_SIZE = 38
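# 2812 samples / 38 per batch = 74 validation steps, which covers the set exactly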
# WEIGHTS BETWEEN THE TWO LOSSES
LOSS_WEIGHTS = {"cross_entropy": 1.0, "center_loss": 0.01}
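# For reference, a minimal sketch of the center-loss term from Wen et al. (ECCV 2016):
# half the squared distance of each embedding to its class center, averaged over the
# batch. The training below uses bob.learn.tensorflow.losses.CenterLoss instead,
# which also takes care of updating the centers (controlled by its alpha parameter).
def _example_center_loss(embeddings, labels, centers):
    # embeddings: (batch, n_features); labels: (batch,); centers: (n_classes, n_features)
    assigned = tf.gather(centers, labels)  # center of each sample's class
    return 0.5 * tf.reduce_mean(tf.reduce_sum(tf.square(embeddings - assigned), axis=-1))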
class CenterLossModel(tf.keras.Model):
def compile(
self,
cross_entropy,
center_loss,
loss_weights,
train_loss,
train_cross_entropy,
train_center_loss,
test_acc,
**kwargs,
):
super().compile(**kwargs)
self.cross_entropy = cross_entropy
self.center_loss = center_loss
self.loss_weights = loss_weights
self.train_loss = train_loss
self.train_cross_entropy = train_cross_entropy
self.train_center_loss = train_center_loss
self.test_acc = test_acc
def train_step(self, data):
images, labels = data
with tf.GradientTape() as tape:
logits, prelogits = self(images, training=True)
loss_cross = self.cross_entropy(labels, logits)
loss_center = self.center_loss(labels, prelogits)
loss = (
loss_cross * self.loss_weights[self.cross_entropy.name]
+ loss_center * self.loss_weights[self.center_loss.name]
)
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
self.train_loss(loss)
self.train_cross_entropy(loss_cross)
self.train_center_loss(loss_center)
return {
m.name: m.result()
for m in [self.train_loss, self.train_cross_entropy, self.train_center_loss]
}
def test_step(self, data):
images, labels = data
logits, prelogits = self(images, training=False)
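        # Validation accuracy is measured on the embeddings (prelogits) rather
        # than on the classifier logits.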
self.test_acc(accuracy_from_embeddings(labels, prelogits))
return {m.name: m.result() for m in [self.test_acc]}
def create_model(n_classes):
model = BACKBONE(
include_top=True,
classes=n_classes,
bottleneck=True,
input_shape=OUTPUT_SHAPE + (3,),
)
prelogits = model.get_layer("Bottleneck/BatchNorm").output
prelogits = CenterLossLayer(
n_classes=n_classes, n_features=prelogits.shape[-1], name="centers"
)(prelogits)
logits = model.get_layer("logits").output
model = CenterLossModel(
inputs=model.input, outputs=[logits, prelogits], name=model.name
)
return model
def build_and_compile_model(n_classes, learning_rate):
model = create_model(n_classes)
cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=True, name="cross_entropy"
)
center_loss = CenterLoss(
centers_layer=model.get_layer("centers"), alpha=0.9, name="center_loss",
)
optimizer = tf.keras.optimizers.RMSprop(
learning_rate=learning_rate, rho=0.9, momentum=0.9, epsilon=1.0
)
train_loss = tf.keras.metrics.Mean(name="loss")
train_cross_entropy = tf.keras.metrics.Mean(name="cross_entropy")
train_center_loss = tf.keras.metrics.Mean(name="center_loss")
test_acc = tf.keras.metrics.Mean(name="accuracy")
model.compile(
optimizer=optimizer,
cross_entropy=cross_entropy,
center_loss=center_loss,
loss_weights=LOSS_WEIGHTS,
train_loss=train_loss,
train_cross_entropy=train_cross_entropy,
train_center_loss=train_center_loss,
test_acc=test_acc,
)
return model
@click.command()
@click.argument("tf-record-paths")
@click.argument("checkpoint-path")
@click.option(
"-n",
"--n-classes",
default=87662,
help="Number of classes in the classification problem. Default to `87662`, which is the number of identities in our pruned MSCeleb",
)
@click.option(
"-b",
"--batch-size",
default=90,
help="Batch size. Be aware that we are using single precision. Batch size should be high.",
)
@click.option(
"-e", "--epochs", default=35, help="Number of epochs",
)
def train_and_evaluate(tf_record_paths, checkpoint_path, n_classes, batch_size, epochs):
    # Number of training steps to run before validating the model. This also
    # defines an "epoch" for Keras, which is not a true pass over the data; we
    # want to evaluate every 180000 (90 * 2000) samples.
STEPS_PER_EPOCH = 180000 // batch_size
learning_rate = 0.1
KERAS_EPOCH_MULTIPLIER = 6
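    # With the defaults (batch_size=90): STEPS_PER_EPOCH = 180000 // 90 = 2000 steps
    # per Keras "epoch" (about 180000 samples), and model.fit below runs
    # epochs * KERAS_EPOCH_MULTIPLIER = 35 * 6 = 210 Keras epochs in total.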
train_ds = prepare_dataset(
tf_record_paths,
batch_size,
epochs,
data_shape=DATA_SHAPE,
output_shape=OUTPUT_SHAPE,
shuffle=True,
augment=True,
)
if VALIDATION_TF_RECORD_PATHS is None:
raise ValueError(
"No validation set was set. Please, do `bob config set bob.bio.face.cnn.lfw_tfrecord_path [PATH]`"
)
val_ds = prepare_dataset(
VALIDATION_TF_RECORD_PATHS,
data_shape=DATA_SHAPE,
output_shape=OUTPUT_SHAPE,
epochs=epochs,
batch_size=VALIDATION_BATCH_SIZE,
shuffle=False,
augment=False,
)
val_metric_name = "val_accuracy"
model = build_and_compile_model(n_classes, learning_rate)
def scheduler(epoch, lr):
        # 20 epochs at 0.1, 10 at 0.01 and 5 at 0.001
        # The epoch number here is Keras's, which is different from the actual epoch number
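        # e.g. Keras epoch 119 -> actual epoch 19 -> lr 0.1,
        #      Keras epoch 120 -> actual epoch 20 -> lr 0.01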
epoch = epoch // KERAS_EPOCH_MULTIPLIER
if epoch in range(20):
return 0.1
elif epoch in range(20, 30):
return 0.01
else:
return 0.001
callbacks = {
"latest": tf.keras.callbacks.ModelCheckpoint(
f"{checkpoint_path}/latest", verbose=1
),
"best": tf.keras.callbacks.ModelCheckpoint(
f"{checkpoint_path}/best",
monitor=val_metric_name,
save_best_only=True,
mode="max",
verbose=1,
),
"tensorboard": tf.keras.callbacks.TensorBoard(
log_dir=f"{checkpoint_path}/logs", update_freq=15, profile_batch=0
),
"lr": tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1),
"nan": tf.keras.callbacks.TerminateOnNaN(),
}
callbacks = add_backup_callback(callbacks, backup_dir=f"{checkpoint_path}/backup")
model.fit(
train_ds,
validation_data=val_ds,
epochs=epochs * KERAS_EPOCH_MULTIPLIER,
steps_per_epoch=STEPS_PER_EPOCH,
validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE,
callbacks=callbacks,
verbose=2,
)
if __name__ == "__main__":
train_and_evaluate()
#!/usr/bin/env python
# coding: utf-8
"""
Trains a face recognition CNN using the strategy from the paper
"A Discriminative Feature Learning Approach
for Deep Face Recognition" https://ydwen.github.io/papers/WenECCV16.pdf
#########
# THIS ONE USES FLOAT16 TO COMPUTE THE GRADIENTS
# CHECK HERE FOR MORE INFO: https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/Policy
########
The default backbone is the InceptionResnetv2
Do `./bin/python centerloss_mixed_precision.py --help` for more information
"""
import os
from functools import partial
import click
import pkg_resources
import tensorflow as tf
from bob.learn.tensorflow.losses import CenterLoss, CenterLossLayer
from bob.learn.tensorflow.models.inception_resnet_v2 import InceptionResNetV2
from bob.learn.tensorflow.metrics import predict_using_tensors
from tensorflow.keras import layers
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from bob.learn.tensorflow.callbacks import add_backup_callback
from bob.learn.tensorflow.metrics.embedding_accuracy import accuracy_from_embeddings
from bob.extension import rc
from bob.bio.face.tensorflow.preprocessing import prepare_dataset
# Setting mixed precision policy
# https://www.tensorflow.org/api_docs/python/tf/keras/mixed_precision/experimental/Policy
policy = mixed_precision.Policy("mixed_float16")
mixed_precision.set_policy(policy)
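# Under this policy, layer computations run in float16 while variables are kept in
# float32; the LossScaleOptimizer used below scales the loss so that small float16
# gradients do not underflow.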
# CNN Backbone
# Change your NN backbone here
BACKBONE = InceptionResNetV2
# SHAPES EXPECTED FROM THE DATASET USING THIS BACKBONE
DATA_SHAPE = (182, 182, 3) # size of faces
DATA_TYPE = tf.uint8
OUTPUT_SHAPE = (160, 160)
AUTOTUNE = tf.data.experimental.AUTOTUNE
# HERE WE VALIDATE WITH LFW
# INFORMATION ABOUT THE VALIDATION SET
VALIDATION_TF_RECORD_PATHS = rc["bob.bio.face.cnn.lfw_tfrecord_path"]
# there are 2812 samples in the validation set
VALIDATION_SAMPLES = 2812
VALIDATION_BATCH_SIZE = 38
# WEIGHTS BETWEEN THE TWO LOSSES
LOSS_WEIGHTS = {"cross_entropy": 1.0, "center_loss": 0.01}
class CenterLossModel(tf.keras.Model):
def compile(
self,
cross_entropy,
center_loss,
loss_weights,
train_loss,
train_cross_entropy,
train_center_loss,
test_acc,
global_batch_size,
**kwargs,
):
super().compile(**kwargs)
self.cross_entropy = cross_entropy
self.center_loss = center_loss
self.loss_weights = loss_weights
self.train_loss = train_loss
self.train_cross_entropy = train_cross_entropy
self.train_center_loss = train_center_loss
self.test_acc = test_acc
self.global_batch_size = global_batch_size
def train_step(self, data):
images, labels = data
with tf.GradientTape() as tape:
logits, prelogits = self(images, training=True)
loss_cross = self.cross_entropy(labels, logits)
loss_center = self.center_loss(labels, prelogits)
loss = (
loss_cross * self.loss_weights[self.cross_entropy.name]
+ loss_center * self.loss_weights[self.center_loss.name]
)
unscaled_loss = tf.nn.compute_average_loss(
loss, global_batch_size=self.global_batch_size
)
loss = self.optimizer.get_scaled_loss(unscaled_loss)
trainable_vars = self.trainable_variables
gradients = tape.gradient(loss, trainable_vars)
gradients = self.optimizer.get_unscaled_gradients(gradients)
self.optimizer.apply_gradients(zip(gradients, trainable_vars))
self.train_loss(unscaled_loss)
self.train_cross_entropy(loss_cross)
self.train_center_loss(loss_center)
return {
m.name: m.result()
for m in [self.train_loss, self.train_cross_entropy, self.train_center_loss]
}
def test_step(self, data):
images, labels = data
logits, prelogits = self(images, training=False)
self.test_acc(accuracy_from_embeddings(labels, prelogits))
return {m.name: m.result() for m in [self.test_acc]}
def create_model(n_classes):
model = BACKBONE(
include_top=True,
classes=n_classes,
bottleneck=True,
input_shape=OUTPUT_SHAPE + (3,),
kernel_regularizer=tf.keras.regularizers.L2(5e-5),
)
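    # Cast the outputs back to float32 so that losses and metrics are computed in
    # full precision under the mixed_float16 policy.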
float32_layer = layers.Activation("linear", dtype="float32")
prelogits = model.get_layer("Bottleneck/BatchNorm").output
prelogits = CenterLossLayer(
n_classes=n_classes, n_features=prelogits.shape[-1], name="centers"
)(prelogits)
prelogits = float32_layer(prelogits)
logits = float32_layer(model.get_layer("logits").output)
model = CenterLossModel(
inputs=model.input, outputs=[logits, prelogits], name=model.name
)
return model
def build_and_compile_model(n_classes, learning_rate, global_batch_size):
model = create_model(n_classes)
cross_entropy = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=True, name="cross_entropy", reduction=tf.keras.losses.Reduction.NONE
)
center_loss = CenterLoss(
centers_layer=model.get_layer("centers"),
alpha=0.9,
name="center_loss",
reduction=tf.keras.losses.Reduction.NONE,
)
optimizer = tf.keras.optimizers.RMSprop(
learning_rate=learning_rate, rho=0.9, momentum=0.9, epsilon=1.0
)
optimizer = mixed_precision.LossScaleOptimizer(optimizer, loss_scale="dynamic")
train_loss = tf.keras.metrics.Mean(name="loss")
train_cross_entropy = tf.keras.metrics.Mean(name="cross_entropy")
train_center_loss = tf.keras.metrics.Mean(name="center_loss")
test_acc = tf.keras.metrics.Mean(name="accuracy")
model.compile(
optimizer=optimizer,
cross_entropy=cross_entropy,
center_loss=center_loss,
loss_weights=LOSS_WEIGHTS,
train_loss=train_loss,
train_cross_entropy=train_cross_entropy,
train_center_loss=train_center_loss,
test_acc=test_acc,
global_batch_size=global_batch_size,
)
return model
@click.command()
@click.argument("tf-record-paths")
@click.argument("checkpoint-path")
@click.option(
"-n",
"--n-classes",
default=87662,
help="Number of classes in the classification problem. Default to `87662`, which is the number of identities in our pruned MSCeleb",
)
@click.option(
"-b",
"--batch-size",
default=90 * 2,
help="Batch size. Be aware that we are using single precision. Batch size should be high.",
)
@click.option(
"-e", "--epochs", default=35, help="Number of epochs",
)
def train_and_evaluate(tf_record_paths, checkpoint_path, n_classes, batch_size, epochs):
    # Number of training steps to run before validating the model. This also
    # defines an "epoch" for Keras, which is not a true pass over the data; we
    # want to evaluate every 180000 (90 * 2000) samples.
STEPS_PER_EPOCH = 180000 // batch_size
learning_rate = 0.1
KERAS_EPOCH_MULTIPLIER = 6
train_ds = prepare_dataset(
tf_record_paths,
batch_size,
epochs,
data_shape=DATA_SHAPE,
output_shape=OUTPUT_SHAPE,
shuffle=True,
augment=True,
)
if VALIDATION_TF_RECORD_PATHS is None:
raise ValueError(
"No validation set was set. Please, do `bob config set bob.bio.face.cnn.lfw_tfrecord_path [PATH]`"
)
val_ds = prepare_dataset(
VALIDATION_TF_RECORD_PATHS,
data_shape=DATA_SHAPE,
output_shape=OUTPUT_SHAPE,
epochs=epochs,
batch_size=VALIDATION_BATCH_SIZE,
shuffle=False,
augment=False,
)
val_metric_name = "val_accuracy"
model = build_and_compile_model(
n_classes, learning_rate, global_batch_size=batch_size
)
def scheduler(epoch, lr):
        # 20 epochs at 0.1, 10 at 0.01 and 5 at 0.001
        # The epoch number here is Keras's, which is different from the actual epoch number
epoch = epoch // KERAS_EPOCH_MULTIPLIER
if epoch in range(20):
return 0.1
elif epoch in range(20, 30):
return 0.01
else:
return 0.001
callbacks = {
"latest": tf.keras.callbacks.ModelCheckpoint(
f"{checkpoint_path}/latest", verbose=1
),
"best": tf.keras.callbacks.ModelCheckpoint(
f"{checkpoint_path}/best",
monitor=val_metric_name,
save_best_only=True,
mode="max",
verbose=1,
),
"tensorboard": tf.keras.callbacks.TensorBoard(
log_dir=f"{checkpoint_path}/logs", update_freq=15, profile_batch="10,50"
),
"lr": tf.keras.callbacks.LearningRateScheduler(scheduler, verbose=1),
"nan": tf.keras.callbacks.TerminateOnNaN(),
}
callbacks = add_backup_callback(callbacks, backup_dir=f"{checkpoint_path}/backup")
model.fit(
train_ds,
validation_data=val_ds,
epochs=epochs * KERAS_EPOCH_MULTIPLIER,
steps_per_epoch=STEPS_PER_EPOCH,
validation_steps=VALIDATION_SAMPLES // VALIDATION_BATCH_SIZE,
callbacks=callbacks,
verbose=2,
)
if __name__ == "__main__":
train_and_evaluate()