From be8a07508d99d6c0ab2ef673c4c07dba72dd34e8 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Fri, 20 Oct 2017 10:12:45 +0200
Subject: [PATCH] Created estimator that handles siamese nets

---
 bob/learn/tensorflow/dataset/__init__.py      |   8 +-
 bob/learn/tensorflow/estimators/Siamese.py    | 136 ++++++++++++++++++
 bob/learn/tensorflow/estimators/__init__.py   |   1 +
 bob/learn/tensorflow/loss/ContrastiveLoss.py  |  56 +++++++-
 bob/learn/tensorflow/network/Dummy.py         |  36 ++---
 bob/learn/tensorflow/test/test_dataset.py     |  24 +++-
 .../tensorflow/test/test_estimator_siamese.py | 108 ++++++++++++++
 7 files changed, 340 insertions(+), 29 deletions(-)
 create mode 100755 bob/learn/tensorflow/estimators/Siamese.py
 create mode 100755 bob/learn/tensorflow/test/test_estimator_siamese.py

diff --git a/bob/learn/tensorflow/dataset/__init__.py b/bob/learn/tensorflow/dataset/__init__.py
index 8e0f584f..ce6f3e2f 100755
--- a/bob/learn/tensorflow/dataset/__init__.py
+++ b/bob/learn/tensorflow/dataset/__init__.py
@@ -44,20 +44,20 @@ def append_image_augmentation(image, gray_scale=False,
         image = tf.cast(image, tf.float32)
 
     if output_shape is not None:
-        assert output_shape.ndim == 2
+        assert len(output_shape) == 2
         image = tf.image.resize_image_with_crop_or_pad(image, output_shape[0], output_shape[1])
 
     if random_flip:
         image = tf.image.random_flip_left_right(image)
 
     if random_brightness:
-        image = tf.image.random_brightness(image)
+        image = tf.image.random_brightness(image, max_delta=0.5)
 
     if random_contrast:
-        image = tf.image.random_contrast(image)
+        image = tf.image.random_contrast(image, lower=0, upper=0.5)
 
     if random_saturation:
-        image = tf.image.random_saturation(image)
+        image = tf.image.random_saturation(image, lower=0, upper=0.5)
 
     if gray_scale:
         image = tf.image.rgb_to_grayscale(image, name="rgb_to_gray")
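
In TF 1.x, tf.image.random_brightness, tf.image.random_contrast and tf.image.random_saturation take their ranges as required arguments (there is no default max_delta or lower/upper), so the bare calls removed above raised TypeError. A standalone sketch of the fixed chain; the placeholder and range values are illustrative, mirroring the defaults patched in:

    import tensorflow as tf

    # Illustrative stand-in for a decoded RGB image tensor.
    image = tf.placeholder(tf.float32, shape=(250, 250, 3))
    image = tf.image.resize_image_with_crop_or_pad(image, 50, 50)
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.5)      # max_delta is required
    image = tf.image.random_contrast(image, lower=0, upper=0.5)   # lower/upper are required
    image = tf.image.random_saturation(image, lower=0, upper=0.5)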
diff --git a/bob/learn/tensorflow/estimators/Siamese.py b/bob/learn/tensorflow/estimators/Siamese.py
new file mode 100755
index 00000000..f36b6b44
--- /dev/null
+++ b/bob/learn/tensorflow/estimators/Siamese.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+
+import tensorflow as tf
+import os
+import bob.io.base
+import bob.core
+from tensorflow.core.framework import summary_pb2
+import time
+
+#logger = bob.core.log.setup("bob.learn.tensorflow")
+from tensorflow.python.estimator import estimator
+from bob.learn.tensorflow.utils import predict_using_tensors
+#from bob.learn.tensorflow.loss import mean_cross_entropy_center_loss
+
+
+import logging
+logger = logging.getLogger("bob.learn")
+
+
+class Siamese(estimator.Estimator):
+    """
+    NN estimator for Siamese networks.
+
+    The **architecture** function should follow this pattern:
+
+        def my_beautiful_function(placeholder):
+            end_points = dict()
+            graph = convXX(placeholder)
+            end_points['conv'] = graph
+            ....
+            return graph, end_points
+
+    The **loss** function should follow this pattern:
+
+        def my_beautiful_loss(logits, labels):
+            return loss_set_of_ops(logits, labels)
+
+
+    **Parameters**
+
+    architecture:
+        Pointer to a function that builds the graph.
+
+    optimizer:
+        One of the tensorflow solvers
+        (https://www.tensorflow.org/api_guides/python/train):
+        - tf.train.GradientDescentOptimizer
+        - tf.train.AdagradOptimizer
+        - ....
+
+    config:
+
+    n_classes:
+        Number of classes of your problem.
+
+    loss_op:
+        Pointer to a function that computes the loss.
+
+    embedding_validation:
+        Run the validation using embeddings? [default: False]
+
+    model_dir:
+        Model path.
+
+    validation_batch_size:
+        Size of the batch for validation. This value is used when the
+        validation with embeddings is used. This is a hack.
+    """
+
+    def __init__(self,
+                 architecture=None,
+                 optimizer=None,
+                 config=None,
+                 n_classes=0,
+                 loss_op=None,
+                 model_dir="",
+                 validation_batch_size=None,
+                 ):
+
+        self.architecture = architecture
+        self.optimizer = optimizer
+        self.n_classes = n_classes
+        self.loss_op = loss_op
+        self.loss = None
+
+        if self.architecture is None:
+            raise ValueError("Please specify a function to build the architecture !!")
+
+        if self.optimizer is None:
+            raise ValueError("Please specify an optimizer (https://www.tensorflow.org/api_guides/python/train) !!")
+
+        if self.loss_op is None:
+            raise ValueError("Please specify a function to build the loss !!")
+
+        if self.n_classes <= 0:
+            raise ValueError("Number of classes must be greater than 0")
+
+        def _model_fn(features, labels, mode, params, config):
+
+            if mode == tf.estimator.ModeKeys.TRAIN:
+
+                # The input function needs to provide a dictionary with the `left` and `right` keys
+                if 'left' not in features.keys() or 'right' not in features.keys():
+                    raise ValueError("The input function needs to contain a dictionary with the keys `left` and `right`")
+
+                # Building one graph per side of the pair; the towers share weights
+                prelogits_left = self.architecture(features['left'])[0]
+                prelogits_right = self.architecture(features['right'], reuse=True)[0]
+
+                # Compute the loss (for the TRAIN mode)
+                self.loss = self.loss_op(prelogits_left, prelogits_right, labels)
+
+                # Configure the training op (for the TRAIN mode)
+                global_step = tf.contrib.framework.get_or_create_global_step()
+                train_op = self.optimizer.minimize(self.loss, global_step=global_step)
+                return tf.estimator.EstimatorSpec(mode=mode, loss=self.loss,
+                                                  train_op=train_op)
+
+            # Compute the embeddings
+            prelogits = self.architecture(features)[0]
+            embeddings = tf.nn.l2_normalize(prelogits, 1)
+            predictions = {"embeddings": embeddings}
+
+            if mode == tf.estimator.ModeKeys.PREDICT:
+                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+
+            predictions_op = predict_using_tensors(predictions["embeddings"], labels, num=validation_batch_size)
+            eval_metric_ops = {"accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions_op)}
+
+            # Placeholder loss; only the accuracy metric is meaningful in EVAL mode
+            return tf.estimator.EstimatorSpec(mode=mode, loss=tf.reduce_mean(1), eval_metric_ops=eval_metric_ops)
+
+        super(Siamese, self).__init__(model_fn=_model_fn,
+                                      model_dir=model_dir,
+                                      config=config)

diff --git a/bob/learn/tensorflow/estimators/__init__.py b/bob/learn/tensorflow/estimators/__init__.py
index 04ce0a5e..e63d6329 100755
--- a/bob/learn/tensorflow/estimators/__init__.py
+++ b/bob/learn/tensorflow/estimators/__init__.py
@@ -1,4 +1,5 @@
 from .Logits import Logits, LogitsCenterLoss
+from .Siamese import Siamese
 
 # gets sphinx autodoc done right - don't remove it
 def __appropriate__(*args):
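
The _model_fn above only trains when the input function delivers a dict keyed `left`/`right` plus the pair labels. A minimal hand-rolled input_fn satisfying that contract, with random data and illustrative shapes:

    import tensorflow as tf

    def input_fn():
        # Random stand-in batch of 4 pairs of 50x50 RGB images.
        left = tf.random_uniform((4, 50, 50, 3))
        right = tf.random_uniform((4, 50, 50, 3))
        # 0 = genuine pair, 1 = impostor pair (the convention used by contrastive_loss)
        labels = tf.constant([0, 1, 0, 1], dtype=tf.int64)
        return {'left': left, 'right': right}, labels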
diff --git a/bob/learn/tensorflow/loss/ContrastiveLoss.py b/bob/learn/tensorflow/loss/ContrastiveLoss.py
index 1ec9ace5..62f93dac 100755
--- a/bob/learn/tensorflow/loss/ContrastiveLoss.py
+++ b/bob/learn/tensorflow/loss/ContrastiveLoss.py
@@ -33,6 +33,59 @@ def contrastive_loss(left_embedding, right_embedding, labels, contrastive_margin
 
     """
 
+    with tf.name_scope("contrastive_loss"):
+        labels = tf.to_float(labels)
+
+        left_embedding = tf.nn.l2_normalize(left_embedding, 1)
+        right_embedding = tf.nn.l2_normalize(right_embedding, 1)
+
+        d = compute_euclidean_distance(left_embedding, right_embedding)
+
+        with tf.name_scope("within_class"):
+            one = tf.constant(1.0)
+            within_class = tf.multiply(one - labels, tf.square(d))  # (1-Y)*(d^2)
+            within_class_loss = tf.reduce_mean(within_class, name=tf.GraphKeys.LOSSES)
+
+        with tf.name_scope("between_class"):
+            max_part = tf.square(tf.maximum(contrastive_margin - d, 0))
+            between_class = tf.multiply(labels, max_part)  # (Y) * max((margin - d)^2, 0)
+            between_class_loss = tf.reduce_mean(between_class, name=tf.GraphKeys.LOSSES)
+
+        with tf.name_scope("total_loss"):
+            loss = 0.5 * (within_class + between_class)
+            loss = tf.reduce_mean(loss, name=tf.GraphKeys.LOSSES)
+
+        tf.summary.scalar('loss', loss)
+        tf.summary.scalar('between_class', between_class_loss)
+        tf.summary.scalar('within_class', within_class_loss)
+
+        return loss
+
+
+def contrastive_loss_deprecated(left_embedding, right_embedding, labels, contrastive_margin=1.0):
+    """
+    Compute the contrastive loss as in
+
+    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
+
+    :math:`L = 0.5 * (1-Y) * D^2 + 0.5 * (Y) * {max(0, margin - D)}^2`
+
+    **Parameters**
+
+    left_embedding:
+        First element of the pair
+
+    right_embedding:
+        Second element of the pair
+
+    labels:
+        Label of the pair (0 or 1)
+
+    contrastive_margin:
+        Contrastive margin
+
+    """
+
     with tf.name_scope("contrastive_loss"):
         labels = tf.to_float(labels)
 
@@ -47,7 +100,7 @@ def contrastive_loss(left_embedding, right_embedding, labels, contrastive_margin
         max_part = tf.square(tf.maximum(contrastive_margin - d, 0))
         between_class = tf.multiply(labels, max_part)  # (Y) * max((margin - d)^2, 0)
 
-        loss = 0.5 * (within_class + between_class) 
+        loss = 0.5 * (within_class + between_class)
 
         loss_dict = dict()
         loss_dict['loss'] = tf.reduce_mean(loss, name=tf.GraphKeys.LOSSES)
@@ -55,3 +108,4 @@ def contrastive_loss(left_embedding, right_embedding, labels, contrastive_margin
         loss_dict['within_class'] = tf.reduce_mean(within_class, name=tf.GraphKeys.LOSSES)
 
     return loss_dict
+
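
A small worked check of the loss as implemented above (the within-class term is weighted by (1-Y), the between-class term by Y), in plain Python with illustrative values:

    # L = 0.5 * ((1-Y)*D^2 + Y*max(margin - D, 0)^2), matching the TF ops above.
    margin = 1.0

    def contrastive(d, y):
        within = (1.0 - y) * d ** 2             # Y=0: pull genuine pairs together
        between = y * max(margin - d, 0) ** 2   # Y=1: push impostors past the margin
        return 0.5 * (within + between)

    print(contrastive(0.2, 0))  # genuine pair, small distance   -> 0.02
    print(contrastive(0.2, 1))  # impostor pair, too close       -> 0.32
    print(contrastive(1.5, 1))  # impostor beyond the margin     -> 0.0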
diff --git a/bob/learn/tensorflow/network/Dummy.py b/bob/learn/tensorflow/network/Dummy.py
index 201d809e..bb88a598 100755
--- a/bob/learn/tensorflow/network/Dummy.py
+++ b/bob/learn/tensorflow/network/Dummy.py
@@ -18,24 +18,24 @@ def dummy(inputs, reuse=False):
 
     slim = tf.contrib.slim
     end_points = dict()
-
-    initializer = tf.contrib.layers.xavier_initializer()
-
-    graph = slim.conv2d(inputs, 10, [3, 3], activation_fn=tf.nn.relu, stride=1, scope='conv1',
-                        weights_initializer=initializer, reuse=reuse)
-    end_points['conv1'] = graph
-
-    graph = slim.max_pool2d(graph, [4, 4], scope='pool1')
-    end_points['pool1'] = graph
-
-    graph = slim.flatten(graph, scope='flatten1')
-    end_points['flatten1'] = graph
-
-    graph = slim.fully_connected(graph, 50,
-                                 weights_initializer=initializer,
-                                 activation_fn=None,
-                                 scope='fc1')
-    end_points['fc1'] = graph
+    with tf.variable_scope('Dummy', reuse=reuse):
+        initializer = tf.contrib.layers.xavier_initializer()
+
+        graph = slim.conv2d(inputs, 10, [3, 3], activation_fn=tf.nn.relu, stride=1, scope='conv1',
+                            weights_initializer=initializer)
+        end_points['conv1'] = graph
+
+        graph = slim.max_pool2d(graph, [4, 4], scope='pool1')
+        end_points['pool1'] = graph
+
+        graph = slim.flatten(graph, scope='flatten1')
+        end_points['flatten1'] = graph
+
+        graph = slim.fully_connected(graph, 50,
+                                     weights_initializer=initializer,
+                                     activation_fn=None,
+                                     scope='fc1')
+        end_points['fc1'] = graph
 
     return graph, end_points
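
This Dummy change is what makes the estimator work: wrapping the layers in tf.variable_scope('Dummy', reuse=reuse) lets the two towers of a pair share one set of weights. A quick sketch (shapes are illustrative):

    import tensorflow as tf
    from bob.learn.tensorflow.network import dummy

    left = tf.placeholder(tf.float32, shape=(None, 50, 50, 3))
    right = tf.placeholder(tf.float32, shape=(None, 50, 50, 3))

    embedding_left, _ = dummy(left)                # creates Dummy/conv1, Dummy/fc1, ...
    embedding_right, _ = dummy(right, reuse=True)  # rebinds to those same variables

    # One copy of the weights serves both towers:
    print(sorted(v.name for v in tf.global_variables()))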
diff --git a/bob/learn/tensorflow/test/test_dataset.py b/bob/learn/tensorflow/test/test_dataset.py
index f4fd0dac..4a782bb7 100755
--- a/bob/learn/tensorflow/test/test_dataset.py
+++ b/bob/learn/tensorflow/test/test_dataset.py
@@ -6,7 +6,8 @@ import pkg_resources
 import tensorflow as tf
 from bob.learn.tensorflow.dataset.siamese_image import shuffle_data_and_labels_image_augmentation as siamese_batch
 
-data_shape = (250, 250, 3)  # size of atnt images
+data_shape = (250, 250, 3)
+output_shape = (50, 50)
 data_type = tf.float32
 batch_size = 2
 validation_batch_size = 250
@@ -18,14 +19,25 @@ def test_siamese_dataset():
     # Trainer logits
     filenames = [pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
                  pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
-                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
+
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
+                 pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
                  pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_02_f12_i0_0.png')]
-    labels = [0, 0, 1, 1]
+    labels = [0, 0, 0, 0, 0, 0,
+              1, 1, 1, 1, 1, 1]
+
+    data, label = siamese_batch(filenames, labels, data_shape, data_type, 2, per_image_normalization=False, output_shape=output_shape)
 
-    data, label = siamese_batch(filenames, labels, data_shape, data_type, 2)
     with tf.Session() as session:
         d, l = session.run([data, label])
         assert len(l) == 2
-        assert d['left'].shape == (2, 250, 250, 3)
-        assert d['right'].shape == (2, 250, 250, 3)
+        assert d['left'].shape == (2, 50, 50, 3)
+        assert d['right'].shape == (2, 50, 50, 3)
diff --git a/bob/learn/tensorflow/test/test_estimator_siamese.py b/bob/learn/tensorflow/test/test_estimator_siamese.py
new file mode 100755
index 00000000..3abbd5e4
--- /dev/null
+++ b/bob/learn/tensorflow/test/test_estimator_siamese.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+
+import tensorflow as tf
+
+from bob.learn.tensorflow.network import dummy
+from bob.learn.tensorflow.estimators import Siamese
+from bob.learn.tensorflow.dataset.siamese_image import shuffle_data_and_labels_image_augmentation as siamese_batch
+from bob.learn.tensorflow.dataset.image import shuffle_data_and_labels_image_augmentation as single_batch
+
+from bob.learn.tensorflow.loss import contrastive_loss
+from bob.learn.tensorflow.utils.hooks import LoggerHookEstimator
+from bob.learn.tensorflow.utils import reproducible
+import pkg_resources
+
+import numpy
+import shutil
+import os
+
+
+tfrecord_train = "./train_mnist.tfrecord"
+tfrecord_validation = "./validation_mnist.tfrecord"
+model_dir = "./temp"
+
+learning_rate = 0.001
+data_shape = (250, 250, 3)  # size of the dummy database images
+output_shape = (50, 50)
+data_type = tf.float32
+batch_size = 4
+validation_batch_size = 2
+epochs = 1
+steps = 5000
+
+
+# Data
+filenames = [pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p01_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m301_01_p02_i0_0.png'),
+
+
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_01_p01_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
+             pkg_resources.resource_filename(__name__, 'data/dummy_image_database/m304_02_f12_i0_0.png'),
+             ]
+labels = [0, 0, 0, 0, 0, 0, 0, 0, 0,
+          1, 1, 1, 1, 1, 1, 1, 1, 1]
+
+
+def test_siamesetrainer():
+    # Siamese trainer
+    try:
+        trainer = Siamese(model_dir=model_dir,
+                          architecture=dummy,
+                          optimizer=tf.train.GradientDescentOptimizer(learning_rate),
+                          n_classes=10,
+                          loss_op=contrastive_loss,
+                          validation_batch_size=validation_batch_size)
+        run_siamesetrainer(trainer)
+    finally:
+        try:
+            shutil.rmtree(model_dir, ignore_errors=True)
+        except Exception:
+            pass
+
+
+def run_siamesetrainer(trainer):
+
+    # Cleaning up
+    tf.reset_default_graph()
+    assert len(tf.global_variables()) == 0
+
+    def input_fn():
+        return siamese_batch(filenames, labels, data_shape, data_type, batch_size, epochs=epochs, output_shape=output_shape,
+                             random_flip=True, random_brightness=True, random_contrast=True, random_saturation=True)
+
+    def input_validation_fn():
+        return single_batch(filenames, labels, data_shape, data_type, validation_batch_size, epochs=10, output_shape=output_shape)
+
+    hooks = [LoggerHookEstimator(trainer, batch_size, 300),
+             tf.train.SummarySaverHook(save_steps=1000,
+                                       output_dir=model_dir,
+                                       scaffold=tf.train.Scaffold(),
+                                       summary_writer=tf.summary.FileWriter(model_dir))]
+
+    trainer.train(input_fn, steps=steps, hooks=hooks)
+
+    acc = trainer.evaluate(input_validation_fn)
+    assert acc['accuracy'] > 0.5
+
+    # Cleaning up
+    tf.reset_default_graph()
+    assert len(tf.global_variables()) == 0
-- 
GitLab
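
Putting the pieces together, a condensed usage sketch distilled from the test above; paths and hyperparameters are illustrative, and `filenames`/`labels` are assumed to be defined as in the test:

    import tensorflow as tf
    from bob.learn.tensorflow.estimators import Siamese
    from bob.learn.tensorflow.network import dummy
    from bob.learn.tensorflow.loss import contrastive_loss
    from bob.learn.tensorflow.dataset.siamese_image import \
        shuffle_data_and_labels_image_augmentation as siamese_batch

    trainer = Siamese(model_dir="./temp",  # illustrative path
                      architecture=dummy,
                      optimizer=tf.train.GradientDescentOptimizer(0.001),
                      n_classes=10,
                      loss_op=contrastive_loss,
                      validation_batch_size=2)

    def input_fn():
        # The dataset helper pairs the files up and yields
        # ({'left': ..., 'right': ...}, labels) batches, as _model_fn expects.
        return siamese_batch(filenames, labels, (250, 250, 3), tf.float32, 4,
                             epochs=1, output_shape=(50, 50))

    trainer.train(input_fn, steps=5000)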