From 136a436c0e3b7762ff7e0c5d1cdcdd075140d9bd Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira
Date: Mon, 31 Oct 2016 09:53:51 +0100
Subject: [PATCH] Setting up batch normalization

---
 bob/learn/tensorflow/layers/FullyConnected.py   |  4 ++--
 bob/learn/tensorflow/layers/InputLayer.py       |  2 +-
 bob/learn/tensorflow/layers/Layer.py            | 11 ++++++++---
 bob/learn/tensorflow/network/SequenceNetwork.py |  2 +-
 bob/learn/tensorflow/test/test_cnn.py           |  2 +-
 bob/learn/tensorflow/test/test_cnn_scratch.py   |  7 +++++--
 6 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/bob/learn/tensorflow/layers/FullyConnected.py b/bob/learn/tensorflow/layers/FullyConnected.py
index 586e448..80bce41 100644
--- a/bob/learn/tensorflow/layers/FullyConnected.py
+++ b/bob/learn/tensorflow/layers/FullyConnected.py
@@ -8,6 +8,7 @@ from .Layer import Layer
 from operator import mul
 from bob.learn.tensorflow.initialization import Xavier
 from bob.learn.tensorflow.initialization import Constant
+import numpy
 
 
 class FullyConnected(Layer):
@@ -68,8 +69,7 @@ class FullyConnected(Layer):
 
         if len(self.input_layer.get_shape()) == 4:
             shape = self.input_layer.get_shape().as_list()
-            #fc = tf.reshape(self.input_layer, [shape[0], shape[1] * shape[2] * shape[3]])
-            fc = tf.reshape(self.input_layer, [-1, shape[1] * shape[2] * shape[3]])
+            fc = tf.reshape(self.input_layer, [-1, numpy.prod(shape[1:])])
         else:
             fc = self.input_layer
 
diff --git a/bob/learn/tensorflow/layers/InputLayer.py b/bob/learn/tensorflow/layers/InputLayer.py
index 2f1e1d7..9a8a468 100644
--- a/bob/learn/tensorflow/layers/InputLayer.py
+++ b/bob/learn/tensorflow/layers/InputLayer.py
@@ -21,5 +21,5 @@ class InputLayer(Layer):
     def create_variables(self, input_layer):
         return
 
-    def get_graph(self):
+    def get_graph(self, training_phase=True):
         return self.original_layer
 
diff --git a/bob/learn/tensorflow/layers/Layer.py b/bob/learn/tensorflow/layers/Layer.py
index 67cd2b7..bdca659 100644
--- a/bob/learn/tensorflow/layers/Layer.py
+++ b/bob/learn/tensorflow/layers/Layer.py
@@ -61,16 +61,21 @@ class Layer(object):
         """
         from tensorflow.python.ops import control_flow_ops
 
-        name = 'batch_norm'
+        name = "batch_norm"
         with tf.variable_scope(name):
             phase_train = tf.convert_to_tensor(phase_train, dtype=tf.bool)
-            n_out = int(x.get_shape()[3])
+            n_out = int(x.get_shape()[-1])
             beta = tf.Variable(tf.constant(0.0, shape=[n_out], dtype=x.dtype),
                                name=name + '/beta', trainable=True, dtype=x.dtype)
             gamma = tf.Variable(tf.constant(1.0, shape=[n_out], dtype=x.dtype),
                                 name=name + '/gamma', trainable=True, dtype=x.dtype)
 
-            batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
+            # If signal
+            #if len(x.get_shape()) == 2:
+            #    batch_mean, batch_var = tf.nn.moments(x, [0], name='moments_{0}'.format(name))
+            #else:
+            batch_mean, batch_var = tf.nn.moments(x, range(len(x.get_shape())-1), name='moments_{0}'.format(name))
+
             ema = tf.train.ExponentialMovingAverage(decay=0.9)
 
             def mean_var_with_update():
 
diff --git a/bob/learn/tensorflow/network/SequenceNetwork.py b/bob/learn/tensorflow/network/SequenceNetwork.py
index 595a928..2697f95 100644
--- a/bob/learn/tensorflow/network/SequenceNetwork.py
+++ b/bob/learn/tensorflow/network/SequenceNetwork.py
@@ -70,7 +70,7 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
 
             if training or not isinstance(current_layer, Dropout):
                 current_layer.create_variables(input_offset)
-                input_offset = current_layer.get_graph()
+                input_offset = current_layer.get_graph(training_phase=training)
 
             if feature_layer is not None and k == feature_layer:
                 return input_offset
 
diff --git a/bob/learn/tensorflow/test/test_cnn.py b/bob/learn/tensorflow/test/test_cnn.py
index 9336fee..35bb0ea 100644
--- a/bob/learn/tensorflow/test/test_cnn.py
+++ b/bob/learn/tensorflow/test/test_cnn.py
@@ -5,7 +5,7 @@ import numpy
 
 from bob.learn.tensorflow.datashuffler import Memory, SiameseMemory, TripletMemory, Disk, SiameseDisk, TripletDisk, ImageAugmentation
-from bob.learn.tensorflow.network import Chopra, Lenet
+from bob.learn.tensorflow.network import Chopra
 from bob.learn.tensorflow.loss import BaseLoss, ContrastiveLoss, TripletLoss
 from bob.learn.tensorflow.trainers import Trainer, SiameseTrainer, TripletTrainer, constant
 
 
diff --git a/bob/learn/tensorflow/test/test_cnn_scratch.py b/bob/learn/tensorflow/test/test_cnn_scratch.py
index 66b813f..484d85a 100644
--- a/bob/learn/tensorflow/test/test_cnn_scratch.py
+++ b/bob/learn/tensorflow/test/test_cnn_scratch.py
@@ -33,11 +33,13 @@ def scratch_network():
                             filters=10,
                             activation=tf.nn.tanh,
                             weights_initialization=Xavier(seed=seed, use_gpu=False),
-                            bias_initialization=Constant(use_gpu=False)))
+                            bias_initialization=Constant(use_gpu=False)
+                            ))
     scratch.add(FullyConnected(name="fc1", output_dim=10,
                                activation=None,
                                weights_initialization=Xavier(seed=seed, use_gpu=False),
-                               bias_initialization=Constant(use_gpu=False)))
+                               bias_initialization=Constant(use_gpu=False)
+                               ))
 
     return scratch
 
@@ -90,6 +92,7 @@ def test_cnn_trainer_scratch():
 
     trainer.train(train_data_shuffler)
 
     accuracy = validate_network(validation_data, validation_labels, directory)
+    assert accuracy > 80
 
     del scratch
-- 
2.21.0
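
Note on the Layer.py hunk: it computes batch statistics over every axis except the last, so the same helper covers both fully connected (2D) and convolutional (4D) outputs, and it keeps an exponential moving average of those statistics for use outside the training phase. The code below is a minimal, self-contained sketch of that pattern, assuming the graph-mode TensorFlow API this package targeted at the time (tf.Variable, tf.nn.moments, tf.cond, tf.train.ExponentialMovingAverage). The name batch_norm_sketch and its arguments are illustrative only and are not part of bob.learn.tensorflow.

    # Hedged sketch of the batch-normalization pattern; not the repository's implementation.
    import tensorflow as tf

    def batch_norm_sketch(x, phase_train, decay=0.9, epsilon=1e-3):
        phase_train = tf.convert_to_tensor(phase_train, dtype=tf.bool)

        # Number of features/channels: the last axis for both fc (2D) and conv (4D) tensors.
        n_out = int(x.get_shape()[-1])
        beta = tf.Variable(tf.zeros([n_out]), name='beta')    # learned shift
        gamma = tf.Variable(tf.ones([n_out]), name='gamma')   # learned scale

        # Moments over every axis except the last: [0] for fc outputs, [0, 1, 2] for conv maps.
        axes = list(range(len(x.get_shape()) - 1))
        batch_mean, batch_var = tf.nn.moments(x, axes, name='moments')

        # Running statistics, updated during training and reused at test time.
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        # Training phase: use (and update) the batch statistics; otherwise use the moving averages.
        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon)

This choice between branches is also why SequenceNetwork now forwards training_phase into every layer's get_graph(): the graph has to know, at construction time, which set of statistics to wire in.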