Preparing batch normalization

parent 54941120
......@@ -69,7 +69,7 @@ class Conv2D(Layer):
self.b = self.bias_initialization(shape=[self.filters],
name="b_" + str(self.name) + "bias",
scope="b_" + str(self.name)
)
)
def get_graph(self, training_phase=True):
......
......@@ -40,6 +40,10 @@ class Layer(object):
self.input_layer = None
self.activation = activation
# Batch normalization variables
self.beta = None
self.gamma = None
def create_variables(self, input_layer):
    """Create the variables of this layer; must be overridden by derived classes.

    The base implementation only signals that the method is abstract.

    Parameters
    ----------
    input_layer
        Passed through by the caller; unused here. Presumably the layer (or
        tensor) feeding this one -- confirm against the derived classes.

    Raises
    ------
    NotImplementedError
        Always, because the base class provides no implementation.
    """
    # The exception has to be raised, not just instantiated; otherwise a
    # subclass that forgets to override this method silently gets ``None``.
    raise NotImplementedError("Please implement this function in derived classes")
......@@ -61,30 +65,33 @@ class Layer(object):
"""
from tensorflow.python.ops import control_flow_ops
name = "batch_norm"
with tf.variable_scope(name):
phase_train = tf.convert_to_tensor(phase_train, dtype=tf.bool)
n_out = int(x.get_shape()[-1])
beta = tf.Variable(tf.constant(0.0, shape=[n_out], dtype=x.dtype),
name=name + '/beta', trainable=True, dtype=x.dtype)
gamma = tf.Variable(tf.constant(1.0, shape=[n_out], dtype=x.dtype),
name=name + '/gamma', trainable=True, dtype=x.dtype)
# If signal
#if len(x.get_shape()) == 2:
# batch_mean, batch_var = tf.nn.moments(x, [0], name='moments_{0}'.format(name))
#else:
name = "batch_norm_" + str(self.name)
#with tf.variable_scope(name):
phase_train = tf.convert_to_tensor(phase_train, dtype=tf.bool)
n_out = int(x.get_shape()[-1])
self.beta = tf.Variable(tf.constant(0.0, shape=[n_out], dtype=x.dtype),
name=name + '_beta',
trainable=True,
dtype=x.dtype)
self.gamma = tf.Variable(tf.constant(1.0, shape=[n_out], dtype=x.dtype),
name=name + '_gamma',
trainable=True,
dtype=x.dtype)
if len(x.get_shape()) == 2:
batch_mean, batch_var = tf.nn.moments(x, [0], name='moments_{0}'.format(name))
else:
batch_mean, batch_var = tf.nn.moments(x, range(len(x.get_shape())-1), name='moments_{0}'.format(name))
ema = tf.train.ExponentialMovingAverage(decay=0.9)
ema = tf.train.ExponentialMovingAverage(decay=0.9)
def mean_var_with_update():
ema_apply_op = ema.apply([batch_mean, batch_var])
with tf.control_dependencies([ema_apply_op]):
return tf.identity(batch_mean), tf.identity(batch_var)
def mean_var_with_update():
ema_apply_op = ema.apply([batch_mean, batch_var])
with tf.control_dependencies([ema_apply_op]):
return tf.identity(batch_mean), tf.identity(batch_var)
mean, var = control_flow_ops.cond(phase_train,
mean_var_with_update,
lambda: (ema.average(batch_mean), ema.average(batch_var)))
normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, 1e-3)
mean, var = control_flow_ops.cond(phase_train,
mean_var_with_update,
lambda: (ema.average(batch_mean), ema.average(batch_var)))
normed = tf.nn.batch_normalization(x, mean, var, self.beta, self.gamma, 1e-3)
return normed
......@@ -125,6 +125,11 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
variables[self.sequence_net[k].W.name] = self.sequence_net[k].W
variables[self.sequence_net[k].b.name] = self.sequence_net[k].b
# Dumping batch norm variables
if self.sequence_net[k].batch_norm:
variables[self.sequence_net[k].beta.name] = self.sequence_net[k].beta
variables[self.sequence_net[k].gamma.name] = self.sequence_net[k].gamma
return variables
def variable_summaries(self, var, name):
......@@ -200,7 +205,8 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
split_path = path.split("/")
for i in range(0, len(split_path)-1):
p = split_path[i]
hdf5.create_group(p)
if not hdf5.has_group(p):
hdf5.create_group(p)
# Saving the architecture
if self.pickle_architecture is not None:
......@@ -237,6 +243,9 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
session.run(self.sequence_net[k].W)
self.sequence_net[k].b.assign(hdf5.read(self.sequence_net[k].b.name)).eval(session=session)
session.run(self.sequence_net[k].b)
hdf5.cd("..")
def load(self, hdf5, shape=None, session=None, batch=1, use_gpu=False):
......@@ -276,26 +285,15 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
tf.initialize_all_variables().run(session=session)
self.load_variables_only(hdf5, session)
"""
def save(self, session, path, step=None):
if self.saver is None:
#variables = self.dump_variables()
#variables['mean'] = tf.Variable(10.0)
#import ipdb; ipdb.set_trace()
def save_original(self, session, saver, path):
return saver.save(session, path)
#tf.initialize_all_variables().run()
self.saver = tf.train.Saver(session)
def load_original(self, session, path):
saver = tf.train.import_meta_graph(path + ".meta")
saver.restore(session, path)
if step is None:
return self.saver.save(session, path)
else:
return self.saver.save(session, path)
def load(self, path, session=None):
if session is None:
session = tf.Session()
#if session is None:
# session = tf.Session()
#tf.initialize_all_variables().run(session=session)
# Loading variables
......@@ -308,6 +306,4 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
#variables['input_divide'] = self.input_divide
#variables['input_subtract'] = self.input_subtract
#self.saver = tf.train.Saver(variables)
self.saver.restore(session, path)
"""
\ No newline at end of file
#self.saver.restore(session, path)
......@@ -29,8 +29,8 @@ from bob.learn.tensorflow.loss import ContrastiveLoss, BaseLoss, TripletLoss
import numpy
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
#os.environ["CUDA_VISIBLE_DEVICES"] = ""
#os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
os.environ["CUDA_VISIBLE_DEVICES"] = ""
def main():
......@@ -45,8 +45,8 @@ def main():
import bob.db.mobio
db_mobio = bob.db.mobio.Database()
#directory = "/idiap/temp/tpereira/DEEP_FACE/CASIA_WEBFACE/mobio/preprocessed/"
directory = "./preprocessed/"
directory = "/idiap/temp/tpereira/DEEP_FACE/CASIA_WEBFACE/mobio/preprocessed/"
#directory = "./preprocessed/"
# Preparing train set
#train_objects = db_mobio.objects(protocol="male", groups="world")
......@@ -108,7 +108,7 @@ def main():
prefetch=False,
optimizer=optimizer,
snapshot=200,
temp_dir="./logs/")
temp_dir="/idiap/temp/tpereira/VGG16/mobio_world")
#trainer.train(train_data_shuffler, validation_data_shuffler)
trainer.train(train_data_shuffler)
......@@ -28,13 +28,13 @@ seed = 10
def scratch_network():
# Creating a random network
scratch = SequenceNetwork()
scratch = SequenceNetwork(default_feature_layer="fc1")
scratch.add(Conv2D(name="conv1", kernel_size=3,
filters=10,
activation=tf.nn.tanh,
weights_initialization=Xavier(seed=seed, use_gpu=False),
bias_initialization=Constant(use_gpu=False)
))
bias_initialization=Constant(use_gpu=False),
batch_norm=True))
scratch.add(FullyConnected(name="fc1", output_dim=10,
activation=None,
weights_initialization=Xavier(seed=seed, use_gpu=False),
......@@ -50,11 +50,15 @@ def validate_network(validation_data, validation_labels, directory):
input_shape=[28, 28, 1],
batch_size=validation_batch_size)
with tf.Session() as session:
validation_shape = [400, 28, 28, 1]
path = os.path.join(directory, "model.hdf5")
#path = os.path.join(directory, "model.ckp")
#scratch = SequenceNetwork(default_feature_layer="fc1")
scratch = SequenceNetwork()
scratch.load(bob.io.base.HDF5File(os.path.join(directory, "model.hdf5")),
#scratch.load_original(session, os.path.join(directory, "model.ckp"))
scratch.load(bob.io.base.HDF5File(path),
shape=validation_shape, session=session)
[data, labels] = validation_data_shuffler.get_batch()
predictions = scratch(data, session=session)
accuracy = 100. * numpy.sum(numpy.argmax(predictions, 1) == labels) / predictions.shape[0]
......@@ -91,6 +95,8 @@ def test_cnn_trainer_scratch():
temp_dir=directory)
trainer.train(train_data_shuffler)
import ipdb; ipdb.set_trace();
accuracy = validate_network(validation_data, validation_labels, directory)
assert accuracy > 80
......
......@@ -292,6 +292,9 @@ class Trainer(object):
with tf.Session(config=config) as session:
tf.initialize_all_variables().run()
# Original tensorflow saver object
saver = tf.train.Saver(var_list=tf.trainable_variables())
# Loading a pretrained model
if self.model_from_file != "":
logger.info("Loading pretrained model from {0}".format(self.model_from_file))
......@@ -328,7 +331,10 @@ class Trainer(object):
# Taking snapshot
if step % self.snapshot == 0:
logger.info("Taking snapshot")
hdf5 = bob.io.base.HDF5File(os.path.join(self.temp_dir, 'model_snapshot{0}.hdf5'.format(step)), 'w')
path = os.path.join(self.temp_dir, 'model_snapshot{0}.hdf5'.format(step))
#path_original = os.path.join(self.temp_dir, 'model_snapshot{0}.ckp'.format(step))
#self.architecture.save_original(session, saver, path_original)
hdf5 = bob.io.base.HDF5File(path, 'w')
self.architecture.save(hdf5)
del hdf5
......@@ -339,7 +345,10 @@ class Trainer(object):
self.validation_summary_writter.close()
# Saving the final network
hdf5 = bob.io.base.HDF5File(os.path.join(self.temp_dir, 'model.hdf5'), 'w')
path = os.path.join(self.temp_dir, 'model.hdf5')
#path_original = os.path.join(self.temp_dir, 'model.ckp')
#self.architecture.save_original(session, saver, path_original)
hdf5 = bob.io.base.HDF5File(path, 'w')
self.architecture.save(hdf5)
del hdf5
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment