diff --git a/bob/learn/tensorflow/analyzers/SoftmaxAnalizer.py b/bob/learn/tensorflow/analyzers/SoftmaxAnalizer.py
index 6cb767278e9f8a5edde0c51798b70b8022d96cc1..f87cb0a60354cd8553a286622cc305ec8af973ab 100644
--- a/bob/learn/tensorflow/analyzers/SoftmaxAnalizer.py
+++ b/bob/learn/tensorflow/analyzers/SoftmaxAnalizer.py
@@ -31,24 +31,70 @@ class SoftmaxAnalizer(object):
         """
         self.data_shuffler = None
-        self.machine = None
+        self.trainer = None
         self.session = None
 
-    def __call__(self, data_shuffler, machine, session):
+    def __call__(self, data_shuffler, trainer, session):
 
         if self.data_shuffler is None:
             self.data_shuffler = data_shuffler
-            self.machine = machine
+            self.trainer = trainer
             self.session = session
 
         # Creating the graph
         feature_batch, label_batch = self.data_shuffler.get_placeholders(name="validation_accuracy")
         data, labels = self.data_shuffler.get_batch()
-        graph = self.machine.compute_graph(feature_batch)
+        graph = self.trainer.architecture.compute_graph(feature_batch)
+
+        predictions = numpy.argmax(self.session.run(graph, feed_dict={feature_batch: data[:]}), 1)
+        accuracy = 100. * numpy.sum(predictions == labels) / predictions.shape[0]
+
+        summaries = []
+        summaries.append(summary_pb2.Summary.Value(tag="accuracy_validation", simple_value=float(accuracy)))
+        return summary_pb2.Summary(value=summaries)
+
+
+class SoftmaxSiameseAnalizer(object):
+    """
+    Softmax analizer for siamese networks.
+    """
+
+    def __init__(self):
+        """
+        Softmax analizer
+
+        ** Parameters **
+
+        data_shuffler: The data shuffler used for validation
+        graph: The graph to be analyzed
+        session: Tensorflow session
+        convergence_threshold: Threshold used to decide if the training converged
+        convergence_reference: Reference used to analyze the convergence. Possible values are `eer`, `far10`, `far100`
+        """
+
+        self.data_shuffler = None
+        self.trainer = None
+        self.session = None
+
+    def __call__(self, data_shuffler, trainer, session):
+
+        if self.data_shuffler is None:
+            self.data_shuffler = data_shuffler
+            self.trainer = trainer
+            self.session = session
+
+        # Creating the graph
+        #feature_batch, label_batch = self.data_shuffler.get_placeholders(name="validation_accuracy")
+        feature_left_batch, feature_right_batch, label_batch = self.data_shuffler.get_placeholders_pair(name="validation_accuracy")
+
+        batch_left, batch_right, labels = self.data_shuffler.get_batch()
+        left = self.trainer.architecture.compute_graph(feature_left_batch)
+        # TODO: compute the right branch and derive pair-based predictions;
+        # the lines below still mirror the single-branch softmax analizer.
 
         predictions = numpy.argmax(self.session.run(graph, feed_dict={feature_batch: data[:]}), 1)
         accuracy = 100. * numpy.sum(predictions == labels) / predictions.shape[0]
         summaries = []
         summaries.append(summary_pb2.Summary.Value(tag="accuracy_validation", simple_value=float(accuracy)))
-        return summary_pb2.Summary(value=summaries)
\ No newline at end of file
+        return summary_pb2.Summary(value=summaries)
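For reference, the accuracy both analizers report reduces to an argmax over the network output followed by a batch average. A minimal standalone sketch of that logic; `logits` and `labels` are hypothetical stand-ins for the session output and the shuffler batch:

```python
import numpy

def softmax_accuracy(logits, labels):
    """Percentage of rows in `logits` whose argmax matches `labels`."""
    predictions = numpy.argmax(logits, axis=1)  # winning class per sample
    return 100. * numpy.sum(predictions == labels) / predictions.shape[0]

# Hypothetical batch: 4 samples, 3 classes
logits = numpy.array([[0.1, 0.8, 0.1],
                      [0.7, 0.2, 0.1],
                      [0.2, 0.3, 0.5],
                      [0.9, 0.05, 0.05]])
labels = numpy.array([1, 0, 2, 1])
print(softmax_accuracy(logits, labels))  # 75.0
```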
diff --git a/bob/learn/tensorflow/data/BaseDataShuffler.py b/bob/learn/tensorflow/data/BaseDataShuffler.py
index eacdd2b4af8603534923caaed4194dcdd19321b8..16a40b2aa12d7a508217325b2a5f2b049e0c280c 100644
--- a/bob/learn/tensorflow/data/BaseDataShuffler.py
+++ b/bob/learn/tensorflow/data/BaseDataShuffler.py
@@ -47,7 +47,10 @@ class BaseDataShuffler(object):
         self.indexes = numpy.array(range(self.n_samples))
         numpy.random.shuffle(self.indexes)
 
+        # TODO: Reorganize the data shufflers for siamese and triplet networks
         self.data_placeholder = None
+        self.data2_placeholder = None
+        self.data3_placeholder = None
         self.label_placeholder = None
 
     def get_placeholders_forprefetch(self, name=""):
@@ -59,6 +62,21 @@ class BaseDataShuffler(object):
             self.label_placeholder = tf.placeholder(tf.int64, shape=[None, ])
         return self.data_placeholder, self.label_placeholder
 
+    def get_placeholders_pair_forprefetch(self, name=""):
+        """
+        Returns the placeholders (left data, right data, labels) sized for prefetching
+        """
+        if self.data_placeholder is None:
+            self.data_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name+"_left")
+
+        if self.data2_placeholder is None:
+            self.data2_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name+"_right")
+
+        if self.label_placeholder is None:
+            self.label_placeholder = tf.placeholder(tf.int64, shape=[None, ])
+
+        return self.data_placeholder, self.data2_placeholder, self.label_placeholder
+
     def get_placeholders(self, name=""):
         """
         Returns a place holder with the size of your batch
         """
@@ -66,9 +84,28 @@ class BaseDataShuffler(object):
         if self.data_placeholder is None:
             self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name)
+
+        if self.label_placeholder is None:
             self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0])
+
         return self.data_placeholder, self.label_placeholder
 
+    def get_placeholders_pair(self, name=""):
+        """
+        Returns the placeholders (left data, right data, labels) with the size of your batch
+        """
+
+        if self.data_placeholder is None:
+            self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_left")
+
+        if self.data2_placeholder is None:
+            self.data2_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_right")
+
+        if self.label_placeholder is None:
+            self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0], name="label")
+
+        return self.data_placeholder, self.data2_placeholder, self.label_placeholder
+
     def get_genuine_or_not(self, input_data, input_labels, genuine=True):
 
         if genuine:
diff --git a/bob/learn/tensorflow/script/train_mnist.py b/bob/learn/tensorflow/script/train_mnist.py
index 046d83f720f50d95ff22466dd5f0b2cea5438474..945f9a34b48357e17cbc77f7e5df627173523ec9 100644
--- a/bob/learn/tensorflow/script/train_mnist.py
+++ b/bob/learn/tensorflow/script/train_mnist.py
@@ -87,13 +87,13 @@ def main():
                                              batch_size=VALIDATION_BATCH_SIZE)
 
     # Preparing the architecture
-    cnn = False
+    cnn = True
    if cnn:
         architecture = Chopra(seed=SEED)
         #architecture = Lenet(seed=SEED)
         #architecture = Dummy(seed=SEED)
         loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
-        trainer = Trainer(architecture=architecture, loss=loss, iterations=ITERATIONS, prefetch=False, temp_dir="cnn")
+        trainer = Trainer(architecture=architecture, loss=loss, iterations=ITERATIONS, prefetch=False, temp_dir="./LOGS/cnn")
         trainer.train(train_data_shuffler, validation_data_shuffler)
         #trainer.train(train_data_shuffler)
     else:
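The pair placeholders introduced above are consumed together with the output of the shuffler's `get_pair`. A minimal sketch of that contract, using the TF 0.x API this codebase targets; the batch shape and the 0/1 genuine/impostor label convention are assumptions for illustration:

```python
import numpy
import tensorflow as tf  # TF 0.x API, as used by this codebase

shape = (16, 28, 28, 1)  # hypothetical (batch, height, width, channels)

# Mirrors what BaseDataShuffler.get_placeholders_pair hands back
left = tf.placeholder(tf.float32, shape=shape, name="train_left")
right = tf.placeholder(tf.float32, shape=shape, name="train_right")
labels = tf.placeholder(tf.int64, shape=[shape[0]], name="label")

# Stand-ins for data_shuffler.get_pair(): two image batches plus one label per pair
batch_left = numpy.random.rand(*shape).astype("float32")
batch_right = numpy.random.rand(*shape).astype("float32")
pair_labels = numpy.random.randint(0, 2, size=shape[0])

feed_dict = {left: batch_left, right: batch_right, labels: pair_labels}

# Any graph built on top of the placeholders can now be evaluated
distance = tf.reduce_sum(tf.square(left - right), reduction_indices=[1, 2, 3])
with tf.Session() as session:
    print(session.run(distance, feed_dict=feed_dict).shape)  # (16,)
```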
diff --git a/bob/learn/tensorflow/script/train_mnist_siamese.py b/bob/learn/tensorflow/script/train_mnist_siamese.py
index b6680f6b68ede712fab8430da41676ff5e899eaa..b63fa2f2dd8bb2cf53d87ceeaa5ff0bfa0b1a948 100644
--- a/bob/learn/tensorflow/script/train_mnist_siamese.py
+++ b/bob/learn/tensorflow/script/train_mnist_siamese.py
@@ -112,28 +112,29 @@ def main():
     #                                         batch_size=VALIDATION_BATCH_SIZE)
 
     # Preparing the architecture
-    #n_classes = len(train_data_shuffler.possible_labels)
-    n_classes = 50
+    n_classes = len(train_data_shuffler.possible_labels)
+    #n_classes = 50
     cnn = True
     if cnn:
 
         # LENET PAPER CHOPRA
-        architecture = Chopra(seed=SEED)
+        architecture = Chopra(seed=SEED, fc1_output=n_classes)
         #architecture = Lenet(default_feature_layer="fc2", n_classes=n_classes, conv1_output=8, conv2_output=16, use_gpu=USE_GPU)
         #architecture = VGG(n_classes=n_classes, use_gpu=USE_GPU)
         #architecture = Dummy(seed=SEED)
 
         #architecture = LenetDropout(default_feature_layer="fc2", n_classes=n_classes, conv1_output=4, conv2_output=8, use_gpu=USE_GPU)
 
-        loss = ContrastiveLoss(contrastive_margin=3.)
-        optimizer = tf.train.GradientDescentOptimizer(0.00001)
+        loss = ContrastiveLoss(contrastive_margin=4.)
+        optimizer = tf.train.GradientDescentOptimizer(0.000001)
         trainer = SiameseTrainer(architecture=architecture,
                                  loss=loss,
                                  iterations=ITERATIONS,
                                  snapshot=VALIDATION_TEST,
-                                 optimizer=optimizer)
-
+                                 optimizer=optimizer,
+                                 temp_dir="./LOGS/siamese-cnn")
+
         trainer.train(train_data_shuffler, validation_data_shuffler)
         #trainer.train(train_data_shuffler)
     else:
@@ -143,5 +144,6 @@ def main():
         trainer = SiameseTrainer(architecture=mlp,
                                  loss=loss,
                                  iterations=ITERATIONS,
-                                 snapshot=VALIDATION_TEST)
+                                 snapshot=VALIDATION_TEST,
+                                 temp_dir="./LOGS/siamese-dnn")
         trainer.train(train_data_shuffler, validation_data_shuffler)
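For background on the margin and learning-rate tweaks above: the contrastive loss used by the siamese trainer decomposes into a within-class term (pulls genuine pairs together) and a between-class term (pushes impostor pairs at least `margin` apart), which is why `SiameseTrainer` receives three tensors back from `self.loss(...)` in the changes below. A sketch of the standard formulation (Hadsell et al.), not necessarily bob's exact `ContrastiveLoss` implementation; the 1 = genuine / 0 = impostor label convention in particular is an assumption:

```python
import tensorflow as tf  # TF 0.x API

def contrastive_loss(labels, left, right, margin=4.0):
    """Standard contrastive loss.

    labels: 1 for genuine (same identity) pairs, 0 for impostor pairs
            (assumed convention -- check the data shuffler).
    left/right: embeddings of the two branches, shape (batch, dim).
    Returns (loss, between_class_term, within_class_term).
    """
    labels = tf.to_float(labels)
    d = tf.sqrt(tf.reduce_sum(tf.square(left - right), 1))  # Euclidean distance

    within_class = labels * tf.square(d)                                   # genuine pairs
    between_class = (1. - labels) * tf.square(tf.maximum(margin - d, 0.))  # impostors

    loss = tf.reduce_mean(0.5 * (within_class + between_class))
    return loss, tf.reduce_mean(between_class), tf.reduce_mean(within_class)
```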
diff --git a/bob/learn/tensorflow/trainers/SiameseTrainer.py b/bob/learn/tensorflow/trainers/SiameseTrainer.py
index 9287deffe707344e315aaefb8256afbcb0b1a2ea..38499ee52f1b5d9fbcdfda23ce756741a8cbe33f 100644
--- a/bob/learn/tensorflow/trainers/SiameseTrainer.py
+++ b/bob/learn/tensorflow/trainers/SiameseTrainer.py
@@ -6,16 +6,40 @@ import logging
 logger = logging.getLogger("bob.learn.tensorflow")
 import tensorflow as tf
+from tensorflow.core.framework import summary_pb2
 import threading
-from ..analyzers import ExperimentAnalizer
+from ..analyzers import ExperimentAnalizer, SoftmaxAnalizer
 from ..network import SequenceNetwork
 import bob.io.base
 from .Trainer import Trainer
 import os
 import sys
 
+
 class SiameseTrainer(Trainer):
+    """
+    Trainer for siamese networks.
+
+    **Parameters**
+
+    architecture: The architecture that you want to run. Should be a :py:class:`bob.learn.tensorflow.network.SequenceNetwork`
+    optimizer: One of the tensorflow optimizers https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html
+    use_gpu: Use GPUs during training
+    loss: The loss function
+    temp_dir: The output directory
+
+    base_learning_rate: Initial learning rate
+    weight_decay: Decay rate of the learning rate schedule
+    convergence_threshold: Threshold used to decide if the training converged
+
+    iterations: Maximum number of iterations
+    snapshot: Take a snapshot of the network every `n` iterations
+    prefetch: Use extra threads to deal with the I/O
+    analizer: Neural network analizer :py:mod:`bob.learn.tensorflow.analyzers`
+    verbosity_level: Level of the log messages
+    """
+
     def __init__(self,
                  architecture,
                  optimizer=tf.train.AdamOptimizer(),
@@ -30,7 +54,13 @@ class SiameseTrainer(Trainer):
                  ###### training options ##########
                  convergence_threshold=0.01,
                  iterations=5000,
-                 snapshot=100):
+                 snapshot=100,
+                 prefetch=False,
+
+                 ## Analizer
+                 analizer=SoftmaxAnalizer(),
+
+                 verbosity_level=2):
 
         super(SiameseTrainer, self).__init__(
             architecture=architecture,
@@ -38,146 +68,171 @@ class SiameseTrainer(Trainer):
             use_gpu=use_gpu,
             loss=loss,
             temp_dir=temp_dir,
+
+            # Learning rate
             base_learning_rate=base_learning_rate,
             weight_decay=weight_decay,
+
+            ###### training options ##########
             convergence_threshold=convergence_threshold,
             iterations=iterations,
-            snapshot=snapshot
+            snapshot=snapshot,
+            prefetch=prefetch,
+
+            ## Analizer
+            analizer=analizer,
+
+            verbosity_level=verbosity_level
         )
 
+        self.between_class_graph = None
+        self.within_class_graph = None
+
-    def train(self, train_data_shuffler, validation_data_shuffler=None):
-        """
-        Do the loop forward --> backward --|
-                       ^--------------------|
-        """
-
-        def start_thread():
-            threads = []
-            for n in range(1):
-                t = threading.Thread(target=load_and_enqueue)
-                t.daemon = True  # thread will close when parent quits
-                t.start()
-                threads.append(t)
-            return threads
-
-        def load_and_enqueue():
-            """
-            Injecting data in the place holder queue
-            """
-            # for i in range(self.iterations+5):
-            while not thread_pool.should_stop():
-                batch_left, batch_right, labels = train_data_shuffler.get_pair()
-
-                feed_dict = {train_placeholder_left_data: batch_left,
-                             train_placeholder_right_data: batch_right,
-                             train_placeholder_labels: labels}
-
-                session.run(enqueue_op, feed_dict=feed_dict)
-
-        # TODO: find an elegant way to provide this as a parameter of the trainer
-        learning_rate = tf.train.exponential_decay(
-            self.base_learning_rate,  # Learning rate
-            train_data_shuffler.batch_size,
-            train_data_shuffler.n_samples,
-            self.weight_decay  # Decay step
-        )
-
-        # Creating directory
-        bob.io.base.create_directories_safe(self.temp_dir)
-
-        # Creating two graphs
-        train_placeholder_left_data, train_placeholder_labels = train_data_shuffler.\
-            get_placeholders_forprefetch(name="train_left")
-        train_placeholder_right_data, _ = train_data_shuffler.get_placeholders(name="train_right")
-
-        # Defining a placeholder queue for prefetching
-        queue = tf.FIFOQueue(capacity=100,
-                             dtypes=[tf.float32, tf.float32, tf.int64],
-                             shapes=[train_placeholder_left_data.get_shape().as_list()[1:],
-                                     train_placeholder_right_data.get_shape().as_list()[1:],
-                                     []])
-
-        # Fetching the place holders from the queue
-        enqueue_op = queue.enqueue_many([train_placeholder_left_data,
-                                         train_placeholder_right_data,
-                                         train_placeholder_labels])
-        train_left_feature_batch, train_right_label_batch, train_labels_batch = \
-            queue.dequeue_many(train_data_shuffler.batch_size)
-
-        # Creating the architecture for train and validation
-        if not isinstance(self.architecture, SequenceNetwork):
-            raise ValueError("The variable `architecture` must be an instance of "
-                             "`bob.learn.tensorflow.network.SequenceNetwork`")
-
-        # Creating the siamese graph
-        train_left_graph = self.architecture.compute_graph(train_left_feature_batch)
-        train_right_graph = self.architecture.compute_graph(train_right_label_batch)
-
-        loss_train, between_class, within_class = self.loss(train_labels_batch,
-                                                            train_left_graph,
-                                                            train_right_graph)
-
-        # Preparing the optimizer
-        step = tf.Variable(0)
-        self.optimizer._learning_rate = learning_rate
-        optimizer = self.optimizer.minimize(loss_train, global_step=step)
-        #optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.99, use_locking=False,
-        #                                       name='Momentum').minimize(loss_train, global_step=step)
-
-        print("Initializing !!")
-        # Training
-        hdf5 = bob.io.base.HDF5File(os.path.join(self.temp_dir, 'model.hdf5'), 'w')
-
-        with tf.Session() as session:
-            if validation_data_shuffler is not None:
-                analizer = ExperimentAnalizer(validation_data_shuffler, self.architecture, session)
-
-            tf.initialize_all_variables().run()
-
-            # Start a thread to enqueue data asynchronously, and hide I/O latency.
-            thread_pool = tf.train.Coordinator()
-            tf.train.start_queue_runners(coord=thread_pool)
-            threads = start_thread()
-
-            # TENSOR BOARD SUMMARY
-            train_writer = tf.train.SummaryWriter(os.path.join(self.temp_dir, 'LOGS'), session.graph)
-
-            # Siamese specific summary
-            tf.scalar_summary('loss', loss_train)
-            tf.scalar_summary('between_class', between_class)
-            tf.scalar_summary('within_class', within_class)
-            tf.scalar_summary('lr', learning_rate)
-            merged = tf.merge_all_summaries()
-
-            # Architecture summary
-            self.architecture.generate_summaries()
-            merged_validation = tf.merge_all_summaries()
-
-            for step in range(self.iterations):
-
-                _, l, lr, summary = session.run([optimizer, loss_train, learning_rate, merged])
-                #_, l, lr, b, w, summary = session.run([optimizer, loss_train, learning_rate, between_class, within_class, merged])
-                #_, l, lr = session.run([optimizer, loss_train, learning_rate])
-                train_writer.add_summary(summary, step)
-                #print str(step) + " loss: {0}, bc: {1}, wc: {2}".format(l, b, w)
-                #print str(step) + " loss: {0}".format(l)
-                sys.stdout.flush()
-
-                if validation_data_shuffler is not None and step % self.snapshot == 0:
-                    print str(step)
-                    sys.stdout.flush()
-
-                    summary = session.run(merged_validation)
-                    train_writer.add_summary(summary, step)
-
-                    summary = analizer()
-                    train_writer.add_summary(summary, step)
-
-            print("#######DONE##########")
-            self.architecture.save(hdf5)
-            del hdf5
-            train_writer.close()
-
-            thread_pool.request_stop()
-            thread_pool.join(threads)
+    def compute_graph(self, data_shuffler, prefetch=False, name=""):
+        """
+        Computes the graph for the trainer.
+
+        ** Parameters **
+
+        data_shuffler: Data shuffler
+        prefetch: Use the prefetch queue instead of plain placeholders
+        name: Name of the graph
+        """
+
+        # The architecture must be a SequenceNetwork, with or without prefetching
+        if not isinstance(self.architecture, SequenceNetwork):
+            raise ValueError("The variable `architecture` must be an instance of "
+                             "`bob.learn.tensorflow.network.SequenceNetwork`")
+
+        # Defining place holders
+        if prefetch:
+            placeholder_left_data, placeholder_right_data, placeholder_labels = \
+                data_shuffler.get_placeholders_pair_forprefetch(name="train")
+
+            # Defining a placeholder queue for prefetching
+            queue = tf.FIFOQueue(capacity=100,
+                                 dtypes=[tf.float32, tf.float32, tf.int64],
+                                 shapes=[placeholder_left_data.get_shape().as_list()[1:],
+                                         placeholder_right_data.get_shape().as_list()[1:],
+                                         []])
+
+            # Fetching the place holders from the queue
+            self.enqueue_op = queue.enqueue_many([placeholder_left_data, placeholder_right_data, placeholder_labels])
+            feature_left_batch, feature_right_batch, label_batch = queue.dequeue_many(data_shuffler.batch_size)
+        else:
+            feature_left_batch, feature_right_batch, label_batch = data_shuffler.get_placeholders_pair(name="train_")
+
+        # Creating the siamese graph
+        train_left_graph = self.architecture.compute_graph(feature_left_batch)
+        train_right_graph = self.architecture.compute_graph(feature_right_batch)
+
+        graph, between_class_graph, within_class_graph = self.loss(label_batch,
+                                                                   train_left_graph,
+                                                                   train_right_graph)
+
+        self.between_class_graph = between_class_graph
+        self.within_class_graph = within_class_graph
+
+        return graph
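The prefetch branch above is the stock TF 0.x producer/consumer pattern: batch placeholders feed a `tf.FIFOQueue` via `enqueue_many`, a feeder thread keeps the queue full, and the training graph consumes `dequeue_many` outputs with no `feed_dict`. A self-contained toy version with a single input instead of a pair (shapes and sizes are made up):

```python
import threading
import numpy
import tensorflow as tf  # TF 0.x API, as used by this codebase

data_ph = tf.placeholder(tf.float32, shape=[None, 8], name="data")
label_ph = tf.placeholder(tf.int64, shape=[None], name="label")

queue = tf.FIFOQueue(capacity=100,
                     dtypes=[tf.float32, tf.int64],
                     shapes=[[8], []])
enqueue_op = queue.enqueue_many([data_ph, label_ph])
data_batch, label_batch = queue.dequeue_many(16)  # what the graph consumes

coord = tf.train.Coordinator()

def load_and_enqueue(session):
    # Keep the queue topped up until the coordinator asks us to stop
    while not coord.should_stop():
        data = numpy.random.rand(16, 8).astype("float32")
        labels = numpy.random.randint(0, 10, size=16)
        session.run(enqueue_op, feed_dict={data_ph: data, label_ph: labels})

with tf.Session() as session:
    t = threading.Thread(target=load_and_enqueue, args=(session,))
    t.daemon = True  # dies with the parent, as in Trainer.start_thread
    t.start()
    print(session.run(data_batch).shape)  # (16, 8) -- no feed_dict needed
    coord.request_stop()
    session.run(queue.close(cancel_pending_enqueues=True))
```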
+    def get_feed_dict(self, data_shuffler):
+        """
+        Given a data shuffler, prepare the feed dictionary to be injected in the graph
+
+        ** Parameters **
+
+        data_shuffler: The data shuffler to be used
+        """
+
+        batch_left, batch_right, labels = data_shuffler.get_pair()
+        placeholder_left_data, placeholder_right_data, placeholder_label = data_shuffler.get_placeholders_pair(name="train")
+
+        feed_dict = {placeholder_left_data: batch_left,
+                     placeholder_right_data: batch_right,
+                     placeholder_label: labels}
+
+        return feed_dict
+
+    def fit(self, session, step):
+        """
+        Run one iteration (`forward` and `backward`)
+
+        ** Parameters **
+
+        session: Tensorflow session
+        step: Iteration number
+        """
+        if self.prefetch:
+            _, l, bt_class, wt_class, lr, summary = session.run([self.optimizer,
+                                                                 self.training_graph, self.between_class_graph,
+                                                                 self.within_class_graph,
+                                                                 self.learning_rate, self.summaries_train])
+        else:
+            feed_dict = self.get_feed_dict(self.train_data_shuffler)
+            _, l, bt_class, wt_class, lr, summary = session.run([self.optimizer,
+                                                                 self.training_graph, self.between_class_graph,
+                                                                 self.within_class_graph,
+                                                                 self.learning_rate, self.summaries_train],
+                                                                feed_dict=feed_dict)
+
+        logger.info("Loss training set step={0} = {1}".format(step, l))
+        self.train_summary_writter.add_summary(summary, step)
+
+    def compute_validation(self, session, data_shuffler, step):
+        """
+        Computes the loss in the validation set
+
+        ** Parameters **
+
+        session: Tensorflow session
+        data_shuffler: The data shuffler to be used
+        step: Iteration number
+        """
+
+        if self.validation_summary_writter is None:
+            self.validation_summary_writter = tf.train.SummaryWriter(os.path.join(self.temp_dir, 'validation'), session.graph)
+
+        # Build the validation graph only once
+        if self.validation_graph is None:
+            self.validation_graph = self.compute_graph(data_shuffler, name="validation")
+
+        feed_dict = self.get_feed_dict(data_shuffler)
+        l = session.run(self.validation_graph, feed_dict=feed_dict)
+
+        summaries = []
+        summaries.append(summary_pb2.Summary.Value(tag="loss", simple_value=float(l)))
+        self.validation_summary_writter.add_summary(summary_pb2.Summary(value=summaries), step)
+        logger.info("Loss VALIDATION set step={0} = {1}".format(step, l))
+
+    def create_general_summary(self):
+        """
+        Creates a simple tensorboard summary with the value of the loss and learning rate
+        """
+
+        # Train summary
+        tf.scalar_summary('loss', self.training_graph, name="train")
+        tf.scalar_summary('between_class_loss', self.between_class_graph, name="train")
+        tf.scalar_summary('within_class_loss', self.within_class_graph, name="train")
+        tf.scalar_summary('lr', self.learning_rate, name="train")
+        return tf.merge_all_summaries()
+
+    def load_and_enqueue(self, session):
+        """
+        Injecting data in the place holder queue
+
+        **Parameters**
+
+        session: Tensorflow session
+        """
+
+        while not self.thread_pool.should_stop():
+            batch_left, batch_right, labels = self.train_data_shuffler.get_pair()
+            # The shuffler caches its placeholders, so these are the same tensors
+            # wired to self.enqueue_op in compute_graph
+            placeholder_left_data, placeholder_right_data, placeholder_label = \
+                self.train_data_shuffler.get_placeholders_pair()
+
+            feed_dict = {placeholder_left_data: batch_left,
+                         placeholder_right_data: batch_right,
+                         placeholder_label: labels}
+
+            session.run(self.enqueue_op, feed_dict=feed_dict)
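Both `compute_validation` variants write scalars to TensorBoard by building a `summary_pb2.Summary` protobuf directly instead of going through `tf.scalar_summary`; that is the natural route for values computed in Python (validation loss, accuracy, elapsed time). The pattern in isolation, with a hypothetical log directory:

```python
import tensorflow as tf  # TF 0.x API, as used by this codebase
from tensorflow.core.framework import summary_pb2

writer = tf.train.SummaryWriter("/tmp/example_logs")  # hypothetical log dir

step = 100
validation_loss = 0.42  # any value computed outside the graph

value = summary_pb2.Summary.Value(tag="loss", simple_value=float(validation_loss))
writer.add_summary(summary_pb2.Summary(value=[value]), step)
writer.close()
```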
diff --git a/bob/learn/tensorflow/trainers/Trainer.py b/bob/learn/tensorflow/trainers/Trainer.py
index ea3152075428af35c77d2140f4d7a77497443185..1043dd165e2f1c885f457ff3edc8a8a628763b8e 100644
--- a/bob/learn/tensorflow/trainers/Trainer.py
+++ b/bob/learn/tensorflow/trainers/Trainer.py
@@ -4,21 +4,42 @@
 # @date: Tue 09 Aug 2016 15:25:22 CEST
 
 import logging
-logger = logging.getLogger("bob.learn.tensorflow")
 import tensorflow as tf
 from ..network import SequenceNetwork
 import threading
-import numpy
 import os
 import bob.io.base
 import bob.core
 from ..analyzers import SoftmaxAnalizer
 from tensorflow.core.framework import summary_pb2
+import time
 
 logger = bob.core.log.setup("bob.learn.tensorflow")
 
-class Trainer(object):
+
+class Trainer(object):
+    """
+    One graph trainer.
+    Use this trainer when your CNN is composed of a single graph
+
+    **Parameters**
+
+    architecture: The architecture that you want to run. Should be a :py:class:`bob.learn.tensorflow.network.SequenceNetwork`
+    optimizer: One of the tensorflow optimizers https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html
+    use_gpu: Use GPUs during training
+    loss: The loss function
+    temp_dir: The output directory
+
+    base_learning_rate: Initial learning rate
+    weight_decay: Decay rate of the learning rate schedule
+    convergence_threshold: Threshold used to decide if the training converged
+
+    iterations: Maximum number of iterations
+    snapshot: Take a snapshot of the network every `n` iterations
+    prefetch: Use extra threads to deal with the I/O
+    analizer: Neural network analizer :py:mod:`bob.learn.tensorflow.analyzers`
+    verbosity_level: Level of the log messages
+    """
 
     def __init__(self,
                  architecture,
                  optimizer=tf.train.AdamOptimizer(),
@@ -37,22 +58,10 @@ class Trainer(object):
                  prefetch=False,
 
                  ## Analizer
-                 analizer = SoftmaxAnalizer(),
-
+                 analizer=SoftmaxAnalizer(),
                  verbosity_level=2):
-        """
-        **Parameters**
-        architecture: The architecture that you want to run. Should be a :py:class`bob.learn.tensorflow.network.SequenceNetwork`
-        optimizer: One of the tensorflow optimizers https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html
-        use_gpu: Use GPUs in the training
-        loss: Loss
-        temp_dir:
-        iterations:
-        snapshot:
-        convergence_threshold:
-        """
 
         if not isinstance(architecture, SequenceNetwork):
             raise ValueError("`architecture` should be instance of `SequenceNetwork`")
@@ -78,6 +87,7 @@ class Trainer(object):
         self.train_data_shuffler = None
         self.summaries_train = None
         self.train_summary_writter = None
+        self.thread_pool = None
 
         # Validation data
         self.validation_graph = None
@@ -91,24 +101,22 @@ class Trainer(object):
 
         bob.core.log.set_verbosity_level(logger, verbosity_level)
 
-    def compute_graph(self, data_shuffler, name=""):
+    def compute_graph(self, data_shuffler, prefetch=False, name=""):
         """
         Computes the graph for the trainer.
 
+        ** Parameters **
+
         data_shuffler: Data shuffler
+        prefetch: Use the prefetch queue instead of plain placeholders
         name: Name of the graph
         """
 
         # Defining place holders
-        if self.prefetch:
+        if prefetch:
             placeholder_data, placeholder_labels = data_shuffler.get_placeholders_forprefetch(name=name)
 
-            #if validation_data_shuffler is not None:
-            #    validation_placeholder_data, validation_placeholder_labels = \
-            #        validation_data_shuffler.get_placeholders(name="validation")
-
             # Defining a placeholder queue for prefetching
             queue = tf.FIFOQueue(capacity=10,
                                  dtypes=[tf.float32, tf.int64],
@@ -133,10 +141,9 @@ class Trainer(object):
 
     def get_feed_dict(self, data_shuffler):
         """
-        Computes the feed_dict for the graph
+        Given a data shuffler, prepare the feed dictionary to be injected in the graph
 
         ** Parameters **
-
         data_shuffler: The data shuffler to be used
         """
 
@@ -147,7 +154,16 @@ class Trainer(object):
                      label_placeholder: labels}
         return feed_dict
 
-    def __fit(self, session, step):
+    def fit(self, session, step):
+        """
+        Run one iteration (`forward` and `backward`)
+
+        ** Parameters **
+
+        session: Tensorflow session
+        step: Iteration number
+        """
+
         if self.prefetch:
             _, l, lr, summary = session.run([self.optimizer, self.training_graph,
                                              self.learning_rate, self.summaries_train])
@@ -159,7 +175,16 @@ class Trainer(object):
         logger.info("Loss training set step={0} = {1}".format(step, l))
         self.train_summary_writter.add_summary(summary, step)
 
-    def __compute_validation(self, session, data_shuffler, step):
+    def compute_validation(self, session, data_shuffler, step):
+        """
+        Computes the loss in the validation set
+
+        ** Parameters **
+
+        session: Tensorflow session
+        data_shuffler: The data shuffler to be used
+        step: Iteration number
+        """
 
         if self.validation_summary_writter is None:
             self.validation_summary_writter = tf.train.SummaryWriter(os.path.join(self.temp_dir, 'validation'), session.graph)
@@ -173,16 +198,27 @@ class Trainer(object):
         self.validation_summary_writter.add_summary(summary_pb2.Summary(value=summaries), step)
         logger.info("Loss VALIDATION set step={0} = {1}".format(step, l))
 
-    def __create_general_summary(self):
+    def create_general_summary(self):
+        """
+        Creates a simple tensorboard summary with the value of the loss and learning rate
+        """
+
         # Train summary
         tf.scalar_summary('loss', self.training_graph, name="train")
         tf.scalar_summary('lr', self.learning_rate, name="train")
         return tf.merge_all_summaries()
 
     def start_thread(self, session):
+        """
+        Start a pool of threads for pre-fetching
+
+        **Parameters**
+
+        session: Tensorflow session
+        """
+
         threads = []
-        for n in range(1):
-            t = threading.Thread(target=self.load_and_enqueue, args=(session, ))
+        for n in range(3):
+            t = threading.Thread(target=self.load_and_enqueue, args=(session,))
             t.daemon = True  # thread will close when parent quits
             t.start()
             threads.append(t)
@@ -191,6 +227,9 @@ class Trainer(object):
     def load_and_enqueue(self, session):
         """
         Injecting data in the place holder queue
+
+        **Parameters**
+
+        session: Tensorflow session
         """
 
         while not self.thread_pool.should_stop():
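One thing worth flagging in `train()` just below: `tf.train.exponential_decay(learning_rate, global_step, decay_steps, decay_rate)` computes `learning_rate * decay_rate ** (global_step / decay_steps)`, but the call site passes `train_data_shuffler.batch_size` and `n_samples` in the two step slots, so the decayed rate is constant over a run. The canonical wiring looks like this sketch (values are illustrative):

```python
import tensorflow as tf  # TF 0.x API, as used by this codebase

global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(0.001,        # base learning rate
                                           global_step,  # incremented by minimize()
                                           1000,         # decay every 1000 steps
                                           0.95)         # decay rate

# optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
with tf.Session() as session:
    session.run(tf.initialize_all_variables())
    print(session.run(learning_rate))  # 0.001 at step 0
```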
@@ -204,8 +243,7 @@ class Trainer(object):
 
     def train(self, train_data_shuffler, validation_data_shuffler=None):
         """
-        Do the loop forward --> backward --|
-                       ^--------------------|
+        Train the network
         """
 
         # Creating directory
@@ -220,14 +258,14 @@ class Trainer(object):
             self.weight_decay  # Decay step
         )
 
-        self.training_graph = self.compute_graph(train_data_shuffler, name="train")
+        self.training_graph = self.compute_graph(train_data_shuffler, prefetch=self.prefetch, name="train")
 
         # Preparing the optimizer
         self.optimizer_class._learning_rate = self.learning_rate
         self.optimizer = self.optimizer_class.minimize(self.training_graph, global_step=tf.Variable(0))
 
         # Train summary
-        self.summaries_train = self.__create_general_summary()
+        self.summaries_train = self.create_general_summary()
 
         logger.info("Initializing !!")
         # Training
@@ -247,13 +285,19 @@ class Trainer(object):
             self.train_summary_writter = tf.train.SummaryWriter(os.path.join(self.temp_dir, 'train'), session.graph)
 
             for step in range(self.iterations):
-                self.__fit(session, step)
+
+                start = time.time()
+                self.fit(session, step)
+                end = time.time()
+                summary = summary_pb2.Summary.Value(tag="elapsed_time", simple_value=float(end-start))
+                self.train_summary_writter.add_summary(summary_pb2.Summary(value=[summary]), step)
+
                 if validation_data_shuffler is not None and step % self.snapshot == 0:
-                    self.__compute_validation(session, validation_data_shuffler, step)
+                    self.compute_validation(session, validation_data_shuffler, step)
 
                     if self.analizer is not None:
                         self.validation_summary_writter.add_summary(self.analizer(
-                            validation_data_shuffler, self.architecture, session), step)
+                            validation_data_shuffler, self, session), step)
 
         logger.info("Training finally finished")
diff --git a/setup.py b/setup.py
index fbda8ace22cec761618235be1df553c3ea064f88..f82eb84852a71d334cc4e74b1381d40523e5fbef 100644
--- a/setup.py
+++ b/setup.py
@@ -79,6 +79,7 @@ setup(
       'train_mnist_triplet.py = bob.learn.tensorflow.script.train_mnist_triplet:main',
       'train_siamese_casia_webface.py = bob.learn.tensorflow.script.train_siamese_casia_webface:main',
+      'train_mobio.py = bob.learn.tensorflow.script.train_mobio:main',
     ],