Organizing the trainer

parent 2efd0e68
@@ -31,24 +31,70 @@ class SoftmaxAnalizer(object):
         """
         self.data_shuffler = None
-        self.machine = None
+        self.trainer = None
         self.session = None

-    def __call__(self, data_shuffler, machine, session):
+    def __call__(self, data_shuffler, trainer, session):
         if self.data_shuffler is None:
             self.data_shuffler = data_shuffler
-            self.machine = machine
+            self.trainer = trainer
             self.session = session

         # Creating the graph
         feature_batch, label_batch = self.data_shuffler.get_placeholders(name="validation_accuracy")
         data, labels = self.data_shuffler.get_batch()
-        graph = self.machine.compute_graph(feature_batch)
+        graph = self.trainer.architecture.compute_graph(feature_batch)

         predictions = numpy.argmax(self.session.run(graph, feed_dict={feature_batch: data[:]}), 1)
         accuracy = 100. * numpy.sum(predictions == labels) / predictions.shape[0]

         summaries = []
         summaries.append(summary_pb2.Summary.Value(tag="accuracy_validation", simple_value=float(accuracy)))
         return summary_pb2.Summary(value=summaries)
+
+
+class SoftmaxSiameseAnalizer(object):
+    """
+    Analizer.
+    """
+
+    def __init__(self):
+        """
+        Softmax analizer
+
+        ** Parameters **
+
+        data_shuffler:
+        graph:
+        session:
+        convergence_threshold:
+        convergence_reference: Reference used to analyze convergence. Possible values are `eer`, `far10`, `far100`
+        """
+        self.data_shuffler = None
+        self.trainer = None
+        self.session = None
+
+    def __call__(self, data_shuffler, trainer, session):
+        if self.data_shuffler is None:
+            self.data_shuffler = data_shuffler
+            self.trainer = trainer
+            self.session = session
+
+        # Creating the graph
+        #feature_batch, label_batch = self.data_shuffler.get_placeholders(name="validation_accuracy")
+        feature_left_batch, feature_right_batch, label_batch = self.data_shuffler.get_placeholders_pair(name="validation_accuracy")
+
+        batch_left, batch_right, labels = self.data_shuffler.get_batch()
+        left = self.trainer.architecture.compute_graph(feature_left_batch)
+        # NOTE: unfinished -- the right branch and a pair-based accuracy
+        # still need to be computed here.
\ No newline at end of file
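
A note on what this analizer computes: it runs the network on one validation batch, takes the argmax over the class scores, and reports the percentage of correct predictions as a TensorBoard summary. A minimal, self-contained sketch of that computation (plain numpy, with made-up scores and labels):

    import numpy

    # Fake network outputs for a batch of 4 samples and 3 classes,
    # plus the corresponding ground-truth labels.
    scores = numpy.array([[0.1, 0.7, 0.2],
                          [0.8, 0.1, 0.1],
                          [0.3, 0.3, 0.4],
                          [0.2, 0.5, 0.3]])
    labels = numpy.array([1, 0, 2, 0])

    predictions = numpy.argmax(scores, 1)  # -> [1, 0, 2, 1]
    accuracy = 100. * numpy.sum(predictions == labels) / predictions.shape[0]
    print(accuracy)  # 75.0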
@@ -47,7 +47,10 @@ class BaseDataShuffler(object):
         self.indexes = numpy.array(range(self.n_samples))
         numpy.random.shuffle(self.indexes)

+        # TODO: Reorganize the data shufflers for siamese and triplet networks
         self.data_placeholder = None
+        self.data2_placeholder = None
+        self.data3_placeholder = None
         self.label_placeholder = None

     def get_placeholders_forprefetch(self, name=""):
@@ -59,6 +62,21 @@ class BaseDataShuffler(object):
             self.label_placeholder = tf.placeholder(tf.int64, shape=[None, ])
         return self.data_placeholder, self.label_placeholder

+    def get_placeholders_pair_forprefetch(self, name=""):
+        """
+        Returns placeholders with the size of your batch
+        """
+        if self.data_placeholder is None:
+            self.data_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name)
+
+        if self.data2_placeholder is None:
+            self.data2_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name)
+
+        if self.label_placeholder is None:
+            self.label_placeholder = tf.placeholder(tf.int64, shape=[None, ])
+
+        return self.data_placeholder, self.data2_placeholder, self.label_placeholder
+
     def get_placeholders(self, name=""):
         """
         Returns a placeholder with the size of your batch
@@ -66,9 +84,28 @@ class BaseDataShuffler(object):
         if self.data_placeholder is None:
             self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name)

+        if self.label_placeholder is None:
             self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0])
+
         return self.data_placeholder, self.label_placeholder

+    def get_placeholders_pair(self, name=""):
+        """
+        Returns placeholders for a pair of batches
+        """
+        if self.data_placeholder is None:
+            self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_left")
+
+        if self.data2_placeholder is None:
+            self.data2_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_right")
+
+        if self.label_placeholder is None:
+            self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0], name="label")
+
+        return self.data_placeholder, self.data2_placeholder, self.label_placeholder
+
     def get_genuine_or_not(self, input_data, input_labels, genuine=True):
         if genuine:
 ...
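
The new get_placeholders_pair is what lets a siamese trainer feed two image batches plus a label batch in a single session.run call. A self-contained sketch of the placeholders it builds and how they get fed, assuming the TensorFlow 0.x API used by this repo and a hypothetical (16, 28, 28, 1) batch shape:

    import numpy
    import tensorflow as tf  # written against the 0.x API used in this repo

    batch_shape = (16, 28, 28, 1)  # stands in for the shuffler's `self.shape`

    # What get_placeholders_pair builds internally:
    left_pl = tf.placeholder(tf.float32, shape=batch_shape, name="train_left")
    right_pl = tf.placeholder(tf.float32, shape=batch_shape, name="train_right")
    label_pl = tf.placeholder(tf.int64, shape=(batch_shape[0],), name="label")

    # Any pairwise graph can now be fed in a single run call.
    diff = tf.reduce_sum(tf.square(left_pl - right_pl), reduction_indices=[1, 2, 3])

    with tf.Session() as session:
        distances = session.run(diff, feed_dict={
            left_pl: numpy.random.rand(*batch_shape),
            right_pl: numpy.random.rand(*batch_shape)})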
@@ -87,13 +87,13 @@ def main():
                                                  batch_size=VALIDATION_BATCH_SIZE)

     # Preparing the architecture
-    cnn = False
+    cnn = True
     if cnn:
         architecture = Chopra(seed=SEED)
         #architecture = Lenet(seed=SEED)
         #architecture = Dummy(seed=SEED)
         loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
-        trainer = Trainer(architecture=architecture, loss=loss, iterations=ITERATIONS, prefetch=False, temp_dir="cnn")
+        trainer = Trainer(architecture=architecture, loss=loss, iterations=ITERATIONS, prefetch=False, temp_dir="./LOGS/cnn")
         trainer.train(train_data_shuffler, validation_data_shuffler)
         #trainer.train(train_data_shuffler)
     else:
 ...
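
One detail worth spelling out: `BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)` pairs an elementwise loss op with a reduction. Assuming BaseLoss simply applies the second argument to the output of the first (which is what the two constructor arguments suggest; the class itself is not shown in this diff), the resulting objective is equivalent to:

    import tensorflow as tf  # written against the 0.x API used in this repo

    logits = tf.placeholder(tf.float32, shape=(None, 10))  # network outputs
    labels = tf.placeholder(tf.int64, shape=(None,))       # integer class ids

    # Per-sample cross-entropy, reduced to a scalar with the mean.
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels))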
@@ -112,28 +112,29 @@ def main():
     #                                                batch_size=VALIDATION_BATCH_SIZE)

     # Preparing the architecture
-    #n_classes = len(train_data_shuffler.possible_labels)
-    n_classes = 50
+    n_classes = len(train_data_shuffler.possible_labels)
+    #n_classes = 50
     cnn = True
     if cnn:
         # LENET PAPER CHOPRA
-        architecture = Chopra(seed=SEED)
+        architecture = Chopra(seed=SEED, fc1_output=n_classes)
         #architecture = Lenet(default_feature_layer="fc2", n_classes=n_classes, conv1_output=8, conv2_output=16, use_gpu=USE_GPU)
         #architecture = VGG(n_classes=n_classes, use_gpu=USE_GPU)
         #architecture = Dummy(seed=SEED)
         #architecture = LenetDropout(default_feature_layer="fc2", n_classes=n_classes, conv1_output=4, conv2_output=8, use_gpu=USE_GPU)

-        loss = ContrastiveLoss(contrastive_margin=3.)
-        optimizer = tf.train.GradientDescentOptimizer(0.00001)
+        loss = ContrastiveLoss(contrastive_margin=4.)
+        optimizer = tf.train.GradientDescentOptimizer(0.000001)
         trainer = SiameseTrainer(architecture=architecture,
                                  loss=loss,
                                  iterations=ITERATIONS,
                                  snapshot=VALIDATION_TEST,
-                                 optimizer=optimizer)
+                                 optimizer=optimizer,
+                                 temp_dir="./LOGS/siamese-cnn")
+        #import ipdb; ipdb.set_trace()
         trainer.train(train_data_shuffler, validation_data_shuffler)
         #trainer.train(train_data_shuffler)
     else:
@@ -143,5 +144,6 @@ def main():
         trainer = SiameseTrainer(architecture=mlp,
                                  loss=loss,
                                  iterations=ITERATIONS,
-                                 snapshot=VALIDATION_TEST)
+                                 snapshot=VALIDATION_TEST,
+                                 temp_dir="./LOGS/siamese-dnn")
         trainer.train(train_data_shuffler, validation_data_shuffler)
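
Two of the tweaks above raise the contrastive-loss margin (3 to 4) and shrink the SGD step from 1e-5 to 1e-6. For readers unfamiliar with the loss, here is a sketch in the style of Hadsell et al. (2006); the exact label convention used by this repo's ContrastiveLoss is not shown in the diff, so treat this as illustrative only:

    import tensorflow as tf  # written against the 0.x API used in this repo

    def contrastive_loss(left, right, labels, margin=4.0):
        """labels: 1 for genuine (same-class) pairs, 0 for impostor pairs."""
        labels = tf.cast(labels, tf.float32)
        # Euclidean distance between the two embeddings of each pair.
        d = tf.sqrt(tf.reduce_sum(tf.square(left - right), reduction_indices=1))
        genuine_term = labels * tf.square(d)
        # Impostor pairs only contribute while closer than the margin.
        impostor_term = (1.0 - labels) * tf.square(tf.maximum(margin - d, 0.0))
        return tf.reduce_mean(genuine_term + impostor_term)

    left = tf.placeholder(tf.float32, shape=(None, 128))
    right = tf.placeholder(tf.float32, shape=(None, 128))
    labels = tf.placeholder(tf.int64, shape=(None,))
    loss = contrastive_loss(left, right, labels)

A larger margin keeps gradient pressure on impostor pairs over a wider range of distances before their term saturates at zero.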
@@ -4,21 +4,42 @@
 # @date: Tue 09 Aug 2016 15:25:22 CEST

 import logging
-logger = logging.getLogger("bob.learn.tensorflow")
 import tensorflow as tf
 from ..network import SequenceNetwork
 import threading
+import numpy
 import os
 import bob.io.base
 import bob.core
 from ..analyzers import SoftmaxAnalizer
 from tensorflow.core.framework import summary_pb2
+import time

 logger = bob.core.log.setup("bob.learn.tensorflow")

 class Trainer(object):
+    """
+    One-graph trainer.
+
+    Use this trainer when your CNN is composed of a single graph.
+
+    **Parameters**
+
+    architecture: The architecture that you want to run. Should be a :py:class:`bob.learn.tensorflow.network.SequenceNetwork`
+    optimizer: One of the tensorflow optimizers https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html
+    use_gpu: Use GPUs in the training
+    loss: Loss
+    temp_dir: The output directory
+    base_learning_rate: Initial learning rate
+    weight_decay:
+    convergence_threshold:
+    iterations: Maximum number of iterations
+    snapshot: Will take a snapshot of the network at every `n` iterations
+    prefetch: Use extra threads to deal with the I/O
+    analizer: Neural network analizer :py:mod:`bob.learn.tensorflow.analyzers`
+    verbosity_level:
+    """

     def __init__(self,
                  architecture,
                  optimizer=tf.train.AdamOptimizer(),
@@ -37,22 +58,10 @@ class Trainer(object):
                  prefetch=False,

                  ## Analizer
-                 analizer = SoftmaxAnalizer(),
+                 analizer=SoftmaxAnalizer(),

                  verbosity_level=2):
-        """
-        **Parameters**
-
-        architecture: The architecture that you want to run. Should be a :py:class`bob.learn.tensorflow.network.SequenceNetwork`
-        optimizer: One of the tensorflow optimizers https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html
-        use_gpu: Use GPUs in the training
-        loss: Loss
-        temp_dir:
-        iterations:
-        snapshot:
-        convergence_threshold:
-        """

         if not isinstance(architecture, SequenceNetwork):
             raise ValueError("`architecture` should be instance of `SequenceNetwork`")
 ...
@@ -78,6 +87,7 @@ class Trainer(object):
         self.train_data_shuffler = None
         self.summaries_train = None
         self.train_summary_writter = None
+        self.thread_pool = None

         # Validation data
         self.validation_graph = None
@@ -91,24 +101,22 @@ class Trainer(object):
         bob.core.log.set_verbosity_level(logger, verbosity_level)

-    def compute_graph(self, data_shuffler, name=""):
+    def compute_graph(self, data_shuffler, prefetch=False, name=""):
         """
         Computes the graph for the trainer.

         ** Parameters **

         data_shuffler: Data shuffler
+        prefetch: Use the prefetch queue placeholders instead of the regular ones
         name: Name of the graph
         """

         # Defining place holders
-        if self.prefetch:
+        if prefetch:
             placeholder_data, placeholder_labels = data_shuffler.get_placeholders_forprefetch(name=name)
-            #if validation_data_shuffler is not None:
-            #    validation_placeholder_data, validation_placeholder_labels = \
-            #        validation_data_shuffler.get_placeholders(name="validation")

             # Defining a placeholder queue for prefetching
             queue = tf.FIFOQueue(capacity=10,
                                  dtypes=[tf.float32, tf.int64],
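
The hunk is truncated here, but the pattern it sets up is the standard TensorFlow 0.x queue idiom: a FIFOQueue filled by an enqueue op running in background threads (see start_thread / load_and_enqueue below) while the training graph consumes dequeued batches. A condensed, self-contained sketch with assumed shapes:

    import numpy
    import tensorflow as tf  # written against the 0.x API used in this repo

    data_pl = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    label_pl = tf.placeholder(tf.int64, shape=[None])

    queue = tf.FIFOQueue(capacity=10,
                         dtypes=[tf.float32, tf.int64],
                         shapes=[(28, 28, 1), ()])
    enqueue_op = queue.enqueue_many([data_pl, label_pl])

    # The training graph consumes dequeued batches instead of placeholders.
    data_batch, label_batch = queue.dequeue_many(16)

    def load_and_enqueue(session):
        # The real code loops on a thread pool's should_stop(); simplified here.
        while True:
            data = numpy.random.rand(32, 28, 28, 1).astype("float32")
            labels = numpy.random.randint(0, 10, size=32)
            session.run(enqueue_op, feed_dict={data_pl: data, label_pl: labels})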
@@ -133,10 +141,9 @@ class Trainer(object):

     def get_feed_dict(self, data_shuffler):
         """
-        Computes the feed_dict for the graph
+        Given a data shuffler, prepares the feed dictionary to be injected in the graph

         ** Parameters **

         data_shuffler:
         """
@@ -147,7 +154,16 @@ class Trainer(object):
                      label_placeholder: labels}
         return feed_dict

-    def __fit(self, session, step):
+    def fit(self, session, step):
+        """
+        Run one iteration (`forward` and `backward`)
+
+        ** Parameters **
+
+        session: Tensorflow session
+        step: Iteration number
+        """
+
         if self.prefetch:
             _, l, lr, summary = session.run([self.optimizer, self.training_graph,
                                              self.learning_rate, self.summaries_train])
@@ -159,7 +175,16 @@ class Trainer(object):

             logger.info("Loss training set step={0} = {1}".format(step, l))
         self.train_summary_writter.add_summary(summary, step)

-    def __compute_validation(self, session, data_shuffler, step):
+    def compute_validation(self, session, data_shuffler, step):
+        """
+        Computes the loss in the validation set
+
+        ** Parameters **
+
+        session: Tensorflow session
+        data_shuffler: The data shuffler to be used
+        step: Iteration number
+        """
+
         if self.validation_summary_writter is None:
             self.validation_summary_writter = tf.train.SummaryWriter(os.path.join(self.temp_dir, 'validation'), session.graph)
@@ -173,16 +198,27 @@ class Trainer(object):
         self.validation_summary_writter.add_summary(summary_pb2.Summary(value=summaries), step)
         logger.info("Loss VALIDATION set step={0} = {1}".format(step, l))

-    def __create_general_summary(self):
+    def create_general_summary(self):
+        """
+        Creates a simple tensorboard summary with the value of the loss and learning rate
+        """
+
         # Train summary
         tf.scalar_summary('loss', self.training_graph, name="train")
         tf.scalar_summary('lr', self.learning_rate, name="train")
         return tf.merge_all_summaries()

     def start_thread(self, session):
+        """
+        Start a pool of threads for pre-fetching
+
+        **Parameters**
+
+        session: Tensorflow session
+        """
+
         threads = []
-        for n in range(1):
-            t = threading.Thread(target=self.load_and_enqueue, args=(session, ))
+        for n in range(3):
+            t = threading.Thread(target=self.load_and_enqueue, args=(session,))
             t.daemon = True  # thread will close when parent quits
             t.start()
             threads.append(t)
@@ -191,6 +227,9 @@ class Trainer(object):

     def load_and_enqueue(self, session):
         """
         Injecting data in the place holder queue
+
+        **Parameters**
+
+        session: Tensorflow session
         """

         while not self.thread_pool.should_stop():
@@ -204,8 +243,7 @@ class Trainer(object):

     def train(self, train_data_shuffler, validation_data_shuffler=None):
         """
-        Do the loop forward --> backward --|
-                     ^--------------------|
+        Train the network
         """

         # Creating directory
@@ -220,14 +258,14 @@ class Trainer(object):
                                           self.weight_decay  # Decay step
                                           )

-        self.training_graph = self.compute_graph(train_data_shuffler, name="train")
+        self.training_graph = self.compute_graph(train_data_shuffler, prefetch=self.prefetch, name="train")

         # Preparing the optimizer
         self.optimizer_class._learning_rate = self.learning_rate
         self.optimizer = self.optimizer_class.minimize(self.training_graph, global_step=tf.Variable(0))

         # Train summary
-        self.summaries_train = self.__create_general_summary()
+        self.summaries_train = self.create_general_summary()

         logger.info("Initializing !!")
         # Training
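
The learning-rate construction is cut off in the hunk above, but the visible arguments (a base rate plus `self.weight_decay` as the decay step) suggest a schedule like tf.train.exponential_decay. A hedged sketch of such a schedule, with hypothetical values:

    import tensorflow as tf  # written against the 0.x API used in this repo

    base_learning_rate = 0.001  # hypothetical
    weight_decay = 1000         # used as the decay step, per the comment above

    global_step = tf.Variable(0, trainable=False)
    # Multiplies the learning rate by `decay_rate` every `weight_decay` steps.
    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               global_step,
                                               weight_decay,
                                               decay_rate=0.95,  # assumed
                                               staircase=True)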
@@ -247,13 +285,19 @@ class Trainer(object):

             self.train_summary_writter = tf.train.SummaryWriter(os.path.join(self.temp_dir, 'train'), session.graph)

             for step in range(self.iterations):
-                self.__fit(session, step)
+
+                start = time.time()
+                self.fit(session, step)
+                end = time.time()
+                summary = summary_pb2.Summary.Value(tag="elapsed_time", simple_value=float(end-start))
+                self.train_summary_writter.add_summary(summary_pb2.Summary(value=[summary]), step)

                 if validation_data_shuffler is not None and step % self.snapshot == 0:
-                    self.__compute_validation(session, validation_data_shuffler, step)
+                    self.compute_validation(session, validation_data_shuffler, step)

                     if self.analizer is not None:
                         self.validation_summary_writter.add_summary(self.analizer(
-                            validation_data_shuffler, self.architecture, session), step)
+                            validation_data_shuffler, self, session), step)

         logger.info("Training finally finished")
 ...
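
The new timing code above writes a hand-built protobuf summary rather than a graph-level tf.scalar_summary, which is handy for values (like wall-clock time per step) that never live in the graph. A self-contained sketch of the same trick:

    import time
    from tensorflow.core.framework import summary_pb2

    start = time.time()
    # ... run one training step here ...
    elapsed = time.time() - start

    value = summary_pb2.Summary.Value(tag="elapsed_time",
                                      simple_value=float(elapsed))
    # writer is a tf.train.SummaryWriter, as in the trainer above:
    # writer.add_summary(summary_pb2.Summary(value=[value]), step)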
@@ -79,6 +79,7 @@ setup(
             'train_mnist_triplet.py = bob.learn.tensorflow.script.train_mnist_triplet:main',
             'train_siamese_casia_webface.py = bob.learn.tensorflow.script.train_siamese_casia_webface:main',
+            'train_mobio.py = bob.learn.tensorflow.script.train_mobio:main',
         ],
 ...
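
For reference, the new entry follows setuptools' console_scripts convention: installing the package generates an executable named train_mobio.py that calls main() in bob.learn.tensorflow.script.train_mobio. The general shape (version number is a placeholder):

    from setuptools import setup

    setup(
        name="bob.learn.tensorflow",
        version="0.0.1",  # placeholder
        entry_points={
            'console_scripts': [
                # <installed script> = <package.module>:<callable>
                'train_mobio.py = bob.learn.tensorflow.script.train_mobio:main',
            ],
        },
    )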