Solved issues with the trainer

parent bc57c932
@@ -22,7 +22,7 @@ class Constant(Initialization):
     """
-    def __init__(self, constant_value=0.1, use_gpu=False):
+    def __init__(self, constant_value=0.1, use_gpu=False, seed=None):
         self.constant_value = constant_value
         super(Constant, self).__init__(seed=None, use_gpu=use_gpu)
...
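Note that `Constant` now accepts a `seed` argument, presumably for interface uniformity with the other initializations; it still forwards `seed=None` to the base class, which is harmless since constant initialization draws no random numbers. A minimal sketch of the seeding pattern this commit moves toward, with signatures assumed from the hunks in this diff:

    # sketch only; signatures assumed from the hunks in this diff
    from bob.learn.tensorflow.initialization import Xavier, Constant

    weights_init = Xavier(seed=10)                      # the seed now lives on the initializer
    bias_init = Constant(constant_value=0.1, seed=10)   # accepted, effectively unused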
@@ -20,8 +20,7 @@ class Conv2D(Layer):
                  filters=8,
                  weights_initialization=Xavier(),
                  bias_initialization=Constant(),
-                 use_gpu=False,
-                 seed=10
+                 use_gpu=False
                  ):
         """
         Constructor
@@ -39,8 +38,8 @@ class Conv2D(Layer):
                                      activation=activation,
                                      weights_initialization=weights_initialization,
                                      bias_initialization=bias_initialization,
-                                     use_gpu=use_gpu,
-                                     seed=seed)
+                                     use_gpu=use_gpu
+                                     )
         self.kernel_size = kernel_size
         self.filters = filters
         self.W = None
@@ -57,7 +56,8 @@ class Conv2D(Layer):
         if self.W is None:
             self.W = self.weights_initialization(shape=[self.kernel_size, self.kernel_size, n_channels, self.filters],
-                                                 name="w_" + str(self.name))
+                                                 name="w_" + str(self.name)
+                                                 )
         if self.activation is not None:
             self.b = self.bias_initialization(shape=[self.filters],
...
@@ -10,7 +10,6 @@ from bob.learn.tensorflow.initialization import Xavier
 from bob.learn.tensorflow.initialization import Constant

 class FullyConnected(Layer):
     """
@@ -23,7 +22,6 @@ class FullyConnected(Layer):
                  weights_initialization=Xavier(),
                  bias_initialization=Constant(),
                  use_gpu=False,
-                 seed=10
                  ):
         """
         Constructor
@@ -40,8 +38,7 @@ class FullyConnected(Layer):
                                              activation=activation,
                                              weights_initialization=weights_initialization,
                                              bias_initialization=bias_initialization,
-                                             use_gpu=use_gpu,
-                                             seed=seed)
+                                             use_gpu=use_gpu)
         self.output_dim = output_dim
         self.W = None
...
@@ -18,8 +18,7 @@ class Layer(object):
                  activation=None,
                  weights_initialization=Xavier(),
                  bias_initialization=Constant(),
-                 use_gpu=False,
-                 seed=10):
+                 use_gpu=False):
         """
         Base constructor
@@ -35,7 +34,6 @@ class Layer(object):
         self.weights_initialization = weights_initialization
         self.bias_initialization = bias_initialization
         self.use_gpu = use_gpu
-        self.seed = seed
         self.input_layer = None
         self.activation = activation
...
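With `seed` gone from `Layer` and its subclasses, reproducibility is configured on the initialization objects instead. A hedged construction sketch — values are illustrative, and `Conv2D` is assumed importable from `bob.learn.tensorflow.layers`, as the Lenet import below suggests:

    from bob.learn.tensorflow.layers import Conv2D
    from bob.learn.tensorflow.initialization import Xavier, Constant

    # before: Conv2D(..., use_gpu=False, seed=10)
    # after:  the layer takes no seed; the initializer carries it
    conv1 = Conv2D(name="conv1",
                   kernel_size=5,
                   filters=16,
                   weights_initialization=Xavier(seed=10),
                   bias_initialization=Constant())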
@@ -11,6 +11,9 @@ import tensorflow as tf
 from .SequenceNetwork import SequenceNetwork
 from ..layers import Conv2D, FullyConnected, MaxPooling
 import bob.learn.tensorflow
+from bob.learn.tensorflow.initialization import Xavier
+from bob.learn.tensorflow.initialization import Constant

 class Lenet(SequenceNetwork):
@@ -25,7 +28,8 @@ class Lenet(SequenceNetwork):
                  n_classes=10,
                  default_feature_layer="fc2",
-                 seed=10, use_gpu = False):
+                 seed=10,
+                 use_gpu = False):
         """
         Create all the necessary variables for this CNN
@@ -44,9 +48,25 @@ class Lenet(SequenceNetwork):
         super(Lenet, self).__init__(default_feature_layer=default_feature_layer)

         self.add(Conv2D(name="conv1", kernel_size=conv1_kernel_size,
-                        filters=conv1_output, activation=tf.nn.tanh))
+                        filters=conv1_output,
+                        activation=tf.nn.tanh,
+                        weights_initialization=Xavier(seed=seed),
+                        bias_initialization=Constant()
+                        ))
         self.add(MaxPooling(name="pooling1"))
-        self.add(Conv2D(name="conv2", kernel_size=conv2_kernel_size, filters=conv2_output, activation=tf.nn.tanh))
+        self.add(Conv2D(name="conv2", kernel_size=conv2_kernel_size,
+                        filters=conv2_output,
+                        activation=tf.nn.tanh,
+                        weights_initialization=Xavier(seed=seed),
+                        bias_initialization=Constant()
+                        ))
         self.add(MaxPooling(name="pooling2"))
-        self.add(FullyConnected(name="fc1", output_dim=fc1_output, activation=tf.nn.tanh))
-        self.add(FullyConnected(name="fc2", output_dim=n_classes, activation=None))
+        self.add(FullyConnected(name="fc1", output_dim=fc1_output,
+                                activation=tf.nn.tanh,
+                                weights_initialization=Xavier(seed=seed),
+                                bias_initialization=Constant()
+                                ))
+        self.add(FullyConnected(name="fc2", output_dim=n_classes,
+                                activation=None,
+                                weights_initialization=Xavier(seed=seed),
+                                bias_initialization=Constant()))
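The practical effect, hedged since it depends on how `Xavier` consumes its seed internally: a single `seed` argument now threads through every layer's weight initializer, so two identically seeded networks should start from the same weights.

    # illustrative; both networks are built with Xavier(seed=10) on every
    # layer, so their initial weight tensors should match draw-for-draw
    net_a = Lenet(seed=10)
    net_b = Lenet(seed=10)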
@@ -21,7 +21,8 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
     Base class to create architectures using TensorFlow
     """

-    def __init__(self, default_feature_layer=None):
+    def __init__(self,
+                 default_feature_layer=None):
         """
         Base constructor
@@ -72,13 +73,8 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
         if session is None:
             session = tf.Session()

-        batch_size = data.shape[0]
-        width = data.shape[1]
-        height = data.shape[2]
-        channels = data.shape[3]
-
         # Feeding the placeholder
-        feature_placeholder = tf.placeholder(tf.float32, shape=(batch_size, width, height, channels), name="feature")
+        feature_placeholder = tf.placeholder(tf.float32, shape=data.shape, name="feature")
         feed_dict = {feature_placeholder: data}

         if feature_layer is None:
...
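The simplification relies on a numpy shape tuple being usable directly as a TensorFlow placeholder shape, so the four hand-unpacked dimensions were redundant. A minimal sketch, with an illustrative array size:

    import numpy
    import tensorflow as tf

    data = numpy.zeros((128, 28, 28, 1), dtype="float32")
    # data.shape is a tuple and is accepted directly; no need to unpack
    # batch/width/height/channels by hand
    feature_placeholder = tf.placeholder(tf.float32, shape=data.shape, name="feature")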
@@ -4,6 +4,7 @@ __path__ = extend_path(__path__, __name__)
 from .SequenceNetwork import SequenceNetwork
 from .Lenet import Lenet
+from .MLP import MLP

 # gets sphinx autodoc done right - don't remove it
 __all__ = [_ for _ in dir() if not _.startswith('_')]
@@ -23,7 +23,7 @@ import tensorflow as tf
 from .. import util
 SEED = 10
 from bob.learn.tensorflow.data import MemoryDataShuffler, TextDataShuffler
-from bob.learn.tensorflow.network import Lenet
+from bob.learn.tensorflow.network import Lenet, MLP
 from bob.learn.tensorflow.trainers import Trainer
 from bob.learn.tensorflow.loss import BaseLoss
@@ -85,11 +85,17 @@ def main():
                                                  input_shape=[80, 64, 1],
                                                  scale=False,
                                                  batch_size=VALIDATION_BATCH_SIZE)

     # Preparing the architecture
-    lenet = Lenet()
-
-    loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
-    trainer = Trainer(architecture=lenet, loss=loss, iterations=ITERATIONS)
-    trainer.train(train_data_shuffler, validation_data_shuffler)
+    cnn = True
+    if cnn:
+        lenet = Lenet(seed=SEED)
+        loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
+        trainer = Trainer(architecture=lenet, loss=loss, iterations=ITERATIONS)
+        trainer.train(train_data_shuffler, validation_data_shuffler)
+    else:
+        mlp = MLP(10, hidden_layers=[15, 20])
+        loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
+        trainer = Trainer(architecture=mlp, loss=loss, iterations=ITERATIONS)
+        trainer.train(train_data_shuffler, validation_data_shuffler)
@@ -23,10 +23,9 @@ import tensorflow as tf
 from .. import util
 SEED = 10
 from bob.learn.tensorflow.data import MemoryDataShuffler, TextDataShuffler
-from bob.learn.tensorflow.network import Lenet
+from bob.learn.tensorflow.network import Lenet, MLP
 from bob.learn.tensorflow.trainers import SiameseTrainer
 from bob.learn.tensorflow.loss import ContrastiveLoss
-import bob.db.mobio
 import numpy

 def main():
@@ -40,7 +39,7 @@ def main():
     perc_train = 0.9

     # Loading data
-    mnist = False
+    mnist = True

     if mnist:
         train_data, train_labels, validation_data, validation_labels = \
@@ -89,15 +88,25 @@ def main():
     # Preparing the architecture
     n_classes = len(train_data_shuffler.possible_labels)
-    lenet = Lenet(default_feature_layer="fc2", n_classes=n_classes)
-
-    loss = ContrastiveLoss()
-    trainer = SiameseTrainer(architecture=lenet,
-                             loss=loss,
-                             iterations=ITERATIONS,
-                             base_lr=0.0001,
-                             save_intermediate=False,
-                             snapshot=VALIDATION_TEST)
-    trainer.train(train_data_shuffler, validation_data_shuffler)
+    cnn = True
+    if cnn:
+        lenet = Lenet(default_feature_layer="fc2", n_classes=n_classes)
+        loss = ContrastiveLoss()
+        trainer = SiameseTrainer(architecture=lenet,
+                                 loss=loss,
+                                 iterations=ITERATIONS,
+                                 snapshot=VALIDATION_TEST)
+        trainer.train(train_data_shuffler, validation_data_shuffler)
+    else:
+        mlp = MLP(n_classes, hidden_layers=[15, 20])
+        loss = ContrastiveLoss()
+        trainer = SiameseTrainer(architecture=mlp,
+                                 loss=loss,
+                                 iterations=ITERATIONS,
+                                 snapshot=VALIDATION_TEST)
+        trainer.train(train_data_shuffler, validation_data_shuffler)
@@ -10,44 +10,41 @@ import threading
 from ..analyzers import Analizer
 from ..network import SequenceNetwork
 import bob.io.base
+from .Trainer import Trainer
 import os

-class SiameseTrainer(object):
+class SiameseTrainer(Trainer):

     def __init__(self,
-                 architecture=None,
+                 architecture,
+                 optimizer=tf.train.AdamOptimizer(),
                  use_gpu=False,
                  loss=None,
                  temp_dir="",
-                 save_intermediate=False,
+
+                 # Learning rate
+                 base_learning_rate=0.001,
+                 weight_decay=0.9,

                  ###### training options ##########
-                 convergence_threshold = 0.01,
+                 convergence_threshold=0.01,
                  iterations=5000,
-                 base_lr=0.001,
-                 momentum=0.9,
-                 weight_decay=0.95,
-                 # The learning rate policy
                  snapshot=100):

-        self.loss = loss
-        self.loss_instance = None
-        self.optimizer = None
-        self.temp_dir = temp_dir
-        self.save_intermediate = save_intermediate
-
-        self.architecture = architecture
-        self.use_gpu = use_gpu
-
-        self.iterations = iterations
-        self.snapshot = snapshot
-        self.base_lr = base_lr
-        self.momentum = momentum
-        self.weight_decay = weight_decay
-        self.convergence_threshold = convergence_threshold
+        super(SiameseTrainer, self).__init__(
+            architecture=architecture,
+            optimizer=optimizer,
+            use_gpu=use_gpu,
+            loss=loss,
+            temp_dir=temp_dir,
+            base_learning_rate=base_learning_rate,
+            weight_decay=weight_decay,
+            convergence_threshold=convergence_threshold,
+            iterations=iterations,
+            snapshot=snapshot
+        )

     def train(self, train_data_shuffler, validation_data_shuffler=None):
         """
@@ -77,6 +74,14 @@ class SiameseTrainer(object):
                 session.run(enqueue_op, feed_dict=feed_dict)

+        # TODO: find an elegant way to provide this as a parameter of the trainer
+        learning_rate = tf.train.exponential_decay(
+            self.base_learning_rate,  # Learning rate
+            train_data_shuffler.batch_size,
+            train_data_shuffler.n_samples,
+            self.weight_decay  # Decay step
+        )
+
         bob.io.base.create_directories_safe(os.path.join(self.temp_dir, 'OUTPUT'))

         # Creating two graphs
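For reference, `tf.train.exponential_decay(learning_rate, global_step, decay_steps, decay_rate)` computes `learning_rate * decay_rate ** (global_step / decay_steps)`. A quick numeric sketch with illustrative values; note that here a constant (`batch_size`) sits in the `global_step` slot, so as written the rate stays fixed for the whole run — presumably part of what the TODO above refers to:

    # numeric sketch of tf.train.exponential_decay with staircase=False;
    # all values are illustrative
    base_lr = 0.001      # self.base_learning_rate
    global_step = 128    # here: train_data_shuffler.batch_size (a constant)
    decay_steps = 60000  # here: train_data_shuffler.n_samples
    decay_rate = 0.9     # here: self.weight_decay

    lr = base_lr * decay_rate ** (float(global_step) / decay_steps)
    print(lr)  # ~0.0009998; constant inputs mean a constant learning rate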
@@ -110,18 +115,9 @@ class SiameseTrainer(object):
                                          train_left_graph,
                                          train_right_graph)

-        batch = tf.Variable(0)
-
-        learning_rate = tf.train.exponential_decay(
-            self.base_lr,  # Learning rate
-            batch * train_data_shuffler.batch_size,
-            train_data_shuffler.n_samples,
-            self.weight_decay  # Decay step
-        )
-
-        #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss_train,
-        #                                              global_step=batch)
-        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.99, use_locking=False,
-                                               name='Momentum').minimize(loss_train, global_step=batch)
+        # Preparing the optimizer
+        self.optimizer._learning_rate = learning_rate
+        optimizer = self.optimizer.minimize(loss_train, global_step=tf.Variable(0))

         print("Initializing !!")
         # Training
@@ -154,8 +150,8 @@ class SiameseTrainer(object):
                 if validation_data_shuffler is not None and step % self.snapshot == 0:
                     analizer()
-                    if self.save_intermediate:
-                        self.architecture.save(hdf5, step)
+                    #if self.save_intermediate:
+                    #    self.architecture.save(hdf5, step)
                     print str(step) + " - " + str(analizer.eer[-1])

         self.architecture.save(hdf5)
...
@@ -16,36 +16,46 @@ import bob.io.base
 class Trainer(object):

     def __init__(self,
-                 architecture=None,
+                 architecture,
+                 optimizer=tf.train.AdamOptimizer(),
                  use_gpu=False,
                  loss=None,
                  temp_dir="",
+
+                 # Learning rate
+                 base_learning_rate=0.001,
+                 weight_decay=0.9,

                  ###### training options ##########
-                 convergence_threshold = 0.01,
+                 convergence_threshold=0.01,
                  iterations=5000,
-                 base_lr=0.001,
-                 momentum=0.9,
-                 weight_decay=0.95,
-                 # The learning rate policy
                  snapshot=100):
+        """
+        **Parameters**
+
+          architecture: The architecture that you want to run. Should be a :py:class:`bob.learn.tensorflow.network.SequenceNetwork`
+          optimizer: One of the tensorflow optimizers https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html
+          use_gpu: Use GPUs in the training
+          loss: Loss
+          temp_dir:
+          iterations:
+          snapshot:
+          convergence_threshold:
+        """

-        self.loss = loss
-        self.loss_instance = None
-        self.optimizer = None
-        self.temp_dir=temp_dir
+        if not isinstance(architecture, SequenceNetwork):
+            raise ValueError("`architecture` should be instance of `SequenceNetwork`")

         self.architecture = architecture
+        self.optimizer = optimizer
         self.use_gpu = use_gpu
+        self.loss = loss
+        self.temp_dir = temp_dir
+
+        self.base_learning_rate = base_learning_rate
+        self.weight_decay = weight_decay

         self.iterations = iterations
         self.snapshot = snapshot
-        self.base_lr = base_lr
-        self.momentum = momentum
-        self.weight_decay = weight_decay
         self.convergence_threshold = convergence_threshold

     def train(self, train_data_shuffler, validation_data_shuffler=None):
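A hedged sketch of the new fail-fast behavior; the `BaseLoss` construction mirrors the example script earlier in this diff:

    # illustrative; exercises the new isinstance() guard
    import tensorflow as tf
    from bob.learn.tensorflow.network import Lenet
    from bob.learn.tensorflow.trainers import Trainer
    from bob.learn.tensorflow.loss import BaseLoss

    loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
    trainer = Trainer(architecture=Lenet(seed=10), loss=loss)  # OK

    try:
        Trainer(architecture="not-a-network", loss=loss)
    except ValueError as err:
        print(err)  # `architecture` should be instance of `SequenceNetwork`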
@@ -77,6 +87,14 @@ class Trainer(object):
                 session.run(enqueue_op, feed_dict=feed_dict)

+        # TODO: find an elegant way to provide this as a parameter of the trainer
+        learning_rate = tf.train.exponential_decay(
+            self.base_learning_rate,  # Learning rate
+            train_data_shuffler.batch_size,
+            train_data_shuffler.n_samples,
+            self.weight_decay  # Decay step
+        )
+
         # Defining place holders
         train_placeholder_data, train_placeholder_labels = train_data_shuffler.get_placeholders_forprefetch(name="train")

         if validation_data_shuffler is not None:
@@ -105,15 +123,9 @@ class Trainer(object):
             loss_validation = self.loss(validation_graph, validation_placeholder_labels)
             validation_prediction = tf.nn.softmax(validation_graph)

-        batch = tf.Variable(0)
-        learning_rate = tf.train.exponential_decay(
-            self.base_lr,  # Learning rate
-            batch * train_data_shuffler.batch_size,
-            train_data_shuffler.n_samples,
-            self.weight_decay  # Decay step
-        )
-        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss_train,
-                                                      global_step=batch)
+        # Preparing the optimizer
+        self.optimizer._learning_rate = learning_rate
+        optimizer = self.optimizer.minimize(loss_train, global_step=tf.Variable(0))

         print("Initializing !!")
         # Training
...
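One caveat worth flagging, hedged since it depends on TensorFlow internals of this era: `_learning_rate` is a private attribute, and the default `tf.train.AdamOptimizer` appears to store its rate as `_lr` rather than `_learning_rate`, so the assignment above may silently not take effect for the default optimizer. A sketch of an alternative that avoids poking internals — a suggestion, not what this commit does:

    # hypothetical alternative: build the optimizer only after the decayed
    # rate exists, e.g. by letting the trainer accept an optimizer factory
    # instead of an optimizer instance
    optimizer_factory = lambda lr: tf.train.GradientDescentOptimizer(lr)
    optimizer = optimizer_factory(learning_rate).minimize(
        loss_train, global_step=tf.Variable(0))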