Solved issues with the trainer

parent bc57c932
@@ -22,7 +22,7 @@ class Constant(Initialization):
     """
-    def __init__(self, constant_value=0.1, use_gpu=False):
+    def __init__(self, constant_value=0.1, use_gpu=False, seed=None):
         self.constant_value = constant_value
         super(Constant, self).__init__(seed=None, use_gpu=use_gpu)
......
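Note (not part of the diff): a minimal usage sketch of the new Constant signature, using the import path that appears elsewhere in this commit. A constant fill is deterministic, so the new seed keyword is presumably only there to keep the initializer interfaces uniform.

    from bob.learn.tensorflow.initialization import Constant

    # `seed` is now accepted by the constructor; the constant value itself
    # does not depend on any random state.
    bias_init = Constant(constant_value=0.1, use_gpu=False, seed=None)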
@@ -20,8 +20,7 @@ class Conv2D(Layer):
                  filters=8,
                  weights_initialization=Xavier(),
                  bias_initialization=Constant(),
-                 use_gpu=False,
-                 seed=10
+                 use_gpu=False
                  ):
         """
         Constructor
@@ -39,8 +38,8 @@ class Conv2D(Layer):
                                      activation=activation,
                                      weights_initialization=weights_initialization,
                                      bias_initialization=bias_initialization,
-                                     use_gpu=use_gpu,
-                                     seed=seed)
+                                     use_gpu=use_gpu
+                                     )
         self.kernel_size = kernel_size
         self.filters = filters
         self.W = None
@@ -57,7 +56,8 @@ class Conv2D(Layer):
         if self.W is None:
             self.W = self.weights_initialization(shape=[self.kernel_size, self.kernel_size, n_channels, self.filters],
-                                                 name="w_" + str(self.name))
+                                                 name="w_" + str(self.name)
+                                                 )
             if self.activation is not None:
                 self.b = self.bias_initialization(shape=[self.filters],
......
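Note (not part of the diff): with the seed argument removed from the layer, reproducibility is now controlled entirely through the initialization objects. A rough sketch of building a convolutional layer under the new signature; the argument values are only illustrative.

    import tensorflow as tf
    from bob.learn.tensorflow.initialization import Xavier, Constant
    from bob.learn.tensorflow.layers import Conv2D

    conv1 = Conv2D(name="conv1",
                   kernel_size=5,
                   filters=8,
                   activation=tf.nn.tanh,
                   weights_initialization=Xavier(seed=10),  # the seed lives in the initializer now
                   bias_initialization=Constant(),
                   use_gpu=False)                            # no `seed` keyword anymore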
@@ -10,7 +10,6 @@ from bob.learn.tensorflow.initialization import Xavier
 from bob.learn.tensorflow.initialization import Constant
 class FullyConnected(Layer):
     """
@@ -23,7 +22,6 @@ class FullyConnected(Layer):
                  weights_initialization=Xavier(),
                  bias_initialization=Constant(),
                  use_gpu=False,
-                 seed=10
                  ):
         """
         Constructor
@@ -40,8 +38,7 @@ class FullyConnected(Layer):
                                              activation=activation,
                                              weights_initialization=weights_initialization,
                                              bias_initialization=bias_initialization,
-                                             use_gpu=use_gpu,
-                                             seed=seed)
+                                             use_gpu=use_gpu)
         self.output_dim = output_dim
         self.W = None
......
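Note (not part of the diff): the same pattern applies to FullyConnected. If the initializers are omitted, the defaults from the signature (Xavier() and Constant()) are used, i.e. without a fixed seed. A short sketch:

    import tensorflow as tf
    from bob.learn.tensorflow.layers import FullyConnected

    # Uses the default Xavier()/Constant() initializers (no fixed seed).
    fc1 = FullyConnected(name="fc1", output_dim=50, activation=tf.nn.tanh)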
@@ -18,8 +18,7 @@ class Layer(object):
                  activation=None,
                  weights_initialization=Xavier(),
                  bias_initialization=Constant(),
-                 use_gpu=False,
-                 seed=10):
+                 use_gpu=False):
         """
         Base constructor
@@ -35,7 +34,6 @@ class Layer(object):
         self.weights_initialization = weights_initialization
         self.bias_initialization = bias_initialization
         self.use_gpu = use_gpu
-        self.seed = seed
         self.input_layer = None
         self.activation = activation
......
@@ -11,6 +11,9 @@ import tensorflow as tf
 from .SequenceNetwork import SequenceNetwork
 from ..layers import Conv2D, FullyConnected, MaxPooling
 import bob.learn.tensorflow
+from bob.learn.tensorflow.initialization import Xavier
+from bob.learn.tensorflow.initialization import Constant
 class Lenet(SequenceNetwork):
@@ -25,7 +28,8 @@ class Lenet(SequenceNetwork):
                  n_classes=10,
                  default_feature_layer="fc2",
-                 seed=10, use_gpu = False):
+                 seed=10,
+                 use_gpu = False):
         """
         Create all the necessary variables for this CNN
@@ -44,9 +48,25 @@ class Lenet(SequenceNetwork):
         super(Lenet, self).__init__(default_feature_layer=default_feature_layer)
         self.add(Conv2D(name="conv1", kernel_size=conv1_kernel_size,
-                        filters=conv1_output, activation=tf.nn.tanh))
+                        filters=conv1_output,
+                        activation=tf.nn.tanh,
+                        weights_initialization=Xavier(seed=seed),
+                        bias_initialization=Constant()
+                        ))
         self.add(MaxPooling(name="pooling1"))
-        self.add(Conv2D(name="conv2", kernel_size=conv2_kernel_size, filters=conv2_output, activation=tf.nn.tanh))
+        self.add(Conv2D(name="conv2", kernel_size=conv2_kernel_size,
+                        filters=conv2_output,
+                        activation=tf.nn.tanh,
+                        weights_initialization=Xavier(seed=seed),
+                        bias_initialization=Constant()
+                        ))
         self.add(MaxPooling(name="pooling2"))
-        self.add(FullyConnected(name="fc1", output_dim=fc1_output, activation=tf.nn.tanh))
-        self.add(FullyConnected(name="fc2", output_dim=n_classes, activation=None))
+        self.add(FullyConnected(name="fc1", output_dim=fc1_output,
+                                activation=tf.nn.tanh,
+                                weights_initialization=Xavier(seed=seed),
+                                bias_initialization=Constant()
+                                ))
+        self.add(FullyConnected(name="fc2", output_dim=n_classes,
+                                activation=None,
+                                weights_initialization=Xavier(seed=seed),
+                                bias_initialization=Constant()))
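Note (not part of the diff): the single seed argument of Lenet now fans out to the Xavier initializer of every layer, as shown above. A rough construction sketch using only keyword arguments confirmed by this commit:

    from bob.learn.tensorflow.network import Lenet

    # One seed propagates to the Xavier initialization of conv1, conv2, fc1 and fc2.
    lenet = Lenet(n_classes=10, seed=10, use_gpu=False)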
@@ -21,7 +21,8 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
     Base class to create architectures using TensorFlow
     """
-    def __init__(self, default_feature_layer=None):
+    def __init__(self,
+                 default_feature_layer=None):
         """
         Base constructor
@@ -72,13 +73,8 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
         if session is None:
             session = tf.Session()
         batch_size = data.shape[0]
-        width = data.shape[1]
-        height = data.shape[2]
-        channels = data.shape[3]
         # Feeding the placeholder
-        feature_placeholder = tf.placeholder(tf.float32, shape=(batch_size, width, height, channels), name="feature")
+        feature_placeholder = tf.placeholder(tf.float32, shape=data.shape, name="feature")
         feed_dict = {feature_placeholder: data}
         if feature_layer is None:
......
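Note (not part of the diff): the feature placeholder is now built directly from data.shape, so the width/height/channels bookkeeping is no longer needed. A minimal standalone sketch of the simplified wiring (TensorFlow 0.x/1.x API, hypothetical input array):

    import numpy
    import tensorflow as tf

    data = numpy.zeros((16, 28, 28, 1), dtype="float32")   # hypothetical batch
    # The placeholder shape mirrors the input array exactly.
    feature_placeholder = tf.placeholder(tf.float32, shape=data.shape, name="feature")
    feed_dict = {feature_placeholder: data}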
@@ -4,6 +4,7 @@ __path__ = extend_path(__path__, __name__)
 from .SequenceNetwork import SequenceNetwork
 from .Lenet import Lenet
+from .MLP import MLP
 # gets sphinx autodoc done right - don't remove it
 __all__ = [_ for _ in dir() if not _.startswith('_')]
@@ -23,7 +23,7 @@ import tensorflow as tf
 from .. import util
 SEED = 10
 from bob.learn.tensorflow.data import MemoryDataShuffler, TextDataShuffler
-from bob.learn.tensorflow.network import Lenet
+from bob.learn.tensorflow.network import Lenet, MLP
 from bob.learn.tensorflow.trainers import Trainer
 from bob.learn.tensorflow.loss import BaseLoss
@@ -85,11 +85,17 @@ def main():
                                                  input_shape=[80, 64, 1],
                                                  scale=False,
                                                  batch_size=VALIDATION_BATCH_SIZE)
-    # Preparing the architecture
-    lenet = Lenet()
+    # Preparing the architecture
+    cnn = True
+    if cnn:
+        lenet = Lenet(seed=SEED)
+        loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
+        trainer = Trainer(architecture=lenet, loss=loss, iterations=ITERATIONS)
+        trainer.train(train_data_shuffler, validation_data_shuffler)
+    else:
+        mlp = MLP(10, hidden_layers=[15, 20])
+        loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
+        trainer = Trainer(architecture=mlp, loss=loss, iterations=ITERATIONS)
+        trainer.train(train_data_shuffler, validation_data_shuffler)
@@ -23,10 +23,9 @@ import tensorflow as tf
 from .. import util
 SEED = 10
 from bob.learn.tensorflow.data import MemoryDataShuffler, TextDataShuffler
-from bob.learn.tensorflow.network import Lenet
+from bob.learn.tensorflow.network import Lenet, MLP
 from bob.learn.tensorflow.trainers import SiameseTrainer
 from bob.learn.tensorflow.loss import ContrastiveLoss
-import bob.db.mobio
 import numpy
 def main():
@@ -40,7 +39,7 @@ def main():
     perc_train = 0.9
     # Loading data
-    mnist = False
+    mnist = True
     if mnist:
         train_data, train_labels, validation_data, validation_labels = \
@@ -89,15 +88,25 @@ def main():
     # Preparing the architecture
     n_classes = len(train_data_shuffler.possible_labels)
+    cnn = True
+    if cnn:
+        lenet = Lenet(default_feature_layer="fc2", n_classes=n_classes)
+        loss = ContrastiveLoss()
+        trainer = SiameseTrainer(architecture=lenet,
+                                 loss=loss,
+                                 iterations=ITERATIONS,
+                                 base_lr=0.0001,
+                                 save_intermediate=False,
+                                 snapshot=VALIDATION_TEST)
+        trainer.train(train_data_shuffler, validation_data_shuffler)
+    else:
+        mlp = MLP(n_classes, hidden_layers=[15, 20])
+        loss = ContrastiveLoss()
+        trainer = SiameseTrainer(architecture=mlp,
+                                 loss=loss,
+                                 iterations=ITERATIONS,
+                                 snapshot=VALIDATION_TEST)
+        trainer.train(train_data_shuffler, validation_data_shuffler)
@@ -10,44 +10,41 @@ import threading
 from ..analyzers import Analizer
 from ..network import SequenceNetwork
 import bob.io.base
+from .Trainer import Trainer
 import os
-class SiameseTrainer(object):
+class SiameseTrainer(Trainer):
     def __init__(self,
-                 architecture=None,
+                 architecture,
+                 optimizer=tf.train.AdamOptimizer(),
                  use_gpu=False,
                  loss=None,
                  temp_dir="",
-                 save_intermediate=False,
+                 # Learning rate
+                 base_learning_rate=0.001,
+                 weight_decay=0.9,
                  ###### training options ##########
-                 convergence_threshold = 0.01,
+                 convergence_threshold=0.01,
                  iterations=5000,
-                 base_lr=0.001,
-                 momentum=0.9,
-                 weight_decay=0.95,
                  # The learning rate policy
                  snapshot=100):
-        self.loss = loss
-        self.loss_instance = None
-        self.optimizer = None
-        self.temp_dir = temp_dir
-        self.save_intermediate = save_intermediate
-        self.architecture = architecture
-        self.use_gpu = use_gpu
+        super(SiameseTrainer, self).__init__(
+            architecture=architecture,
+            optimizer=optimizer,
+            use_gpu=use_gpu,
+            loss=loss,
+            temp_dir=temp_dir,
+            base_learning_rate=base_learning_rate,
+            weight_decay=weight_decay,
+            convergence_threshold=convergence_threshold,
+            iterations=iterations,
+            snapshot=snapshot
+        )
-        self.iterations = iterations
-        self.snapshot = snapshot
-        self.base_lr = base_lr
-        self.momentum = momentum
-        self.weight_decay = weight_decay
-        self.convergence_threshold = convergence_threshold
     def train(self, train_data_shuffler, validation_data_shuffler=None):
         """
@@ -77,6 +74,14 @@ class SiameseTrainer(object):
                 session.run(enqueue_op, feed_dict=feed_dict)
+        # TODO: find an elegant way to provide this as a parameter of the trainer
+        learning_rate = tf.train.exponential_decay(
+            self.base_learning_rate, # Learning rate
+            train_data_shuffler.batch_size,
+            train_data_shuffler.n_samples,
+            self.weight_decay # Decay step
+        )
         bob.io.base.create_directories_safe(os.path.join(self.temp_dir, 'OUTPUT'))
         # Creating two graphs
@@ -110,18 +115,9 @@ class SiameseTrainer(object):
                                              train_left_graph,
                                              train_right_graph)
-        batch = tf.Variable(0)
-        learning_rate = tf.train.exponential_decay(
-            self.base_lr, # Learning rate
-            batch * train_data_shuffler.batch_size,
-            train_data_shuffler.n_samples,
-            self.weight_decay # Decay step
-        )
-        #optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss_train,
-        #                                              global_step=batch)
-        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.99, use_locking=False,
-                                               name='Momentum').minimize(loss_train, global_step=batch)
+        # Preparing the optimizer
+        self.optimizer._learning_rate = learning_rate
+        optimizer = self.optimizer.minimize(loss_train, global_step=tf.Variable(0))
         print("Initializing !!")
         # Training
@@ -154,8 +150,8 @@ class SiameseTrainer(object):
                 if validation_data_shuffler is not None and step % self.snapshot == 0:
                     analizer()
-                    if self.save_intermediate:
-                        self.architecture.save(hdf5, step)
+                    #if self.save_intermediate:
+                    #    self.architecture.save(hdf5, step)
                     print str(step) + " - " + str(analizer.eer[-1])
             self.architecture.save(hdf5)
......
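Note (not part of the diff): SiameseTrainer now inherits from Trainer and simply forwards its arguments to the base constructor, and the hard-coded MomentumOptimizer is replaced by whatever optimizer object is passed in. A rough construction sketch under the new signature; the values are illustrative:

    import tensorflow as tf
    from bob.learn.tensorflow.network import Lenet
    from bob.learn.tensorflow.loss import ContrastiveLoss
    from bob.learn.tensorflow.trainers import SiameseTrainer

    trainer = SiameseTrainer(architecture=Lenet(default_feature_layer="fc2", n_classes=10, seed=10),
                             loss=ContrastiveLoss(),
                             optimizer=tf.train.AdamOptimizer(),  # any tf.train optimizer
                             base_learning_rate=0.001,
                             weight_decay=0.9,
                             iterations=5000,
                             snapshot=100)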
@@ -16,36 +16,46 @@ import bob.io.base
 class Trainer(object):
     def __init__(self,
-                 architecture=None,
+                 architecture,
+                 optimizer=tf.train.AdamOptimizer(),
                  use_gpu=False,
                  loss=None,
                  temp_dir="",
+                 # Learning rate
+                 base_learning_rate=0.001,
+                 weight_decay=0.9,
                  ###### training options ##########
-                 convergence_threshold = 0.01,
+                 convergence_threshold=0.01,
                  iterations=5000,
-                 base_lr=0.001,
-                 momentum=0.9,
-                 weight_decay=0.95,
                  # The learning rate policy
                  snapshot=100):
+        """
-        self.loss = loss
-        self.loss_instance = None
-        self.optimizer = None
-        self.temp_dir=temp_dir
+        **Parameters**
+        architecture: The architecture that you want to run. Should be a :py:class`bob.learn.tensorflow.network.SequenceNetwork`
+        optimizer: One of the tensorflow optimizers https://www.tensorflow.org/versions/r0.10/api_docs/python/train.html
+        use_gpu: Use GPUs in the training
+        loss: Loss
+        temp_dir:
+        iterations:
+        snapshot:
+        convergence_threshold:
+        """
+        if not isinstance(architecture, SequenceNetwork):
+            raise ValueError("`architecture` should be instance of `SequenceNetwork`")
+        self.architecture = architecture
+        self.optimizer = optimizer
+        self.use_gpu = use_gpu
+        self.loss = loss
+        self.temp_dir = temp_dir
+        self.base_learning_rate = base_learning_rate
+        self.weight_decay = weight_decay
         self.iterations = iterations
         self.snapshot = snapshot
-        self.base_lr = base_lr
-        self.momentum = momentum
-        self.weight_decay = weight_decay
         self.convergence_threshold = convergence_threshold
     def train(self, train_data_shuffler, validation_data_shuffler=None):
@@ -77,6 +87,14 @@ class Trainer(object):
                 session.run(enqueue_op, feed_dict=feed_dict)
+        # TODO: find an elegant way to provide this as a parameter of the trainer
+        learning_rate = tf.train.exponential_decay(
+            self.base_learning_rate, # Learning rate
+            train_data_shuffler.batch_size,
+            train_data_shuffler.n_samples,
+            self.weight_decay # Decay step
+        )
         # Defining place holders
         train_placeholder_data, train_placeholder_labels = train_data_shuffler.get_placeholders_forprefetch(name="train")
         if validation_data_shuffler is not None:
@@ -105,15 +123,9 @@ class Trainer(object):
             loss_validation = self.loss(validation_graph, validation_placeholder_labels)
             validation_prediction = tf.nn.softmax(validation_graph)
-        batch = tf.Variable(0)
-        learning_rate = tf.train.exponential_decay(
-            self.base_lr, # Learning rate
-            batch * train_data_shuffler.batch_size,
-            train_data_shuffler.n_samples,
-            self.weight_decay # Decay step
-        )
-        optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss_train,
-                                                                              global_step=batch)
+        # Preparing the optimizer
+        self.optimizer._learning_rate = learning_rate
+        optimizer = self.optimizer.minimize(loss_train, global_step=tf.Variable(0))
         print("Initializing !!")
         # Training
......
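Note (not part of the diff): the learning-rate schedule is now built once from the trainer's own attributes and then pushed into the user-supplied optimizer through its _learning_rate attribute, instead of constructing a GradientDescent/Momentum optimizer inside train(). For reference, tf.train.exponential_decay computes learning_rate * decay_rate ** (global_step / decay_steps). A standalone sketch with hypothetical numbers:

    import tensorflow as tf

    base_learning_rate = 0.001
    weight_decay = 0.9        # passed as the decay rate
    batch_size = 16           # hypothetical; used as the global_step argument above
    n_samples = 60000         # hypothetical; used as decay_steps

    learning_rate = tf.train.exponential_decay(base_learning_rate,
                                               batch_size,
                                               n_samples,
                                               weight_decay)
    # Evaluates to base_learning_rate * weight_decay ** (batch_size / n_samples).
    # Note: with a constant global_step the schedule yields a fixed rate unless a
    # step variable is wired in instead.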