Commit 47480241 authored by Tiago Pereira

Reformulating the Trainer

parent 60f2c7c1
Pipeline #8571 failed with stages in 11 minutes and 39 seconds
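The substance of the reformulation, pieced together from the hunks below: the data shuffler now owns the input placeholders (optionally backed by a prefetch queue), the Trainer takes the shuffler at construction time, and the graph/loss/optimizer wiring moves into explicit create_network_from_scratch / create_network_from_file calls. A minimal sketch of the new call sequence, with names taken from the updated tests; architecture stands for any graph-building callable such as Chopra, and data loading is elided:

    import tensorflow as tf
    from bob.learn.tensorflow.datashuffler import Memory
    from bob.learn.tensorflow.loss import BaseLoss
    from bob.learn.tensorflow.trainers import Trainer, constant

    # The shuffler owns the placeholders; calling it returns the right one
    train_data_shuffler = Memory(train_data, train_labels,
                                 input_shape=[None, 28, 28, 1],
                                 batch_size=32)
    graph = architecture(train_data_shuffler("data", from_queue=False))

    loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)

    # Graph construction is now a second, explicit step after the Trainer exists
    trainer = Trainer(train_data_shuffler,
                      iterations=300,
                      analizer=None,
                      temp_dir="./temp/cnn")
    trainer.create_network_from_scratch(graph=graph,
                                        loss=loss,
                                        learning_rate=constant(0.01, name="regular_lr"),
                                        optimizer=tf.train.GradientDescentOptimizer(0.01))
    trainer.train()  # no shuffler argument anymore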
@@ -39,16 +39,26 @@ class Base(object):
     normalizer:
         The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
+
+    prefetch:
+        If True, prefetch batches through a tf.FIFOQueue.
+
+    prefetch_capacity:
+        Capacity of the prefetch queue.
     """

     def __init__(self, data, labels,
-                 input_shape,
+                 input_shape=[None, 28, 28, 1],
                  input_dtype="float64",
-                 batch_size=1,
+                 batch_size=32,
                  seed=10,
                  data_augmentation=None,
-                 normalizer=Linear()):
+                 normalizer=Linear(),
+                 prefetch=False,
+                 prefetch_capacity=10):

+        # Setting the seed for the pseudo random number generator
         self.seed = seed
         numpy.random.seed(seed)
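For reference, a hedged instantiation showing the new constructor arguments. This assumes the derived shufflers (e.g. Memory) forward prefetch and prefetch_capacity to this base class; the tests in this commit do not exercise the prefetch path directly:

    # Illustrative values; the prefetch wiring is an assumption, not shown in the tests
    train_data_shuffler = Memory(train_data, train_labels,
                                 input_shape=[None, 28, 28, 1],
                                 batch_size=32,
                                 prefetch=True,         # build the tf.FIFOQueue path
                                 prefetch_capacity=10)  # queue capacity, in samples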
@@ -58,10 +68,9 @@ class Base(object):
         # TODO: Check if the batch size is higher than the input data
         self.batch_size = batch_size

+        # Preparing the inputs
         self.data = data
-        self.shape = tuple([batch_size] + input_shape)
         self.input_shape = tuple(input_shape)
         self.labels = labels
         self.possible_labels = list(set(self.labels))
@@ -72,43 +81,72 @@ class Base(object):
         self.indexes = numpy.array(range(self.n_samples))
         numpy.random.shuffle(self.indexes)

-        self.data_placeholder = None
-        self.label_placeholder = None
+        # Use data augmentation?
         self.data_augmentation = data_augmentation
-        self.deployment_shape = [None] + list(input_shape)

-    def set_placeholders(self, data, label):
-        self.data_placeholder = data
-        self.label_placeholder = label
+        # Preparing placeholders
+        self.data_ph = None
+        self.label_ph = None
+
+        # Prefetch variables
+        self.prefetch = prefetch
+        self.data_ph_from_queue = None
+        self.label_ph_from_queue = None
+
+    def create_placeholders(self):
+        """
+        Create placeholder instances
+        """
+        with tf.name_scope("Input"):
+            self.data_ph = tf.placeholder(tf.float32, shape=self.input_shape, name="data")
+            self.label_ph = tf.placeholder(tf.int64, shape=[None], name="label")
+
+            # If prefetch, setup the queue to feed data
+            if self.prefetch:
+                queue = tf.FIFOQueue(capacity=self.prefetch_capacity,
+                                     dtypes=[tf.float32, tf.int64],
+                                     shapes=[self.input_shape[1:], []])
+
+                # Fetching the placeholders from the queue
+                self.enqueue_op = queue.enqueue_many([self.data_ph, self.label_ph])
+                self.data_ph_from_queue, self.label_ph_from_queue = queue.dequeue_many(self.batch_size)
+            else:
+                self.data_ph_from_queue = self.data_ph
+                self.label_ph_from_queue = self.label_ph
+
+    def __call__(self, element, from_queue=False):
+        """
+        Return the requested placeholder
+        """
+        if element not in ["data", "label"]:
+            raise ValueError("Value '{0}' invalid. Options available are {1}".format(element, self.placeholder_options))
+
+        # If None, create the placeholders from scratch
+        if self.data_ph is None:
+            self.create_placeholders()
+
+        if element == "data":
+            if from_queue:
+                return self.data_ph_from_queue
+            else:
+                return self.data_ph
+        else:
+            if from_queue:
+                return self.label_ph_from_queue
+            else:
+                return self.label_ph

     def get_batch(self):
         """
         Shuffle dataset and get a random batch.
-
-        :return:
         """
         raise NotImplementedError("Method not implemented in this level. You should use one of the derived classes.")

-    def get_placeholders(self, name=""):
-        """
-        Returns a place holder with the size of your batch
-        """
-        if self.data_placeholder is None:
-            self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name)
-        if self.label_placeholder is None:
-            self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0])
-        return [self.data_placeholder, self.label_placeholder]
-
-    def get_placeholders_forprefetch(self, name=""):
-        """
-        Returns a place holder with the size of your batch
-        """
-        if self.data_placeholder is None:
-            self.data_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name)
-        self.label_placeholder = tf.placeholder(tf.int64, shape=[None, ])
-        return [self.data_placeholder, self.label_placeholder]
-
     def bob2skimage(self, bob_image):
         """
@@ -167,10 +205,6 @@ class Base(object):
         else:
             return data

-    def reshape_for_deploy(self, data):
-        shape = tuple([1] + list(data.shape))
-        return numpy.reshape(data, shape)
-
     def normalize_sample(self, x):
         """
         Normalize the sample.
...
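The hunks above wire only the dequeue side (graphs read data_ph_from_queue); nothing shown in this commit feeds the queue. A plausible feeder, assuming the Trainer (or a helper thread) runs a loop like the one below. The threading is an assumption; only enqueue_op, data_ph, label_ph and get_batch come from the code above:

    import threading

    def feed_queue(session, shuffler):
        # Hypothetical feeder: push shuffled batches through the enqueue op
        # created in create_placeholders() until the process exits.
        while True:
            [data, labels] = shuffler.get_batch()
            session.run(shuffler.enqueue_op,
                        feed_dict={shuffler.data_ph: data,
                                   shuffler.label_ph: labels})

    # feeder = threading.Thread(target=feed_queue, args=(session, train_data_shuffler))
    # feeder.daemon = True  # let the feeder die with the training process
    # feeder.start()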
@@ -10,6 +10,7 @@ from .VGG16 import VGG16
 from .VGG16_mod import VGG16_mod
 from .SimpleAudio import SimpleAudio
 from .Embedding import Embedding
+#from .Input import Input

 # gets sphinx autodoc done right - don't remove it
 def __appropriate__(*args):
...
@@ -5,7 +5,7 @@
 import numpy
 from bob.learn.tensorflow.datashuffler import Memory, SiameseMemory, TripletMemory, ImageAugmentation, ScaleFactor
-from bob.learn.tensorflow.network import Chopra, SequenceNetwork
+from bob.learn.tensorflow.network import Chopra
 from bob.learn.tensorflow.loss import BaseLoss, ContrastiveLoss, TripletLoss
 from bob.learn.tensorflow.trainers import Trainer, SiameseTrainer, TripletTrainer, constant
 from .test_cnn_scratch import validate_network
@@ -23,7 +23,7 @@ Some unit tests for the datashuffler
 """

 batch_size = 32
-validation_batch_size = 400
+validation_batch_size = 32
 iterations = 300
 seed = 10
@@ -77,6 +77,7 @@ def dummy_experiment(data_s, architecture):

 def test_cnn_trainer():
+    # Loading data
     train_data, train_labels, validation_data, validation_labels = load_mnist()
     train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))
     validation_data = numpy.reshape(validation_data, (validation_data.shape[0], 28, 28, 1))
@@ -84,40 +85,45 @@ def test_cnn_trainer():
     # Creating datashufflers
     data_augmentation = ImageAugmentation()
     train_data_shuffler = Memory(train_data, train_labels,
-                                 input_shape=[28, 28, 1],
+                                 input_shape=[None, 28, 28, 1],
                                  batch_size=batch_size,
                                  data_augmentation=data_augmentation,
                                  normalizer=ScaleFactor())
+    validation_data_shuffler = Memory(validation_data, validation_labels,
+                                      input_shape=[None, 28, 28, 1],
+                                      batch_size=batch_size,
+                                      data_augmentation=data_augmentation,
+                                      normalizer=ScaleFactor())

     directory = "./temp/cnn"

     # Loss for the softmax
     loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)

-    inputs = {}
-    inputs['data'] = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name="train_data")
-    inputs['label'] = tf.placeholder(tf.int64, shape=[None], name="train_label")
-
     # Preparing the architecture
     architecture = Chopra(seed=seed,
                           fc1_output=10)
-    graph = architecture(inputs['data'])
-    embedding = Embedding(inputs['data'], graph)
+    input_pl = train_data_shuffler("data", from_queue=True)
+    graph = architecture(input_pl)
+    embedding = Embedding(train_data_shuffler("data", from_queue=False), graph)

     # One graph trainer
-    trainer = Trainer(inputs=inputs,
-                      graph=graph,
-                      loss=loss,
+    trainer = Trainer(train_data_shuffler,
                       iterations=iterations,
                       analizer=None,
-                      prefetch=False,
-                      learning_rate=constant(0.01, name="regular_lr"),
-                      optimizer=tf.train.GradientDescentOptimizer(0.01),
                       temp_dir=directory
                       )
-    trainer.train(train_data_shuffler)
+    trainer.create_network_from_scratch(graph=graph,
+                                        loss=loss,
+                                        learning_rate=constant(0.01, name="regular_lr"),
+                                        optimizer=tf.train.GradientDescentOptimizer(0.01),
+                                        )
+    trainer.train()
+    #trainer.train(validation_data_shuffler)

+    # Using embedding to compute the accuracy
     accuracy = validate_network(embedding, validation_data, validation_labels)

     # At least 80% of accuracy
     assert accuracy > 80.
     shutil.rmtree(directory)
@@ -165,8 +171,6 @@ def test_siamesecnn_trainer():
                              optimizer=tf.train.AdamOptimizer(name="adam_siamese"),
                              temp_dir=directory
                              )
-    import ipdb; ipdb.set_trace();
-
     trainer.train(train_data_shuffler)

     eer = dummy_experiment(validation_data_shuffler, architecture)
...
@@ -10,9 +10,11 @@ from bob.learn.tensorflow.datashuffler import Memory, ImageAugmentation, Triplet
 from bob.learn.tensorflow.loss import BaseLoss, TripletLoss, ContrastiveLoss
 from bob.learn.tensorflow.trainers import Trainer, constant, TripletTrainer, SiameseTrainer
 from bob.learn.tensorflow.utils import load_mnist
-from bob.learn.tensorflow.network import SequenceNetwork
 from bob.learn.tensorflow.layers import Conv2D, FullyConnected
+from bob.learn.tensorflow.network import Embedding
 from .test_cnn import dummy_experiment
+from .test_cnn_scratch import validate_network
 import tensorflow as tf
 import shutil
@@ -23,46 +25,38 @@ Some unit tests that create networks on the fly and load variables
 """

 batch_size = 16
 validation_batch_size = 400
-iterations = 50
+iterations = 300
 seed = 10


-def scratch_network():
+def scratch_network(input_pl):
     # Creating a random network
-    scratch = SequenceNetwork(default_feature_layer="fc1")
-    scratch.add(Conv2D(name="conv1", kernel_size=3,
-                       filters=10,
-                       activation=tf.nn.tanh,
-                       batch_norm=False))
-    scratch.add(FullyConnected(name="fc1", output_dim=10,
-                               activation=None,
-                               batch_norm=False
-                               ))
-    return scratch
-
-
-def validate_network(validation_data, validation_labels, network):
-    # Testing
-    validation_data_shuffler = Memory(validation_data, validation_labels,
-                                      input_shape=[28, 28, 1],
-                                      batch_size=validation_batch_size)
-
-    [data, labels] = validation_data_shuffler.get_batch()
-    predictions = network.predict(data)
-    accuracy = 100. * numpy.sum(predictions == labels) / predictions.shape[0]
-    return accuracy
+    slim = tf.contrib.slim
+
+    initializer = tf.contrib.layers.xavier_initializer(uniform=False, dtype=tf.float32, seed=10)
+
+    scratch = slim.conv2d(input_pl, 10, 3, activation_fn=tf.nn.tanh,
+                          stride=1,
+                          weights_initializer=initializer,
+                          scope='conv1')
+    scratch = slim.flatten(scratch, scope='flatten1')
+    scratch = slim.fully_connected(scratch, 10,
+                                   weights_initializer=initializer,
+                                   activation_fn=None,
+                                   scope='fc1')
+
+    return scratch
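Design note: the slim rewrite keeps the same conv1 → fc1 topology as the removed SequenceNetwork version, but as a plain function over an input placeholder, so it composes directly with the shuffler's placeholders, as the test below does (the Conv2D/FullyConnected import above appears unused in the shown hunks after this change):

    input_pl = train_data_shuffler("data", from_queue=True)
    graph = scratch_network(input_pl)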
 def test_cnn_pretrained():
+    # Preparing input data
     train_data, train_labels, validation_data, validation_labels = load_mnist()
     train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))

     # Creating datashufflers
     data_augmentation = ImageAugmentation()
     train_data_shuffler = Memory(train_data, train_labels,
-                                 input_shape=[28, 28, 1],
+                                 input_shape=[None, 28, 28, 1],
                                  batch_size=batch_size,
                                  data_augmentation=data_augmentation)
     validation_data = numpy.reshape(validation_data, (validation_data.shape[0], 28, 28, 1))
@@ -71,51 +65,55 @@ def test_cnn_pretrained():
     directory2 = "./temp/cnn2"

     # Creating a random network
-    scratch = scratch_network()
+    input_pl = train_data_shuffler("data", from_queue=True)
+    graph = scratch_network(input_pl)
+    embedding = Embedding(train_data_shuffler("data", from_queue=False), graph)

     # Loss for the softmax
     loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)

     # One graph trainer
-    trainer = Trainer(architecture=scratch,
-                      loss=loss,
+    trainer = Trainer(train_data_shuffler,
                       iterations=iterations,
                       analizer=None,
-                      prefetch=False,
-                      learning_rate=constant(0.05, name="regular_lr"),
-                      optimizer=tf.train.AdamOptimizer(name="adam_pretrained_model"),
                       temp_dir=directory
                       )
-
-    trainer.train(train_data_shuffler)
-    accuracy = validate_network(validation_data, validation_labels, scratch)
-    assert accuracy > 85
+    trainer.create_network_from_scratch(graph=graph,
+                                        loss=loss,
+                                        learning_rate=constant(0.01, name="regular_lr"),
+                                        optimizer=tf.train.GradientDescentOptimizer(0.01),
+                                        )
+    trainer.train()
+    accuracy = validate_network(embedding, validation_data, validation_labels)
+    assert accuracy > 80

-    del scratch
+    del graph
     del loss
     del trainer

     # Training the network using a pre trained model
     loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean, name="loss")
-    scratch = scratch_network()
-    trainer = Trainer(architecture=scratch,
-                      loss=loss,
-                      iterations=iterations + 200,
+    graph = scratch_network(input_pl)
+
+    # One graph trainer
+    trainer = Trainer(train_data_shuffler,
+                      iterations=iterations,
                       analizer=None,
-                      prefetch=False,
-                      learning_rate=None,
-                      temp_dir=directory2,
-                      model_from_file=os.path.join(directory, "model.ckp")
+                      temp_dir=directory
                       )
+    trainer.create_network_from_file(os.path.join(directory, "model.ckp"))
+    trainer.train()

-    trainer.train(train_data_shuffler)
-
-    accuracy = validate_network(validation_data, validation_labels, scratch)
+    accuracy = validate_network(embedding, validation_data, validation_labels)
     assert accuracy > 90
     shutil.rmtree(directory)
     shutil.rmtree(directory2)

-    del scratch
+    del graph
     del loss
     del trainer
...
@@ -48,7 +48,7 @@ def scratch_network():

 def validate_network(embedding, validation_data, validation_labels):
     # Testing
     validation_data_shuffler = Memory(validation_data, validation_labels,
-                                      input_shape=[28, 28, 1],
+                                      input_shape=[None, 28, 28, 1],
                                       batch_size=validation_batch_size,
                                       normalizer=ScaleFactor())
...