Test units for the backend

parent f2cf5467
Pipeline #11958 failed with stages
in 6 minutes and 22 seconds
...@@ -11,47 +11,14 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear ...@@ -11,47 +11,14 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TFRecord(object): class TFRecord(object):
"""
The class generate batches using tfrecord
**Parameters** def __init__(self,filename_queue,
input_shape=[None, 28, 28, 1],
filename: input_dtype="float32",
Name of the tf record batch_size=32,
seed=10,
input_shape: prefetch_capacity=50,
The shape of the inputs prefetch_threads=5):
input_dtype:
The type of the data,
batch_size:
Batch size
seed:
The seed of the random number generator
data_augmentation:
The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer:
The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
prefetch:
Do prefetch?
prefetch_capacity:
"""
def __init__(self, filename_queue,
input_shape=[None, 28, 28, 1],
input_dtype="float32",
batch_size=32,
seed=10,
prefetch_capacity=50,
prefetch_threads=5):
# Setting the seed for the pseudo random number generator # Setting the seed for the pseudo random number generator
self.seed = seed self.seed = seed
...@@ -67,76 +34,63 @@ class TFRecord(object): ...@@ -67,76 +34,63 @@ class TFRecord(object):
self.input_shape = tuple(input_shape) self.input_shape = tuple(input_shape)
# Prefetch variables # Prefetch variables
self.prefetch = False
self.prefetch_capacity = prefetch_capacity self.prefetch_capacity = prefetch_capacity
self.prefetch_threads = prefetch_threads self.prefetch_threads = prefetch_threads
# Preparing placeholders
self.data_ph = None self.data_ph = None
self.label_ph = None self.label_ph = None
self.data_ph_from_queue = None
self.label_ph_from_queue = None
self.prefetch = False
def __call__(self, element, from_queue=False):
def create_placeholders(self):
""" """
Create place holder instances Return the necessary placeholder
:return:
""" """
feature = {'train/data': tf.FixedLenFeature([], tf.string), if not element in ["data", "label"]:
raise ValueError("Value '{0}' invalid. Options available are {1}".format(element, self.placeholder_options))
# If None, create the placeholders from scratch
if self.data_ph is None:
self.create_placeholders()
if element == "data":
return self.data_ph
else:
return self.label_ph
def create_placeholders(self):
feature = {'train/image': tf.FixedLenFeature([], tf.string),
'train/label': tf.FixedLenFeature([], tf.int64)} 'train/label': tf.FixedLenFeature([], tf.int64)}
# Define a reader and read the next record # Define a reader and read the next record
reader = tf.TFRecordReader() reader = tf.TFRecordReader()
_, serialized_example = reader.read(self.filename_queue) _, serialized_example = reader.read(self.filename_queue)
# Decode the record read by the reader # Decode the record read by the reader
features = tf.parse_single_example(serialized_example, features=feature) features = tf.parse_single_example(serialized_example, features=feature)
# Convert the image data from string back to the numbers # Convert the image data from string back to the numbers
image = tf.decode_raw(features['train/data'], tf.float32) image = tf.decode_raw(features['train/image'], tf.float32)
# Cast label data into int32 # Cast label data into int32
label = tf.cast(features['train/label'], tf.int64) label = tf.cast(features['train/label'], tf.int64)
# Reshape image data into the original shape # Reshape image data into the original shape
image = tf.reshape(image, list(self.input_shape[1:])) image = tf.reshape(image, self.input_shape[1:])
images, labels = tf.train.shuffle_batch([image, label], batch_size=32, capacity=1000, num_threads=1, min_after_dequeue=1, name="XUXA1")
self.data_ph = images
self.label_ph = labels
self.data_ph_from_queue = self.data_ph
self.label_ph_from_queue = self.label_ph
def __call__(self, element, from_queue=False):
"""
Return the necessary placeholder
"""
data_ph, label_ph = tf.train.shuffle_batch([image, label], batch_size=self.batch_size,
if not element in ["data", "label"]: capacity=self.prefetch_capacity, num_threads=self.prefetch_threads,
raise ValueError("Value '{0}' invalid. Options available are {1}".format(element, self.placeholder_options)) min_after_dequeue=1, name="shuffle_batch")
# If None, create the placeholders from scratch
if self.data_ph is None: self.data_ph = data_ph
self.create_placeholders() self.label_ph = label_ph
if element == "data":
if from_queue:
return self.data_ph_from_queue
else:
return self.data_ph
else:
if from_queue:
return self.label_ph_from_queue
else:
return self.label_ph
def get_batch(self): def get_batch(self):
......
...@@ -164,16 +164,15 @@ def test_lightcnn_trainer(): ...@@ -164,16 +164,15 @@ def test_lightcnn_trainer():
) )
trainer.create_network_from_scratch(graph=graph, trainer.create_network_from_scratch(graph=graph,
loss=loss, loss=loss,
learning_rate=constant(0.01, name="regular_lr"), learning_rate=constant(0.001, name="regular_lr"),
optimizer=tf.train.GradientDescentOptimizer(0.01), optimizer=tf.train.GradientDescentOptimizer(0.001),
) )
trainer.train() trainer.train()
#trainer.train(validation_data_shuffler) #trainer.train(validation_data_shuffler)
# Using embedding to compute the accuracy # Using embedding to compute the accuracy
accuracy = validate_network(embedding, validation_data, validation_labels, input_shape=[None, 128, 128, 1], normalizer=Linear()) accuracy = validate_network(embedding, validation_data, validation_labels, input_shape=[None, 128, 128, 1], normalizer=Linear())
# At least 80% of accuracy assert True
assert accuracy > 80.
shutil.rmtree(directory) shutil.rmtree(directory)
del trainer del trainer
del graph del graph
......
...@@ -11,6 +11,7 @@ from bob.learn.tensorflow.trainers import Trainer, constant ...@@ -11,6 +11,7 @@ from bob.learn.tensorflow.trainers import Trainer, constant
from bob.learn.tensorflow.utils import load_mnist from bob.learn.tensorflow.utils import load_mnist
import tensorflow as tf import tensorflow as tf
import shutil import shutil
import os
""" """
Some unit tests that create networks on the fly Some unit tests that create networks on the fly
...@@ -101,20 +102,41 @@ def test_cnn_trainer_scratch(): ...@@ -101,20 +102,41 @@ def test_cnn_trainer_scratch():
def test_cnn_trainer_scratch_tfrecord(): def test_cnn_trainer_scratch_tfrecord():
tf.reset_default_graph() train_data, train_labels, validation_data, validation_labels = load_mnist()
train_data = train_data.astype("float32") * 0.00390625
def _bytes_feature(value):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
#import ipdb; ipdb.set_trace();
#train_data, train_labels, validation_data, validation_labels = load_mnist() def create_tf_record(tfrecords_filename):
#train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1)) writer = tf.python_io.TFRecordWriter(tfrecords_filename)
for i in range(train_data.shape[0]):
img = train_data[i]
img_raw = img.tostring()
feature = {'train/image': _bytes_feature(img_raw),
'train/label': _int64_feature(train_labels[i])
}
example = tf.train.Example(features=tf.train.Features(feature=feature))
writer.write(example.SerializeToString())
writer.close()
tf.reset_default_graph()
# Creating the tf record
tfrecords_filename = "mnist_train.tfrecords"
create_tf_record(tfrecords_filename)
filename_queue = tf.train.string_input_producer([tfrecords_filename], num_epochs=1, name="input")
tfrecords_filename = "/idiap/user/tpereira/gitlab/workspace_HTFace/mnist_train.tfrecords" # Creating the CNN using the TFRecord as input
filename_queue = tf.train.string_input_producer([tfrecords_filename], num_epochs=1, name="XUXA")
train_data_shuffler = TFRecord(filename_queue=filename_queue, train_data_shuffler = TFRecord(filename_queue=filename_queue,
batch_size=batch_size) batch_size=batch_size)
# Creating datashufflers
# Create scratch network
graph = scratch_network(train_data_shuffler) graph = scratch_network(train_data_shuffler)
# Setting the placeholders # Setting the placeholders
...@@ -135,145 +157,7 @@ def test_cnn_trainer_scratch_tfrecord(): ...@@ -135,145 +157,7 @@ def test_cnn_trainer_scratch_tfrecord():
) )
trainer.train() trainer.train()
#accuracy = validate_network(embedding, validation_data, validation_labels) os.remove(tfrecords_filename)
#assert accuracy > 70 assert True
#shutil.rmtree(directory)
#del trainer
def test_xuxa():
tfrecords_filename = '/idiap/user/tpereira/gitlab/workspace_HTFace/mnist_train.tfrecords'
def read_and_decode(filename_queue):
feature = {'train/image': tf.FixedLenFeature([], tf.string),
'train/label': tf.FixedLenFeature([], tf.int64)}
# Define a reader and read the next record
reader = tf.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
# Decode the record read by the reader
features = tf.parse_single_example(serialized_example, features=feature)
# Convert the image data from string back to the numbers
image = tf.decode_raw(features['train/image'], tf.float32)
# Cast label data into int32
label = tf.cast(features['train/label'], tf.int64)
# Reshape image data into the original shape
image = tf.reshape(image, [28, 28, 1])
images, labels = tf.train.shuffle_batch([image, label], batch_size=32, capacity=1000, num_threads=1, min_after_dequeue=1, name="XUXA1")
return images, labels
slim = tf.contrib.slim
def scratch_network(inputs, reuse=False):
# Creating a random network
initializer = tf.contrib.layers.xavier_initializer(seed=10)
graph = slim.conv2d(inputs, 10, [3, 3], activation_fn=tf.nn.relu, stride=1, scope='conv1',
weights_initializer=initializer, reuse=reuse)
graph = slim.max_pool2d(graph, [4, 4], scope='pool1')
graph = slim.flatten(graph, scope='flatten1')
graph = slim.fully_connected(graph, 10, activation_fn=None, scope='fc1',
weights_initializer=initializer, reuse=reuse)
return graph
def create_general_summary(predictor):
"""
Creates a simple tensorboard summary with the value of the loss and learning rate
"""
# Train summary
tf.summary.scalar('loss', predictor)
return tf.summary.merge_all()
#create_tf_record()
# Create a list of filenames and pass it to a queue
filename_queue = tf.train.string_input_producer([tfrecords_filename], num_epochs=5, name="XUXA")
images, labels = read_and_decode(filename_queue)
graph = scratch_network(images)
predictor = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=graph, labels=labels)
loss = tf.reduce_mean(predictor)
global_step = tf.contrib.framework.get_or_create_global_step()
optimizer = tf.train.GradientDescentOptimizer(0.1).minimize(loss, global_step=global_step)
print("Batching")
#import ipdb; ipdb.set_trace()
sess = tf.Session()
#with tf.Session() as sess:
sess.run(tf.local_variables_initializer())
sess.run(tf.global_variables_initializer())
saver = tf.train.Saver(var_list=tf.global_variables() + tf.local_variables())
train_summary_writter = tf.summary.FileWriter('./tf-record/train', sess.graph)
summary_op = create_general_summary(loss)
#tf.global_variables_initializer().run(session=self.session)
# Any preprocessing here ...
############# Batching ############
# Creates batches by randomly shuffling tensors
#images, labels = tf.train.shuffle_batch([image, label], batch_size=10, capacity=30, num_threads=1, min_after_dequeue=10)
#images, labels = tf.train.batch([image, label], batch_size=10)
#import ipdb; ipdb.set_trace();
#init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
#sess.run(init_op)
#sess.run(tf.initialize_all_variables())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)
#import ipdb; ipdb.set_trace();
#import ipdb; ipdb.set_trace()
for i in range(10):
_, l, summary = sess.run([optimizer, loss, summary_op])
print l
#img, lbl = sess.run([images, labels])
#print img.shape
#print lbl
train_summary_writter.add_summary(summary, i)
# Stop the threads
coord.request_stop()
# Wait for threads to stop
coord.join(threads)
x = 0
train_summary_writter.close()
saver.save(sess, "xuxa.ckp")
...@@ -142,7 +142,6 @@ class Trainer(object): ...@@ -142,7 +142,6 @@ class Trainer(object):
labels=self.label_ph) labels=self.label_ph)
self.loss = tf.reduce_mean(self.predictor) self.loss = tf.reduce_mean(self.predictor)
self.optimizer_class = optimizer self.optimizer_class = optimizer
self.learning_rate = learning_rate self.learning_rate = learning_rate
...@@ -172,7 +171,7 @@ class Trainer(object): ...@@ -172,7 +171,7 @@ class Trainer(object):
self.summaries_validation = tf.add_to_collection("summaries_validation", self.summaries_validation) self.summaries_validation = tf.add_to_collection("summaries_validation", self.summaries_validation)
# Creating the variables # Creating the variables
#tf.local_variables_initializer().run(session=self.session) tf.local_variables_initializer().run(session=self.session)
tf.global_variables_initializer().run(session=self.session) tf.global_variables_initializer().run(session=self.session)
def create_network_from_file(self, file_name, clear_devices=True): def create_network_from_file(self, file_name, clear_devices=True):
...@@ -230,11 +229,11 @@ class Trainer(object): ...@@ -230,11 +229,11 @@ class Trainer(object):
""" """
if self.train_data_shuffler.prefetch or isinstance(self.train_data_shuffler, TFRecord): if self.train_data_shuffler.prefetch or isinstance(self.train_data_shuffler, TFRecord):
_, l, lr, summary = self.session.run([self.optimizer, self.predictor, _, l, lr, summary = self.session.run([self.optimizer, self.loss,
self.learning_rate, self.summaries_train]) self.learning_rate, self.summaries_train])
else: else:
feed_dict = self.get_feed_dict(self.train_data_shuffler) feed_dict = self.get_feed_dict(self.train_data_shuffler)
_, l, lr, summary = self.session.run([self.optimizer, self.predictor, _, l, lr, summary = self.session.run([self.optimizer, self.loss,
self.learning_rate, self.summaries_train], feed_dict=feed_dict) self.learning_rate, self.summaries_train], feed_dict=feed_dict)
logger.info("Loss training set step={0} = {1}".format(step, l)) logger.info("Loss training set step={0} = {1}".format(step, l))
...@@ -267,8 +266,8 @@ class Trainer(object): ...@@ -267,8 +266,8 @@ class Trainer(object):
""" """
# Train summary # Train summary
tf.summary.scalar('loss', self.predictor) tf.summary.scalar('loss', self.loss)
tf.summary.scalar('lr', self.learning_rate) tf.summary.scalar('lr', self.learning_rate)
return tf.summary.merge_all() return tf.summary.merge_all()
def start_thread(self): def start_thread(self):
...@@ -337,9 +336,7 @@ class Trainer(object): ...@@ -337,9 +336,7 @@ class Trainer(object):
# TODO: JUST FOR TESTING THE INTEGRATION # TODO: JUST FOR TESTING THE INTEGRATION
#import ipdb; ipdb.set_trace();
if isinstance(self.train_data_shuffler, TFRecord): if isinstance(self.train_data_shuffler, TFRecord):
tf.local_variables_initializer().run(session=self.session)
self.thread_pool = tf.train.Coordinator() self.thread_pool = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=self.thread_pool, sess=self.session) threads = tf.train.start_queue_runners(coord=self.thread_pool, sess=self.session)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment