Commit 70e380b1 authored by Pavel KORSHUNOV

sequence tfrecord reader and test case

parent 5b4b3962
bob/learn/tensorflow/datashuffler/TFRecord.py:

@@ -11,14 +11,13 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear

class TFRecord(object):

    def __init__(self, filename_queue,
                 input_shape=[None, 28, 28, 1],
                 input_dtype="float32",
                 batch_size=32,
                 seed=10,
                 prefetch_capacity=50,
                 prefetch_threads=5):

        # Setting the seed for the pseudo random number generator
        self.seed = seed
@@ -37,7 +36,7 @@ class TFRecord(object):

        self.prefetch = True
        self.prefetch_capacity = prefetch_capacity
        self.prefetch_threads = prefetch_threads

        self.data_ph = None
        self.label_ph = None
@@ -59,7 +58,6 @@ class TFRecord(object):

        else:
            return self.label_ph

    def create_placeholders(self):
        feature = {'train/data': tf.FixedLenFeature([], tf.string),
@@ -67,32 +65,28 @@ class TFRecord(object):

        # Define a reader and read the next record
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(self.filename_queue)

        # Decode the record read by the reader
        features = tf.parse_single_example(serialized_example, features=feature)

        # Convert the image data from string back to numbers
        image = tf.decode_raw(features['train/data'], tf.float32)

        # Cast label data into int64
        label = tf.cast(features['train/label'], tf.int64)

        # Reshape image data into the original shape
        image = tf.reshape(image, self.input_shape[1:])

        data_ph, label_ph = tf.train.shuffle_batch([image, label], batch_size=self.batch_size,
                                                   capacity=self.prefetch_capacity,
                                                   num_threads=self.prefetch_threads,
                                                   min_after_dequeue=1, name="shuffle_batch")

        self.data_ph = data_ph
        self.label_ph = label_ph

    def get_batch(self):
        """
        Shuffle the Memory dataset and get a random batch.

@@ -107,4 +101,3 @@ class TFRecord(object):
        """
        pass
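For context, here is a minimal usage sketch of this queue-based shuffler (an illustration, not part of the commit; the file name "train.tfrecords" is hypothetical, and the session boilerplate assumes the TF 1.x queue-runner API):

import tensorflow as tf
from bob.learn.tensorflow.datashuffler import TFRecord

# hypothetical file holding the 'train/data' and 'train/label'
# features that create_placeholders() expects
filename_queue = tf.train.string_input_producer(["train.tfrecords"], num_epochs=1)
shuffler = TFRecord(filename_queue=filename_queue, batch_size=32)
shuffler.create_placeholders()

with tf.Session() as session:
    # string_input_producer with num_epochs keeps a local epoch counter
    session.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=session, coord=coord)
    data, labels = session.run([shuffler.data_ph, shuffler.label_ph])
    print(data.shape, labels.shape)   # (32, 28, 28, 1) and (32,)
    coord.request_stop()
    coord.join(threads)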
New file bob/learn/tensorflow/datashuffler/TFRecordSequence.py:

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST

import numpy
import tensorflow as tf
import bob.ip.base

from bob.learn.tensorflow.datashuffler.TFRecord import TFRecord
class TFRecordSequence(TFRecord):

    def __init__(self, filename_queue,
                 input_shape=[None, 28, 28, 1],
                 input_dtype="float32",
                 batch_size=32,
                 seed=10,
                 prefetch_capacity=50,
                 prefetch_threads=5,
                 sliding_win_len=5,
                 sliding_win_step=1,
                 min_after_dequeue=1):

        super(TFRecordSequence, self).__init__(filename_queue, input_shape, input_dtype,
                                               batch_size, seed, prefetch_capacity, prefetch_threads)

        self.min_after_dequeue = min_after_dequeue
        self.sliding_win_len = sliding_win_len
        self.sliding_win_step = sliding_win_step

        # we assume that the second dimension is the temporal axis, so the input
        # data is of shape (batch_size, temporal_size, features, ...); a sliding
        # window keeps the feature shape but shortens the temporal extent
        self.sliding_win_shape = [-1, self.sliding_win_len] + list(self.input_shape[2:])
    def generate_sliding_wins(self, data_ph, label_ph):
        """
        Assuming the input is a temporal sequence, create a set of sliding windows

        :param data_ph: placeholder for data of input shape (batch_size, temporal_size, features_size, ...)
        :param label_ph: placeholder for labels, assumed to be constant within a sequence
        :return: sliding windows generated from the sequences in data_ph and the corresponding array of labels
        """
        print('inputs', data_ph)

        # we assume that the second dimension is the temporal axis,
        # so the input data is of shape (batch_size, temporal_size, features, ...)
        temporal_sequence_length = tf.shape(data_ph)[1]

        # pre-compute the number of sliding windows and the last valid start index
        num_sliding_wins = (temporal_sequence_length - self.sliding_win_len) // self.sliding_win_step + 1
        max_win_index = temporal_sequence_length - self.sliding_win_len + 1  # used as the range bound

        # create the sliding windows; tf.map_fn stacks one slice per window index,
        # so the result is ordered window-major: first window 0 of every sequence
        # in the batch, then window 1, and so on (see the NumPy sketch after this file)
        data_sliding_wins = tf.map_fn(lambda i: data_ph[:, i:i + self.sliding_win_len],
                                      tf.range(0, max_win_index, self.sliding_win_step),
                                      dtype=tf.float32)

        # collapse the (num_windows, batch_size) leading dimensions into one
        data_sliding_wins = tf.reshape(data_sliding_wins, self.sliding_win_shape)
        print('data_sliding_wins: ', data_sliding_wins)

        # duplicate the labels to match the window-major ordering above:
        # tf.tile repeats the whole label vector once per window
        label_sliding_win = tf.tile(label_ph, [num_sliding_wins])
        print('label_sliding_win: ', label_sliding_win)

        return data_sliding_wins, label_sliding_win
    def create_placeholders(self):
        feature = {'train/data': tf.FixedLenFeature([], tf.string),
                   'train/label': tf.FixedLenFeature([], tf.int64)}

        # Define a reader and read the next record
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(self.filename_queue)

        # Decode the record read by the reader
        features = tf.parse_single_example(serialized_example, features=feature)

        # Convert the data from string back to numbers
        data = tf.decode_raw(features['train/data'], tf.float32)
        print("data: ", data)

        # Cast label into int64
        label = tf.cast(features['train/label'], tf.int64)
        print("label: ", label)

        # Reshape data into the original shape
        data = tf.reshape(data, self.input_shape[1:])

        # get the batch tensors from shuffle_batch
        data_ph, label_ph = tf.train.shuffle_batch([data, label],
                                                   batch_size=self.batch_size,
                                                   capacity=self.prefetch_capacity,
                                                   num_threads=self.prefetch_threads,
                                                   min_after_dequeue=self.min_after_dequeue,
                                                   name="shuffle_batch", seed=self.seed)
        print("data_ph: ", data_ph)
        print("label_ph: ", label_ph)

        # expand each batch of sequences into a batch of sliding windows
        self.data_ph, self.label_ph = self.generate_sliding_wins(data_ph, label_ph)
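Because the window/label ordering is easy to get wrong, here is a small NumPy sketch (an illustration, not part of the commit) of why tiling the labels matches the window-major order produced by tf.map_fn followed by the reshape:

import numpy as np

# toy setup: 2 sequences of 6 time steps, windows of length 4, step 2
batch, T, L, step = 2, 6, 4, 2
data = np.arange(batch * T).reshape(batch, T)
labels = np.array([7, 9])                # one label per sequence

# like tf.map_fn: stack one slice per window index -> (num_wins, batch, L)
wins = np.stack([data[:, i:i + L] for i in range(0, T - L + 1, step)])
num_wins = wins.shape[0]                 # (6 - 4) // 2 + 1 == 2

# the reshape flattens window-major: window 0 of every sequence, then window 1
wins = wins.reshape(-1, L)

# tiling the label vector reproduces exactly that order
win_labels = np.tile(labels, num_wins)
print(wins.shape, win_labels)            # (4, 4) [7 9 7 9]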
bob/learn/tensorflow/datashuffler/__init__.py:

@@ -22,7 +22,7 @@ from .Normalizer import ScaleFactor, MeanOffset, Linear

from .DiskAudio import DiskAudio
from .TFRecord import TFRecord
from .TFRecordSequence import TFRecordSequence

# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
    """Says object was actually declared here, and not on the import module.

@@ -55,5 +55,7 @@ __appropriate__(

    ImageAugmentation,
    ScaleFactor, MeanOffset, Linear,
    DiskAudio,
    TFRecord,
    TFRecordSequence,
)

__all__ = [_ for _ in dir() if not _.startswith('_')]
New test file (path not shown in the diff):

#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Pavel Korshunov <pavel.korshunov@idiap.ch>
# @date: Wed 13 Sep 2017 13:22 CEST

import numpy
from bob.learn.tensorflow.datashuffler import Memory, ScaleFactor, TFRecordSequence
from bob.learn.tensorflow.loss import MeanSoftMaxLoss
from bob.learn.tensorflow.trainers import Trainer, constant
from bob.learn.tensorflow.utils import load_mnist
from bob.learn.tensorflow.layers import lstm
import tensorflow as tf
import shutil
import os
import logging

logger = logging.getLogger("bob.project.savi")

slim = tf.contrib.slim
def scratch_lstm_network(train_data_shuffler, batch_size=10, num_time_steps=28, num_classes=10, seed=10, reuse=False):
    inputs = train_data_shuffler("data", from_queue=False)

    lstm_cell_size = 64
    initializer = tf.contrib.layers.xavier_initializer(seed=seed)

    # Creating an LSTM network
    graph = lstm(inputs, lstm_cell_size, num_time_steps=num_time_steps, batch_size=batch_size,
                 output_activation_size=lstm_cell_size, scope='lstm',
                 weights_initializer=initializer, activation=tf.nn.relu, reuse=reuse)

    # fully connect the LSTM output to the classes
    graph = slim.fully_connected(graph, num_classes, activation_fn=None, scope='fc1',
                                 weights_initializer=initializer, reuse=reuse)

    return graph
def validate_network(embedding, validation_data, validation_labels,
                     input_shape=[None, 28, 28, 1], validation_batch_size=10,
                     normalizer=ScaleFactor()):
    # Testing
    validation_data_shuffler = Memory(validation_data, validation_labels,
                                      input_shape=input_shape,
                                      batch_size=validation_batch_size,
                                      normalizer=normalizer)

    [data, labels] = validation_data_shuffler.get_batch()
    predictions = embedding(data)
    accuracy = 100. * numpy.sum(numpy.argmax(predictions, axis=1) == labels) / predictions.shape[0]
    logger.info("Validation accuracy = {0}".format(accuracy))

    return accuracy
def create_tf_record(tfrecords_filename, train=True):

    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _int64_feature(value):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    data, labels, validation_data, validation_labels = load_mnist()

    # scale pixel values to [0, 1] (0.00390625 == 1 / 256)
    data = data.astype("float32") * 0.00390625
    validation_data = validation_data.astype("float32") * 0.00390625

    if not train:
        data = validation_data
        labels = validation_labels

    writer = tf.python_io.TFRecordWriter(tfrecords_filename)

    # write only the first 6000 samples to keep the test fast
    for i in range(6000):
        img = data[i]
        img_raw = img.tostring()

        feature = {'train/data': _bytes_feature(img_raw),
                   'train/label': _int64_feature(labels[i])}

        example = tf.train.Example(features=tf.train.Features(feature=feature))
        writer.write(example.SerializeToString())

    writer.close()
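A quick sanity check for the records written above (a sketch, not part of the commit, assuming TF 1.x's tf.python_io API): read back the first serialized example and confirm that the image and label round-trip.

import numpy
import tensorflow as tf

record_iterator = tf.python_io.tf_record_iterator(path="mnist_train.tfrecords")
example = tf.train.Example()
example.ParseFromString(next(record_iterator))

# 'train/data' holds the raw float32 bytes of one flattened 28x28 image
raw = example.features.feature['train/data'].bytes_list.value[0]
img = numpy.frombuffer(raw, dtype=numpy.float32)
label = example.features.feature['train/label'].int64_list.value[0]
print(img.shape, img.max(), label)   # (784,), a value <= 1.0, a digit in 0..9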
def test_tfrecord_sequence():
    # define constants that describe the data and the experiment
    num_time_steps = 28
    feature_size = 28
    sliding_win_len = 26    # we feed 26 time steps into the LSTM
    sliding_win_step = 2    # two sliding windows out of 28 values: (28 - 26) // 2 + 1 == 2
    batch_size = 5
    validation_batch_size = 1
    input_shape = [None, num_time_steps, feature_size, 1]
    iterations = 500
    seed = 10
    directory = "./temp/lstm_scratch"
    num_classes = 10
    # Create the tf records for training and validation
    tfrecords_filename = "mnist_train.tfrecords"
    create_tf_record(tfrecords_filename)
    filename_queue = tf.train.string_input_producer([tfrecords_filename], num_epochs=15, name="input")

    tfrecords_filename_val = "mnist_validation.tfrecords"
    create_tf_record(tfrecords_filename_val, train=False)
    filename_queue_val = tf.train.string_input_producer([tfrecords_filename_val], num_epochs=15,
                                                        name="input_validation")

    # Creating the TFRecord data shufflers
    train_data_shuffler = TFRecordSequence(filename_queue=filename_queue,
                                           input_shape=input_shape,
                                           batch_size=batch_size,
                                           prefetch_threads=1,
                                           prefetch_capacity=50,
                                           sliding_win_len=sliding_win_len,
                                           sliding_win_step=sliding_win_step,
                                           min_after_dequeue=1)

    validation_data_shuffler = TFRecordSequence(filename_queue=filename_queue_val,
                                                input_shape=input_shape,
                                                batch_size=validation_batch_size,
                                                prefetch_threads=1,
                                                prefetch_capacity=50,
                                                sliding_win_len=sliding_win_len,
                                                sliding_win_step=sliding_win_step,
                                                min_after_dequeue=1)

    num_sliding_wins = (num_time_steps - sliding_win_len) // sliding_win_step + 1

    # after we generate sliding windows, num_time_steps equals the sliding window length
    num_time_steps = sliding_win_len
    graph = scratch_lstm_network(train_data_shuffler,
                                 batch_size=num_sliding_wins * batch_size,
                                 num_time_steps=num_time_steps,
                                 seed=seed,
                                 num_classes=num_classes)
    validation_graph = scratch_lstm_network(validation_data_shuffler,
                                            batch_size=num_sliding_wins * validation_batch_size,
                                            num_time_steps=num_time_steps,
                                            seed=seed,
                                            num_classes=num_classes,
                                            reuse=True)

    # Loss for the softmax
    loss = MeanSoftMaxLoss()

    # One graph trainer
    trainer = Trainer(train_data_shuffler,
                      validation_data_shuffler=None,
                      iterations=iterations,  # it is super fast
                      analizer=None,
                      temp_dir=directory)

    learning_rate = constant(0.001, name="regular_lr")
    trainer.create_network_from_scratch(graph=graph,
                                        validation_graph=validation_graph,
                                        loss=loss,
                                        learning_rate=learning_rate,
                                        optimizer=tf.train.AdamOptimizer(learning_rate))
    trainer.train()

    os.remove(tfrecords_filename)
    os.remove(tfrecords_filename_val)
    assert True

    # make sure the graph is fully cleaned up after the trainer is deleted
    tf.reset_default_graph()
    del trainer
    assert len(tf.global_variables()) == 0