From f7213a21e206780c169916abd00e793d1c5e7139 Mon Sep 17 00:00:00 2001
From: Pavel Korshunov <pavel.korshunov@idiap.ch>
Date: Fri, 8 Sep 2017 16:58:33 +0200
Subject: [PATCH] tfrecord with keras

---
 bob/learn/tensorflow/test/test_keras.py       |  24 +-
 .../tensorflow/test/test_keras_tfrecord.py    | 243 ++++++++++++++++++
 2 files changed, 255 insertions(+), 12 deletions(-)
 create mode 100644 bob/learn/tensorflow/test/test_keras_tfrecord.py

diff --git a/bob/learn/tensorflow/test/test_keras.py b/bob/learn/tensorflow/test/test_keras.py
index 88ac1605..aaaefd33 100644
--- a/bob/learn/tensorflow/test/test_keras.py
+++ b/bob/learn/tensorflow/test/test_keras.py
@@ -2,18 +2,18 @@
 
 import pprint
 
-from tensorflow.contrib.keras.python.keras.models import Sequential
-from tensorflow.contrib.keras.python.keras.engine import InputLayer
-from tensorflow.contrib.keras.python.keras.layers import Dense
-from tensorflow.contrib.keras.python.keras.layers import LSTM
-from tensorflow.contrib.keras.python.keras.layers import Lambda
-from tensorflow.contrib.keras.python.keras.layers import Flatten
-from tensorflow.contrib.keras.python.keras.layers import Reshape
-
-from tensorflow.contrib.keras.python.keras.datasets import mnist
-from tensorflow.contrib.keras.python.keras.utils import np_utils
-
-from tensorflow.contrib.keras.python.keras.utils.layer_utils import print_summary
+from keras.models import Sequential
+from keras.engine import InputLayer
+from keras.layers import Dense
+from keras.layers import LSTM
+from keras.layers import Lambda
+from keras.layers import Flatten
+from keras.layers import Reshape
+
+from keras.datasets import mnist
+from keras.utils import np_utils
+
+from keras.utils.layer_utils import print_summary
 
 method = "drop-first" # See if/elif block below for explanation
 
diff --git a/bob/learn/tensorflow/test/test_keras_tfrecord.py b/bob/learn/tensorflow/test/test_keras_tfrecord.py
new file mode 100644
index 00000000..f9bb6021
--- /dev/null
+++ b/bob/learn/tensorflow/test/test_keras_tfrecord.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python
+
+from keras.models import Sequential
+from keras.engine import InputLayer
+from keras.layers import Dense
+from keras.layers import LSTM
+from keras.layers import Lambda
+from keras.layers import Flatten
+from keras.layers import Reshape
+
+from keras.datasets import mnist
+from keras.utils import np_utils
+
+from keras.utils.layer_utils import print_summary
+
+import os
+import copy
+import time
+
+import numpy as np
+
+import tensorflow as tf
+from tensorflow.python.ops import data_flow_ops
+from keras import backend as K
+from keras.models import Model
+from keras.layers import Dense
+from keras.layers import Dropout
+from keras.layers import Flatten
+from keras.layers import Input
+from keras.layers import Conv2D
+from keras.layers import MaxPooling2D
+from keras.callbacks import EarlyStopping
+from keras.callbacks import TensorBoard
+from keras.objectives import categorical_crossentropy
+from keras.utils import np_utils
+from keras.utils.generic_utils import Progbar
+from keras import callbacks as cbks
+from keras import optimizers, objectives
+from keras import metrics as metrics_module
+
+if K.backend() != 'tensorflow':
+    raise RuntimeError('This example can only run with the '
+                       'TensorFlow backend for the time being, '
+                       'because it requires TFRecords, which '
+                       'are not supported on other platforms.')
+
+
+def images_to_tfrecord(images, labels, filename):
+    def _int64_feature(value):
+        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+    def _bytes_feature(value):
+        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
+
+    """ Save data into TFRecord """
+    if not os.path.isfile(filename):
+        num_examples = images.shape[0]
+
+        rows = images.shape[1]
+        cols = images.shape[2]
+        depth = images.shape[3]
+
+        print('Writing', filename)
+        writer = tf.python_io.TFRecordWriter(filename)
+        for index in range(num_examples):
+            image_raw = images[index].tostring()
+            example = tf.train.Example(features=tf.train.Features(feature={
+                'height': _int64_feature(rows),
+                'width': _int64_feature(cols),
+                'depth': _int64_feature(depth),
+                'label': _int64_feature(int(labels[index])),
+                'image_raw': _bytes_feature(image_raw)}))
+            writer.write(example.SerializeToString())
+        writer.close()
+    else:
+        print('tfrecord %s already exists' % filename)
+
+
+def read_and_decode_recordinput(tf_glob, one_hot=True, classes=None, is_train=None,
+                                batch_shape=[1000, 28, 28, 1], parallelism=1):
+    """ Return tensor to read from TFRecord """
+    print 'Creating graph for loading %s TFRecords...' % tf_glob
+    with tf.variable_scope("TFRecords"):
+        record_input = data_flow_ops.RecordInput(
+            tf_glob, batch_size=batch_shape[0], parallelism=parallelism)
+        records_op = record_input.get_yield_op()
+        records_op = tf.split(records_op, batch_shape[0], 0)
+        records_op = [tf.reshape(record, []) for record in records_op]
+        progbar = Progbar(len(records_op))
+
+        images = []
+        labels = []
+        for i, serialized_example in enumerate(records_op):
+            progbar.update(i)
+            with tf.variable_scope("parse_images", reuse=True):
+                features = tf.parse_single_example(
+                    serialized_example,
+                    features={
+                        'label': tf.FixedLenFeature([], tf.int64),
+                        'image_raw': tf.FixedLenFeature([], tf.string),
+                    })
+                img = tf.decode_raw(features['image_raw'], tf.uint8)
+                img.set_shape(batch_shape[1] * batch_shape[2])
+                img = tf.reshape(img, [1] + batch_shape[1:])
+
+                img = tf.cast(img, tf.float32) * (1. / 255) - 0.5
+
+                label = tf.cast(features['label'], tf.int32)
+                if one_hot and classes:
+                    label = tf.one_hot(label, classes)
+
+                images.append(img)
+                labels.append(label)
+
+        images = tf.parallel_stack(images, 0)
+        labels = tf.parallel_stack(labels, 0)
+        images = tf.cast(images, tf.float32)
+
+        images = tf.reshape(images, shape=batch_shape)
+
+        # StagingArea will store tensors
+        # across multiple steps to
+        # speed up execution
+        images_shape = images.get_shape()
+        labels_shape = labels.get_shape()
+        copy_stage = data_flow_ops.StagingArea(
+            [tf.float32, tf.float32],
+            shapes=[images_shape, labels_shape])
+        copy_stage_op = copy_stage.put(
+            [images, labels])
+        staged_images, staged_labels = copy_stage.get()
+
+        return images, labels
+
+
+def save_mnist_as_tfrecord():
+    (X_train, y_train), (X_test, y_test) = mnist.load_data()
+    X_train = X_train[..., np.newaxis]
+    X_test = X_test[..., np.newaxis]
+    images_to_tfrecord(images=X_train, labels=y_train, filename='train.mnist.tfrecord')
+    images_to_tfrecord(images=X_test, labels=y_test, filename='test.mnist.tfrecord')
+
+
+def lstm_layers(x_train_input, n_hidden, n_drop_first, n_new_steps, n_classes):
+    x = LSTM(n_hidden, input_shape=(28, 28), return_sequences=True)(x_train_input)
+    x = Lambda(lambda k: k[:, n_drop_first:, :])(x)
+    x = Reshape((n_hidden*n_new_steps,), input_shape=(n_new_steps, n_hidden))(x)
+    x_train_out = Dense(n_classes, activation="softmax",
+                        name='x_train_out')(x)
+
+    return x_train_out
+
+
+
+def main(argv=None):
+
+    sess = tf.Session()
+    K.set_session(sess)
+
+    save_mnist_as_tfrecord()
+
+    n_epochs = 2
+    n_hidden = 32  # Inside the LSTM cell
+    n_drop_first = 2  # Number of first output to drop after LSTM
+    classes = 10
+    parallelism = 10
+    batch_size = 100
+    batch_shape = [batch_size, 28, 28, 1]
+
+
+    x_train_batch, y_train_batch = read_and_decode_recordinput(
+        'train.mnist.tfrecord',
+        one_hot=True,
+        classes=classes,
+        is_train=True,
+        batch_shape=batch_shape,
+        parallelism=parallelism)
+
+    x_test_batch, y_test_batch = read_and_decode_recordinput(
+        'test.mnist.tfrecord',
+        one_hot=True,
+        classes=classes,
+        is_train=True,
+        batch_shape=batch_shape,
+        parallelism=parallelism)
+
+    x_batch_shape = x_train_batch.get_shape().as_list()
+    y_batch_shape = y_train_batch.get_shape().as_list()
+
+    print("Train data {}".format(x_batch_shape))
+    print("Train labels {}".format(y_batch_shape))
+
+    x_train_input = Input(tensor=x_train_batch, batch_shape=x_batch_shape)
+    n_steps = batch_shape[0]
+    n_new_steps = n_steps - n_drop_first
+
+    x_train_out = lstm_layers(x_train_input, n_hidden, n_drop_first, n_new_steps, classes)
+
+    y_train_in_out = Input(tensor=y_train_batch, batch_shape=y_batch_shape, name='y_labels')
+    cce = categorical_crossentropy(y_train_batch, x_train_out)
+
+    # LSTM
+    model = Sequential(inputs=[x_train_input], outputs=[x_train_out])
+    model.add_loss(cce)
+
+    ######################################################################
+
+    print_summary(model)
+
+    model.compile(optimizer="adam",
+                  loss="categorical_crossentropy",
+                  metrics=["accuracy"])
+    model.summary()
+    tensorboard = TensorBoard()
+    # tensorboard disabled due to Keras bug
+    model.fit(batch_size=batch_size,
+              epochs=n_epochs)  # callbacks=[tensorboard])
+
+    model.save_weights('saved_wt.h5')
+
+    K.clear_session()
+
+    # Second Session, pure Keras
+    (X_train, y_train), (X_test, y_test) = mnist.load_data()
+    X_train = X_train[..., np.newaxis]
+    X_test = X_test[..., np.newaxis]
+    x_test_inp = Input(batch_shape=(None,) + (X_test.shape[1:]))
+    test_out = lstm_layers(x_test_inp, n_hidden, n_drop_first, n_new_steps, classes)
+    test_model = Model(inputs=x_test_inp, outputs=test_out)
+
+    test_model.load_weights('saved_wt.h5')
+    test_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
+    test_model.summary()
+
+    loss, acc = test_model.evaluate(X_test, np_utils.to_categorical(y_test), classes)
+    print('\nTest accuracy: {0}'.format(acc))
+
+# for layer in model.layers:
+#     print("{} {}".format(layer.name, model.get_layer(layer.name).output.shape))
+
+
+if __name__ == '__main__':
+    main()
-- 
GitLab