Commit 3dcbd6af authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Created a mechanism that loads data from bob's HDF5 files and converts it to a tensor

parent 1f16cf5c
Pipeline #13598 failed with stages
in 22 minutes and 51 seconds
import tensorflow as tf
import numpy
import os
import bob.io.base
import bob.io.image
# Schema used to parse serialized tf.Example records: raw image bytes
# ('data'), an int64 class label ('label'), and a string sample key ('key').
DEFAULT_FEATURE = {'data': tf.FixedLenFeature([], tf.string),
'label': tf.FixedLenFeature([], tf.int64),
'key': tf.FixedLenFeature([], tf.string)}
def from_hdf5file_to_tensor(filename):
    """
    Load an image from an HDF5 file and return it as a float32 numpy array.

    Uses `bob.io.base.load` to read the file and `bob.io.image.to_matplotlib`
    to convert from bob's layout to matplotlib's (height, width, channels)
    layout.

    **Parameters**

    filename:
        Path of the HDF5 file to load.
    """
    data = bob.io.image.to_matplotlib(bob.io.base.load(filename))

    # Reshaping to ndim == 3 so gray-scale images also carry a channel axis
    if data.ndim == 2:
        data = numpy.reshape(data, (data.shape[0], data.shape[1], 1))

    # BUG FIX: the original overwrote the loaded image with
    # numpy.zeros((160, 160, 1)) — a debug leftover that discarded the
    # file contents and made the function always return zeros.
    return data.astype("float32")
def from_filename_to_tensor(filename, extension=None):
    """
    Read a file and convert it to a tensor.

    If the file extension is something that tensorflow understands
    (.jpg, .bmp, .tif, ...), `tf.image.decode_image` is used; otherwise
    the file is loaded through `bob.io.base.load` wrapped in a py_func.
    """
    if extension != "hdf5":
        # Formats tensorflow can decode natively
        raw = tf.read_file(filename)
        return tf.cast(tf.image.decode_image(raw), tf.float32)
    # HDF5 is not understood by tensorflow; delegate to bob's loader
    return tf.py_func(from_hdf5file_to_tensor, [filename], [tf.float32])
def append_image_augmentation(image, gray_scale=False,
output_shape=None,
random_flip=False,
......
......@@ -4,7 +4,7 @@
import tensorflow as tf
from functools import partial
from . import append_image_augmentation
from . import append_image_augmentation, from_filename_to_tensor
def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, data_type,
......@@ -15,14 +15,14 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Dump random batches from a list of image paths and labels:
The list of files and labels should be in the same order e.g.
filenames = ['class_1_img1', 'class_1_img2', 'class_2_img1']
labels = [0, 0, 1]
**Parameters**
......@@ -66,7 +66,10 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
Linearly scales image to have zero mean and unit norm.
extension:
If None, files will be loaded using `tf.image.decode_image`; if set to `hdf5`, they will be loaded with `bob.io.base.load`
"""
......@@ -78,7 +81,8 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
per_image_normalization=per_image_normalization)
per_image_normalization=per_image_normalization,
extension=extension)
dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
......@@ -94,7 +98,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Create dataset from a list of tf-record files
......@@ -125,7 +130,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
per_image_normalization=per_image_normalization)
per_image_normalization=per_image_normalization,
extension=extension)
dataset = tf.contrib.data.Dataset.from_tensor_slices((filenames, labels))
dataset = dataset.map(parser)
......@@ -139,14 +145,15 @@ def image_augmentation_parser(filename, label, data_shape, data_type,
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Parses a single tf.Example into image and label tensors.
"""
# Convert the image data from string back to the numbers
image = tf.cast(tf.image.decode_image(tf.read_file(filename)), tf.float32)
image = from_filename_to_tensor(filename, extension=extension)
# Reshape image data into the original shape
image = tf.reshape(image, data_shape)
......
......@@ -4,7 +4,7 @@
import tensorflow as tf
from functools import partial
from . import append_image_augmentation, siamease_pairs_generator
from . import append_image_augmentation, siamease_pairs_generator, from_filename_to_tensor
def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, data_type,
......@@ -15,7 +15,8 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Dump random batches for siamese networks from a list of image paths and labels:
......@@ -72,8 +73,9 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
extension:
If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
"""
dataset = create_dataset_from_path_augmentation(filenames, labels, data_shape,
......@@ -84,11 +86,10 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
per_image_normalization=per_image_normalization)
per_image_normalization=per_image_normalization,
extension=extension)
dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
#dataset = dataset.batch(buffer_size).batch(batch_size).repeat(epochs)
data, labels = dataset.make_one_shot_iterator().get_next()
return data, labels
......@@ -101,7 +102,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Create dataset from a list of tf-record files
......@@ -118,8 +120,39 @@ def create_dataset_from_path_augmentation(filenames, labels,
data_type:
tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
batch_size:
Size of the batch
feature:
epochs:
Number of epochs to be batched
buffer_size:
Size of the shuffle bucket
gray_scale:
Convert to gray scale?
output_shape:
If set, will randomly crop the image given the output shape
random_flip:
Randomly flip an image horizontally (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
random_brightness:
Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
random_contrast:
Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
random_saturation:
Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
extension:
If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
"""
......@@ -132,7 +165,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
per_image_normalization=per_image_normalization)
per_image_normalization=per_image_normalization,
extension=extension)
left_data, right_data, siamese_labels = siamease_pairs_generator(filenames, labels)
dataset = tf.contrib.data.Dataset.from_tensor_slices((left_data, right_data, siamese_labels))
......@@ -147,15 +181,16 @@ def image_augmentation_parser(filename_left, filename_right, label, data_shape,
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Parses a single tf.Example into image and label tensors.
"""
# Convert the image data from string back to the numbers
image_left = tf.cast(tf.image.decode_image(tf.read_file(filename_left)), tf.float32)
image_right = tf.cast(tf.image.decode_image(tf.read_file(filename_right)), tf.float32)
image_left = from_filename_to_tensor(filename_left, extension=extension)
image_right = from_filename_to_tensor(filename_right, extension=extension)
# Reshape image data into the original shape
image_left = tf.reshape(image_left, data_shape)
......
......@@ -4,7 +4,7 @@
import tensorflow as tf
from functools import partial
from . import append_image_augmentation, triplets_random_generator
from . import append_image_augmentation, triplets_random_generator, from_filename_to_tensor
def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, data_type,
......@@ -15,7 +15,8 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Dump random batches for triplet networks from a list of image paths and labels:
......@@ -69,8 +70,10 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
Linearly scales image to have zero mean and unit norm.
extension:
If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
"""
......@@ -82,7 +85,8 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
per_image_normalization=per_image_normalization)
per_image_normalization=per_image_normalization,
extension=extension)
dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
#dataset = dataset.batch(buffer_size).batch(batch_size).repeat(epochs)
......@@ -99,7 +103,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Create dataset from a list of tf-record files
......@@ -130,7 +135,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
per_image_normalization=per_image_normalization)
per_image_normalization=per_image_normalization,
extension=extension)
anchor_data, positive_data, negative_data = triplets_random_generator(filenames, labels)
......@@ -146,7 +152,8 @@ def image_augmentation_parser(anchor, positive, negative, data_shape, data_type=
random_brightness=False,
random_contrast=False,
random_saturation=False,
per_image_normalization=True):
per_image_normalization=True,
extension=None):
"""
Parses a single tf.Example into image and label tensors.
......@@ -156,7 +163,7 @@ def image_augmentation_parser(anchor, positive, negative, data_shape, data_type=
for n, v in zip(['anchor', 'positive', 'negative'], [anchor, positive, negative]):
# Convert the image data from string back to the numbers
image = tf.cast(tf.image.decode_image(tf.read_file(v)), data_type)
image = from_filename_to_tensor(v, extension=extension)
# Reshape image data into the original shape
image = tf.reshape(image, data_shape)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment