Commit 467d4d05 authored by Tiago de Freitas Pereira

Merge branch 'hdf5-loading' into 'master'

Hdf5 loading

See merge request !30
parents a01b861f 04dad265
Pipeline #13732 failed with stages in 44 minutes and 39 seconds
import tensorflow as tf
import numpy
import os
import bob.io.base
DEFAULT_FEATURE = {'data': tf.FixedLenFeature([], tf.string),
                   'label': tf.FixedLenFeature([], tf.int64),
                   'key': tf.FixedLenFeature([], tf.string)}
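For context, this feature map is what one would hand to `tf.parse_single_example` when decoding TFRecord files written in this layout. A minimal sketch, assuming `serialized_example` is a scalar string tensor read from a TFRecord and that 'data' holds raw image bytes (the helper name is illustrative, not part of this merge request):

def decode_example(serialized_example):
    # map one serialized tf.Example onto its three declared fields
    features = tf.parse_single_example(serialized_example, features=DEFAULT_FEATURE)
    image = tf.decode_raw(features['data'], tf.uint8)  # assumes raw uint8 bytes
    label = tf.cast(features['label'], tf.int64)
    key = features['key']
    return image, label, key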
def from_hdf5file_to_tensor(filename):
    import bob.io.image
    data = bob.io.image.to_matplotlib(bob.io.base.load(filename))

    # reshaping to ndim == 3
    if data.ndim == 2:
        data = numpy.reshape(data, (data.shape[0], data.shape[1], 1))

    data = data.astype("float32")
    return data
def from_filename_to_tensor(filename, extension=None):
    """
    Read a file and convert it to a tensor.

    If the file extension is one that TensorFlow understands (.jpg, .bmp, .tif, ...),
    it uses `tf.image.decode_image`; otherwise it uses `bob.io.base.load`.
    """
    if extension == "hdf5":
        return tf.py_func(from_hdf5file_to_tensor, [filename], [tf.float32])
    else:
        return tf.cast(tf.image.decode_image(tf.read_file(filename)), tf.float32)
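A minimal usage sketch of the loader above, assuming a hypothetical file sample.hdf5 exists on disk; note that the `tf.py_func` branch returns a tensor with no static shape, which is why the callers below immediately reshape the result to `data_shape`:

filename = tf.constant("sample.hdf5")  # hypothetical path
image = from_filename_to_tensor(filename, extension="hdf5")
with tf.Session() as session:
    data = session.run(image)  # numpy data with ndim == 3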
def append_image_augmentation(image, gray_scale=False,
                              output_shape=None,
                              random_flip=False,
......
@@ -4,7 +4,7 @@
import tensorflow as tf
from functools import partial
from . import append_image_augmentation
from . import append_image_augmentation, from_filename_to_tensor
def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, data_type,
@@ -15,14 +15,14 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
                                               random_brightness=False,
                                               random_contrast=False,
                                               random_saturation=False,
                                               per_image_normalization=True):
                                               per_image_normalization=True,
                                               extension=None):
"""
Dump random batches from a list of image paths and labels:
The list of files and labels should be in the same order e.g.
filenames = ['class_1_img1', 'class_1_img2', 'class_2_img1']
labels = [0, 0, 1]
**Parameters**
@@ -66,7 +66,10 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
        Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)

    per_image_normalization:
        Linearly scales image to have zero mean and unit norm.

    extension:
        If None, files are loaded with `tf.image.decode_image`; if set to `hdf5`, they are loaded with `bob.io.base.load`.

    """
@@ -78,7 +81,8 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
        random_brightness=random_brightness,
        random_contrast=random_contrast,
        random_saturation=random_saturation,
        per_image_normalization=per_image_normalization)
        per_image_normalization=per_image_normalization,
        extension=extension)

    dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
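Once built, the dataset is consumed with the usual TF 1.x one-shot-iterator pattern; a sketch (not part of this diff) of draining the pipeline until repeat(epochs) is exhausted:

data, label = dataset.make_one_shot_iterator().get_next()
with tf.Session() as session:
    try:
        while True:
            batch, batch_labels = session.run([data, label])
    except tf.errors.OutOfRangeError:
        pass  # all epochs consumed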
@@ -94,7 +98,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
                                          random_brightness=False,
                                          random_contrast=False,
                                          random_saturation=False,
                                          per_image_normalization=True):
                                          per_image_normalization=True,
                                          extension=None):
"""
Create dataset from a list of tf-record files
@@ -125,7 +130,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
        random_brightness=random_brightness,
        random_contrast=random_contrast,
        random_saturation=random_saturation,
        per_image_normalization=per_image_normalization)
        per_image_normalization=per_image_normalization,
        extension=extension)

    dataset = tf.contrib.data.Dataset.from_tensor_slices((filenames, labels))
    dataset = dataset.map(parser)
@@ -139,14 +145,15 @@ def image_augmentation_parser(filename, label, data_shape, data_type,
                              random_brightness=False,
                              random_contrast=False,
                              random_saturation=False,
                              per_image_normalization=True):
                              per_image_normalization=True,
                              extension=None):
"""
Parses a single tf.Example into image and label tensors.
"""
# Convert the image data from string back to the numbers
image = tf.cast(tf.image.decode_image(tf.read_file(filename)), tf.float32)
image = from_filename_to_tensor(filename, extension=extension)
# Reshape image data into the original shape
image = tf.reshape(image, data_shape)
......
@@ -4,7 +4,7 @@
import tensorflow as tf
from functools import partial
from . import append_image_augmentation, siamease_pairs_generator
from . import append_image_augmentation, siamease_pairs_generator, from_filename_to_tensor
def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, data_type,
@@ -15,7 +15,8 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
                                               random_brightness=False,
                                               random_contrast=False,
                                               random_saturation=False,
                                               per_image_normalization=True):
                                               per_image_normalization=True,
                                               extension=None):
"""
Dump random batches for siamese networks from a list of image paths and labels:
......@@ -72,8 +73,9 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
extension:
If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
"""
    dataset = create_dataset_from_path_augmentation(filenames, labels, data_shape,
@@ -84,11 +86,10 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
        random_brightness=random_brightness,
        random_contrast=random_contrast,
        random_saturation=random_saturation,
        per_image_normalization=per_image_normalization)
        per_image_normalization=per_image_normalization,
        extension=extension)

    dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
    #dataset = dataset.batch(buffer_size).batch(batch_size).repeat(epochs)

    data, labels = dataset.make_one_shot_iterator().get_next()
    return data, labels
@@ -101,7 +102,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
                                          random_brightness=False,
                                          random_contrast=False,
                                          random_saturation=False,
                                          per_image_normalization=True):
                                          per_image_normalization=True,
                                          extension=None):
"""
Create dataset from a list of tf-record files
......@@ -118,8 +120,39 @@ def create_dataset_from_path_augmentation(filenames, labels,
data_type:
tf data type(https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
batch_size:
Size of the batch
feature:
epochs:
Number of epochs to be batched
buffer_size:
Size of the shuffle bucket
gray_scale:
Convert to gray scale?
output_shape:
If set, will randomly crop the image given the output shape
random_flip:
Randomly flip an image horizontally (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
random_brightness:
Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
random_contrast:
Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
random_saturation:
Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
extension:
If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
"""
@@ -132,7 +165,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
        random_brightness=random_brightness,
        random_contrast=random_contrast,
        random_saturation=random_saturation,
        per_image_normalization=per_image_normalization)
        per_image_normalization=per_image_normalization,
        extension=extension)
    left_data, right_data, siamese_labels = siamease_pairs_generator(filenames, labels)
    dataset = tf.contrib.data.Dataset.from_tensor_slices((left_data, right_data, siamese_labels))
@@ -147,15 +181,16 @@ def image_augmentation_parser(filename_left, filename_right, label, data_shape,
                              random_brightness=False,
                              random_contrast=False,
                              random_saturation=False,
                              per_image_normalization=True):
                              per_image_normalization=True,
                              extension=None):
"""
Parses a single tf.Example into image and label tensors.
"""
# Convert the image data from string back to the numbers
image_left = tf.cast(tf.image.decode_image(tf.read_file(filename_left)), tf.float32)
image_right = tf.cast(tf.image.decode_image(tf.read_file(filename_right)), tf.float32)
image_left = from_filename_to_tensor(filename_left, extension=extension)
image_right = from_filename_to_tensor(filename_right, extension=extension)
# Reshape image data into the original shape
image_left = tf.reshape(image_left, data_shape)
......
@@ -4,7 +4,7 @@
import tensorflow as tf
from functools import partial
from . import append_image_augmentation, triplets_random_generator
from . import append_image_augmentation, triplets_random_generator, from_filename_to_tensor
def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, data_type,
@@ -15,7 +15,8 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
                                               random_brightness=False,
                                               random_contrast=False,
                                               random_saturation=False,
                                               per_image_normalization=True):
                                               per_image_normalization=True,
                                               extension=None):
"""
Dump random batches for triplee networks from a list of image paths and labels:
......@@ -69,8 +70,10 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
Linearly scales image to have zero mean and unit norm.
extension:
If None, will load files using `tf.image.decode..` if set to `hdf5`, will load with `bob.io.base.load`
"""
@@ -82,7 +85,8 @@ def shuffle_data_and_labels_image_augmentation(filenames, labels, data_shape, da
        random_brightness=random_brightness,
        random_contrast=random_contrast,
        random_saturation=random_saturation,
        per_image_normalization=per_image_normalization)
        per_image_normalization=per_image_normalization,
        extension=extension)

    dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
    #dataset = dataset.batch(buffer_size).batch(batch_size).repeat(epochs)
@@ -99,7 +103,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
                                          random_brightness=False,
                                          random_contrast=False,
                                          random_saturation=False,
                                          per_image_normalization=True):
                                          per_image_normalization=True,
                                          extension=None):
"""
Create dataset from a list of tf-record files
@@ -130,7 +135,8 @@ def create_dataset_from_path_augmentation(filenames, labels,
        random_brightness=random_brightness,
        random_contrast=random_contrast,
        random_saturation=random_saturation,
        per_image_normalization=per_image_normalization)
        per_image_normalization=per_image_normalization,
        extension=extension)
    anchor_data, positive_data, negative_data = triplets_random_generator(filenames, labels)
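By analogy with the siamese pipeline above, the three lists would then be sliced into one dataset and mapped through the parser; a sketch of the elided continuation (the actual lines are not shown in this diff):

dataset = tf.contrib.data.Dataset.from_tensor_slices(
    (anchor_data, positive_data, negative_data))
dataset = dataset.map(parser)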
@@ -146,7 +152,8 @@ def image_augmentation_parser(anchor, positive, negative, data_shape, data_type=
                              random_brightness=False,
                              random_contrast=False,
                              random_saturation=False,
                              per_image_normalization=True):
                              per_image_normalization=True,
                              extension=None):
"""
Parses a single tf.Example into image and label tensors.
......@@ -156,7 +163,7 @@ def image_augmentation_parser(anchor, positive, negative, data_shape, data_type=
for n, v in zip(['anchor', 'positive', 'negative'], [anchor, positive, negative]):
# Convert the image data from string back to the numbers
image = tf.cast(tf.image.decode_image(tf.read_file(v)), data_type)
image = from_filename_to_tensor(v, extension=extension)
# Reshape image data into the original shape
image = tf.reshape(image, data_shape)
......
@@ -7,7 +7,6 @@ import threading
import os
import bob.io.base
import bob.core
from ..analyzers import SoftmaxAnalizer
from tensorflow.core.framework import summary_pb2
import time
......