Commit 85c6b71d authored by Amir MOHAMMADI

nosetests are passing

parent 573b8230
from bob.bio.base import read_original_data
from .generator import Generator
import logging
logger = logging.getLogger(__name__)
class BioGenerator(Generator):
"""A generator class which wraps bob.bio.base databases so that they can
be used with ``tf.data.Dataset.from_generator``.
Attributes
----------
biofile_to_label : :obj:`object`, optional
A callable with the signature of ``label = biofile_to_label(biofile)``.
By default, -1 is returned as the label.
database : :any:`bob.bio.base.database.BioDatabase`
The database that you want to use.
load_data : :obj:`object`, optional
A callable with the signature of
``data = load_data(database, biofile)``.
:any:`bob.bio.base.read_original_data` is wrapped to be used by
default.
biofiles : [:any:`bob.bio.base.database.BioFile`]
The list of bio files.
keys : [str]
The keys of samples, obtained by calling ``biofile.make_path("", "")``.
labels : [int]
The labels, obtained by calling ``label = biofile_to_label(biofile)``.
"""
def __init__(
self,
database,
biofiles,
load_data=None,
biofile_to_label=None,
multiple_samples=False,
**kwargs
):
if load_data is None:
def load_data(database, biofile):
data = read_original_data(
biofile, database.original_directory, database.original_extension
)
return data
if biofile_to_label is None:
def biofile_to_label(biofile):
return -1
self.database = database
self.load_data = load_data
self.biofile_to_label = biofile_to_label
def _reader(f):
label = int(self.biofile_to_label(f))
data = self.load_data(self.database, f)
key = str(f.make_path("", "")).encode("utf-8")
return data, label, key
if multiple_samples:
def reader(f):
data, label, key = _reader(f)
for d in data:
yield (d, label, key)
else:
def reader(f):
return _reader(f)
super(BioGenerator, self).__init__(
biofiles, reader, multiple_samples=multiple_samples, **kwargs
)
@property
def labels(self):
for f in self.biofiles:
yield int(self.biofile_to_label(f))
@property
def keys(self):
for f in self.biofiles:
yield str(f.make_path("", "")).encode("utf-8")
@property
def biofiles(self):
return self.samples
def __len__(self):
return len(self.biofiles)
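# A minimal usage sketch, assuming `database` is a bob.bio.base BioDatabase,
# `biofiles` is its list of BioFile objects, and the Generator base class
# makes instances callable and exposes `output_types`/`output_shapes`
# (consistent with the Generator hunk below):
def _example_bio_dataset(database, biofiles):
    import tensorflow as tf

    generator = BioGenerator(database, biofiles)
    # from_generator needs the element types and shapes up front; the base
    # class infers them from the first sample.
    return tf.data.Dataset.from_generator(
        generator, generator.output_types, generator.output_shapes
    )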
@@ -55,8 +55,8 @@ class Generator:
break
# Creating a "fake" dataset just to get the types and shapes
dataset = tf.data.Dataset.from_tensors(dlk)
- self._output_types = dataset.output_types
- self._output_shapes = dataset.output_shapes
+ self._output_types = tf.compat.v1.data.get_output_types(dataset)
+ self._output_shapes = tf.compat.v1.data.get_output_shapes(dataset)
logger.info(
"Initializing a dataset with %d %s and %s types and %s shapes",
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
import tensorflow as tf
from functools import partial
from . import append_image_augmentation, siamease_pairs_generator, from_filename_to_tensor
def shuffle_data_and_labels_image_augmentation(filenames,
labels,
data_shape,
data_type,
batch_size,
epochs=None,
buffer_size=10**3,
gray_scale=False,
output_shape=None,
random_flip=False,
random_brightness=False,
random_contrast=False,
random_saturation=False,
random_rotate=False,
per_image_normalization=True,
extension=None):
"""
Dump random batches for siamese networks from a list of image paths and labels:
The list of files and labels should be in the same order e.g.
filenames = ['class_1_img1', 'class_1_img2', 'class_2_img1']
labels = [0, 0, 1]
The batches returned with tf.Session.run() with be in the following format:
**data** a dictionary containing the keys ['left', 'right'], each one representing
one element of the pair and **labels** which is [0, 1] where 0 is the genuine pair
and 1 is the impostor pair.
**Parameters**
filenames:
List containing the path of the images
labels:
List containing the labels (needs to be in EXACT same order as filenames)
data_shape:
Shape of the loaded samples
data_type:
tf data type (https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
batch_size:
Size of the batch
epochs:
Number of epochs to be batched
buffer_size:
Size of the shuffle bucket
gray_scale:
Convert to gray scale?
output_shape:
If set, will randomly crop the image given the output shape
random_flip:
Randomly flip an image horizontally (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
random_brightness:
Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
random_contrast:
Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
random_saturation:
Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
random_rotate:
Randomly rotate face images between -5 and 5 degrees
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
extension:
If None, will load files using `tf.image.decode..`; if set to `hdf5`, will load with `bob.io.base.load`
"""
dataset = create_dataset_from_path_augmentation(
filenames,
labels,
data_shape,
data_type,
gray_scale=gray_scale,
output_shape=output_shape,
random_flip=random_flip,
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
random_rotate=random_rotate,
per_image_normalization=per_image_normalization,
extension=extension)
dataset = dataset.shuffle(buffer_size).batch(batch_size).repeat(epochs)
data, labels = tf.compat.v1.data.make_one_shot_iterator(dataset).get_next()
return data, labels
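# A minimal sketch of consuming the helper above in graph mode (the one-shot
# iterator is evaluated through tf.compat.v1.Session); the filenames, shape,
# and dtype below are placeholders:
def _example_siamese_batches():
    filenames = ["class_1_img1.png", "class_1_img2.png", "class_2_img1.png"]
    labels = [0, 0, 1]
    data, pair_labels = shuffle_data_and_labels_image_augmentation(
        filenames, labels, data_shape=(112, 112, 3), data_type=tf.uint8,
        batch_size=2)
    with tf.compat.v1.Session() as session:
        batch, is_impostor = session.run([data, pair_labels])
    # batch["left"] and batch["right"] hold the two pair members; is_impostor
    # is 0 for genuine pairs and 1 for impostor pairs.
    return batch, is_impostor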
def create_dataset_from_path_augmentation(filenames,
labels,
data_shape,
data_type,
gray_scale=False,
output_shape=None,
random_flip=False,
random_brightness=False,
random_contrast=False,
random_saturation=False,
random_rotate=False,
per_image_normalization=True,
extension=None):
"""
Create dataset from a list of tf-record files
**Parameters**
filenames:
List containing the path of the images
labels:
List containing the labels (needs to be in EXACT same order as filenames)
data_shape:
Shape of the loaded samples
data_type:
tf data type (https://www.tensorflow.org/versions/r0.12/resources/dims_types#data_types)
gray_scale:
Convert to gray scale?
output_shape:
If set, will randomly crop the image given the output shape
random_flip:
Randomly flip an image horizontally (https://www.tensorflow.org/api_docs/python/tf/image/random_flip_left_right)
random_brightness:
Adjust the brightness of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_brightness)
random_contrast:
Adjust the contrast of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_contrast)
random_saturation:
Adjust the saturation of an RGB image by a random factor (https://www.tensorflow.org/api_docs/python/tf/image/random_saturation)
random_rotate:
Randomly rotate face images between -10 and 10 degrees
per_image_normalization:
Linearly scales image to have zero mean and unit norm.
extension:
If None, will load files using `tf.image.decode..`; if set to `hdf5`, will load with `bob.io.base.load`
"""
parser = partial(
image_augmentation_parser,
data_shape=data_shape,
data_type=data_type,
gray_scale=gray_scale,
output_shape=output_shape,
random_flip=random_flip,
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
random_rotate=random_rotate,
per_image_normalization=per_image_normalization,
extension=extension)
left_data, right_data, siamese_labels = siamease_pairs_generator(
filenames, labels)
dataset = tf.data.Dataset.from_tensor_slices((left_data, right_data,
siamese_labels))
dataset = dataset.map(parser)
return dataset
def image_augmentation_parser(filename_left,
filename_right,
label,
data_shape,
data_type,
gray_scale=False,
output_shape=None,
random_flip=False,
random_brightness=False,
random_contrast=False,
random_saturation=False,
random_rotate=False,
per_image_normalization=True,
extension=None):
"""
Parses a single tf.Example into image and label tensors.
"""
# Convert the image data from string back to the numbers
image_left = from_filename_to_tensor(filename_left, extension=extension)
image_right = from_filename_to_tensor(filename_right, extension=extension)
# Reshape image data into the original shape
image_left = tf.reshape(image_left, data_shape)
image_right = tf.reshape(image_right, data_shape)
# Applying image augmentation
image_left = append_image_augmentation(
image_left,
gray_scale=gray_scale,
output_shape=output_shape,
random_flip=random_flip,
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
random_rotate=random_rotate,
per_image_normalization=per_image_normalization)
image_right = append_image_augmentation(
image_right,
gray_scale=gray_scale,
output_shape=output_shape,
random_flip=random_flip,
random_brightness=random_brightness,
random_contrast=random_contrast,
random_saturation=random_saturation,
random_rotate=random_rotate,
per_image_normalization=per_image_normalization)
image = dict()
image['left'] = image_left
image['right'] = image_right
label = tf.cast(label, tf.int64)
return image, label
@@ -8,11 +8,10 @@ from functools import partial
import json
import logging
import os
- import sys
import tensorflow as tf
- from . import append_image_augmentation, DEFAULT_FEATURE
+ from . import DEFAULT_FEATURE
logger = logging.getLogger(__name__)
@@ -32,6 +31,8 @@ def normalize_tfrecords_path(output):
def bytes_feature(value):
+ if isinstance(value, type(tf.constant(0))):
+ value = value.numpy()  # BytesList won't unpack a string from an EagerTensor.
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
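# A short sketch of how bytes_feature combines with the module's int64_feature
# helper into a serialized tf.train.Example (the feature names mirror the
# write_a_sample code further down):
def _example_make_example(data, label, key):
    feature = {
        "data": bytes_feature(data.tostring()),
        "label": int64_feature(label),
        "key": bytes_feature(key),
    }
    example = tf.train.Example(features=tf.train.Features(feature=feature))
    return example.SerializeToString()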
@@ -61,8 +62,8 @@ def dataset_to_tfrecord(dataset, output):
output, json_output = tfrecord_name_and_json_name(output)
# dump the structure so that we can read it back
meta = {
"output_types": repr(dataset.output_types),
"output_shapes": repr(dataset.output_shapes),
"output_types": repr(tf.compat.v1.data.get_output_types(dataset)),
"output_shapes": repr(tf.compat.v1.data.get_output_shapes(dataset)),
}
with open(json_output, "w") as f:
json.dump(meta, f)
@@ -79,7 +80,7 @@ def dataset_to_tfrecord(dataset, output):
def tf_serialize_example(*args):
args = tf.nest.flatten(args)
args = [tf.io.serialize_tensor(f) for f in args]
- tf_string = tf.compat.v1.py_func(serialize_example_pyfunction, args, tf.string)
+ tf_string = tf.py_function(serialize_example_pyfunction, args, tf.string)
return tf.reshape(tf_string, ()) # The result is a scalar
dataset = dataset.map(tf_serialize_example)
@@ -107,7 +108,7 @@ def dataset_from_tfrecord(tfrecord, num_parallel_reads=None):
A dataset that contains the data from the TFRecord file.
"""
# these imports are needed so that eval can work
- from tensorflow import TensorShape, Dimension
+ from tensorflow import TensorShape
if isinstance(tfrecord, str):
tfrecord = [tfrecord]
@@ -131,7 +132,9 @@ def dataset_from_tfrecord(tfrecord, num_parallel_reads=None):
def _parse_function(example_proto):
# Parse the input tf.Example proto using the dictionary above.
- args = tf.io.parse_single_example(serialized=example_proto, features=feature_description)
+ args = tf.io.parse_single_example(
+ serialized=example_proto, features=feature_description
+ )
args = tf.nest.flatten(args)
args = [tf.io.parse_tensor(v, t) for v, t in zip(args, output_types)]
args = [tf.reshape(v, s) for v, s in zip(args, output_shapes)]
@@ -140,527 +143,89 @@ def dataset_from_tfrecord(tfrecord, num_parallel_reads=None):
return raw_dataset.map(_parse_function)
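# A round-trip sketch for dataset_to_tfrecord/dataset_from_tfrecord above,
# assuming eager execution and a writable placeholder path:
def _example_tfrecord_roundtrip(path="/tmp/example.tfrecords"):
    dataset = tf.data.Dataset.from_tensor_slices(
        (tf.range(3), tf.constant([b"a", b"b", b"c"]))
    )
    # writes `path` plus a sibling .json file holding the element types/shapes
    dataset_to_tfrecord(dataset, path)
    # reads both files back and restores the original element structure
    return dataset_from_tfrecord(path)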
- def write_a_sample(writer, data, label, key, feature=None, size_estimate=False):
- if feature is None:
- feature = {
- "data": bytes_feature(data.tostring()),
- "label": int64_feature(label),
- "key": bytes_feature(key),
- }
+ # def write_a_sample(writer, data, label, key, feature=None, size_estimate=False):
+ # if feature is None:
+ # feature = {
+ # "data": bytes_feature(data.tostring()),
+ # "label": int64_feature(label),
+ # "key": bytes_feature(key),
+ # }
+ # example = tf.train.Example(features=tf.train.Features(feature=feature))
+ # example = example.SerializeToString()
+ # if not size_estimate:
+ # writer.write(example)
+ # return sys.getsizeof(example)
+ # def example_parser(serialized_example, feature, data_shape, data_type):
+ # """
+ # Parses a single tf.Example into image and label tensors.
+ # """
+ # # Decode the record read by the reader
+ # features = tf.io.parse_single_example(
+ # serialized=serialized_example, features=feature
+ # )
+ # # Convert the image data from string back to the numbers
+ # image = tf.io.decode_raw(features["data"], data_type)
+ # # Cast label data into int64
+ # label = tf.cast(features["label"], tf.int64)
+ # # Reshape image data into the original shape
+ # image = tf.reshape(image, data_shape)
+ # key = tf.cast(features["key"], tf.string)
+ # return image, label, key
+ # def image_augmentation_parser(
+ # serialized_example,
+ # feature,
+ # data_shape,
+ # data_type,
+ # gray_scale=False,
+ # output_shape=None,
+ # random_flip=False,
+ # random_brightness=False,
+ # random_contrast=False,
+ # random_saturation=False,
+ # random_rotate=False,
+ # per_image_normalization=True,
+ # random_gamma=False,
+ # random_crop=False,
+ # ):
+ # """
+ # Parses a single tf.Example into image and label tensors.
+ # """
+ # # Decode the record read by the reader
+ # features = tf.io.parse_single_example(
+ # serialized=serialized_example, features=feature
+ # )
+ # # Convert the image data from string back to the numbers
+ # image = tf.io.decode_raw(features["data"], data_type)
+ # # Reshape image data into the original shape
+ # image = tf.reshape(image, data_shape)
+ # # Applying image augmentation
+ # image = append_image_augmentation(
+ # image,
+ # gray_scale=gray_scale,
+ # output_shape=output_shape,
+ # random_flip=random_flip,
+ # random_brightness=random_brightness,
+ # random_contrast=random_contrast,
+ # random_saturation=random_saturation,
+ # random_rotate=random_rotate,
+ # per_image_normalization=per_image_normalization,
+ # random_gamma=random_gamma,
+ # random_crop=random_crop,
+ # )
+ # # Cast label data into int64
+ # label = tf.cast(features["label"], tf.int64)
+ # key = tf.cast(features["key"], tf.string)
+ # return image, label, key
- example = tf.train.Example(features=tf.train.Features(feature=feature))
- example = example.SerializeToString()
- if not size_estimate:
- writer.write(example)
- return sys.getsizeof(example)
- def example_parser(serialized_example, feature, data_shape, data_type):
- """
- Parses a single tf.Example into image and label tensors.
- """
- # Decode the record read by the reader
- features = tf.io.parse_single_example(serialized=serialized_example, features=feature)
- # Convert the image data from string back to the numbers
- image = tf.io.decode_raw(features["data"], data_type)
- # Cast label data into int64
- label = tf.cast(features["label"], tf.int64)
- # Reshape image data into the original shape
- image = tf.reshape(image, data_shape)
- key = tf.cast(features["key"], tf.string)
- return image, label, key
- def image_augmentation_parser(
- serialized_example,
- feature,
- data_shape,
- data_type,
- gray_scale=False,
- output_shape=None,
- random_flip=False,
- random_brightness=False,
- random_contrast=False,
- random_saturation=False,
- random_rotate=False,
- per_image_normalization=True,
- random_gamma=False,
- random_crop=False,
- ):
- """
- Parses a single tf.Example into image and label tensors.
- """
- # Decode the record read by the reader
- features = tf.io.parse_single_example(serialized=serialized_example, features=feature)
- # Convert the image data from string back to the numbers
- image = tf.io.decode_raw(features["data"], data_type)
- # Reshape image data into the original shape
- image = tf.reshape(image, data_shape)
- # Applying image augmentation
- image = append_image_augmentation(
- image,
- gray_scale=gray_scale,
- output_shape=output_shape,
- random_flip=random_flip,
- random_brightness=random_brightness,
- random_contrast=random_contrast,
- random_saturation=random_saturation,
- random_rotate=random_rotate,
- per_image_normalization=per_image_normalization,
- random_gamma=random_gamma,
- random_crop=random_crop,
- )
- # Cast label data into int64
- label = tf.cast(features["label"], tf.int64)
- key = tf.cast(features["key"], tf.string)
- return image, label, key
def read_and_decode(filename_queue, data_shape, data_type=tf.float32, feature=None):
"""
Simples parse possible for a tfrecord.
It assumes that you have the pair **train/data** and **train/label**
"""
if feature is None:
feature = DEFAULT_FEATURE
# Define a reader and read the next record
reader = tf.compat.v1.TFRecordReader()
_, serialized_example = reader.read(filename_queue)
return example_parser(serialized_example, feature, data_shape, data_type)
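# A graph-mode usage sketch for read_and_decode, assuming TF1-style queue
# runners; the tfrecord path and data shape are placeholders:
def _example_read_and_decode(tfrecord_path, data_shape=(28, 28, 1)):
    filename_queue = tf.compat.v1.train.string_input_producer([tfrecord_path])
    image, label, key = read_and_decode(filename_queue, data_shape)
    with tf.compat.v1.Session() as session:
        coord = tf.compat.v1.train.Coordinator()
        threads = tf.compat.v1.train.start_queue_runners(sess=session, coord=coord)
        first = session.run([image, label, key])
        coord.request_stop()
        coord.join(threads)
    return first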
def create_dataset_from_records(
tfrecord_filenames, data_shape, data_type, feature=None
):
"""