Commit a102df67 authored by Tiago de Freitas Pereira

Merge branch 'predict' into 'master'

Changes to the biogenerator

See merge request !33
parents 5fb18ca1 4d08ad93
Pipeline #13923 failed with stages in 24 minutes and 26 seconds
@@ -73,7 +73,8 @@ def append_image_augmentation(image, gray_scale=False,
     if output_shape is not None:
         assert len(output_shape) == 2
-        image = tf.image.resize_image_with_crop_or_pad(image, output_shape[0], output_shape[1])
+        image = tf.image.resize_image_with_crop_or_pad(
+            image, output_shape[0], output_shape[1])
 
     if random_flip:
         image = tf.image.random_flip_left_right(image)
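The hunk above only re-wraps the long resize call to satisfy the line-length limit; behaviour is unchanged. For reference, a minimal sketch of what tf.image.resize_image_with_crop_or_pad does (TF 1.x API, as used throughout this diff; the 160x160 target is illustrative):

import tensorflow as tf

# Center-crops inputs larger than 160x160 and zero-pads smaller ones;
# the channel dimension is left untouched.
image = tf.placeholder(tf.float32, shape=(None, None, 3))
resized = tf.image.resize_image_with_crop_or_pad(image, 160, 160)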
@@ -136,15 +137,18 @@ def triplets_random_generator(input_data, input_labels):
     input_labels = numpy.array(input_labels)
 
     total_samples = input_data.shape[0]
 
-    indexes_per_labels = arrange_indexes_by_label(input_labels, possible_labels)
+    indexes_per_labels = arrange_indexes_by_label(
+        input_labels, possible_labels)
 
     # searching for random triplets
     offset_class = 0
     for i in range(total_samples):
 
-        anchor_sample = input_data[indexes_per_labels[possible_labels[offset_class]][numpy.random.randint(len(indexes_per_labels[possible_labels[offset_class]]))], ...]
-        positive_sample = input_data[indexes_per_labels[possible_labels[offset_class]][numpy.random.randint(len(indexes_per_labels[possible_labels[offset_class]]))], ...]
+        anchor_sample = input_data[indexes_per_labels[possible_labels[offset_class]][numpy.random.randint(
+            len(indexes_per_labels[possible_labels[offset_class]]))], ...]
+        positive_sample = input_data[indexes_per_labels[possible_labels[offset_class]][numpy.random.randint(
+            len(indexes_per_labels[possible_labels[offset_class]]))], ...]
 
         # Changing the class
         offset_class += 1
@@ -152,10 +156,11 @@ def triplets_random_generator(input_data, input_labels):
         if offset_class == len(possible_labels):
             offset_class = 0
 
-        negative_sample = input_data[indexes_per_labels[possible_labels[offset_class]][numpy.random.randint(len(indexes_per_labels[possible_labels[offset_class]]))], ...]
+        negative_sample = input_data[indexes_per_labels[possible_labels[offset_class]][numpy.random.randint(
+            len(indexes_per_labels[possible_labels[offset_class]]))], ...]
 
         append(str(anchor_sample), str(positive_sample), str(negative_sample))
-        #yield anchor, positive, negative
+        # yield anchor, positive, negative
     return anchor, positive, negative
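The two hunks above are pure line-length re-wraps of the triplet-selection expressions. The pattern they wrap is easier to read through a hypothetical pick() helper (illustration only, not part of the module), assuming indexes_per_labels maps each label to an array of sample indexes:

import numpy

def pick(input_data, indexes_per_labels, label):
    # draw one random sample carrying the given label
    idx = indexes_per_labels[label]
    return input_data[idx[numpy.random.randint(len(idx))], ...]

# anchor and positive are drawn from the current class; offset_class is
# then advanced (wrapping around) so the negative comes from another class:
#   anchor   = pick(input_data, indexes_per_labels, possible_labels[offset_class])
#   positive = pick(input_data, indexes_per_labels, possible_labels[offset_class])
#   negative = pick(input_data, indexes_per_labels, possible_labels[next_offset_class])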
@@ -191,13 +196,16 @@ def siamease_pairs_generator(input_data, input_labels):
 
     # Filtering the samples by label and shuffling all the indexes
-    #indexes_per_labels = dict()
-    #for l in possible_labels:
+    # indexes_per_labels = dict()
+    # for l in possible_labels:
     #    indexes_per_labels[l] = numpy.where(input_labels == l)[0]
     #    numpy.random.shuffle(indexes_per_labels[l])
-    indexes_per_labels = arrange_indexes_by_label(input_labels, possible_labels)
+    indexes_per_labels = arrange_indexes_by_label(
+        input_labels, possible_labels)
 
-    left_possible_indexes = numpy.random.choice(possible_labels, total_samples, replace=True)
-    right_possible_indexes = numpy.random.choice(possible_labels, total_samples, replace=True)
+    left_possible_indexes = numpy.random.choice(
+        possible_labels, total_samples, replace=True)
+    right_possible_indexes = numpy.random.choice(
+        possible_labels, total_samples, replace=True)
 
     genuine = True
     for i in range(total_samples):
@@ -207,10 +215,12 @@ def siamease_pairs_generator(input_data, input_labels):
             class_index = left_possible_indexes[i]
 
             # Now selecting the samples for the pair
-            left = input_data[indexes_per_labels[class_index][numpy.random.randint(len(indexes_per_labels[class_index]))]]
-            right = input_data[indexes_per_labels[class_index][numpy.random.randint(len(indexes_per_labels[class_index]))]]
+            left = input_data[indexes_per_labels[class_index][numpy.random.randint(
+                len(indexes_per_labels[class_index]))]]
+            right = input_data[indexes_per_labels[class_index][numpy.random.randint(
+                len(indexes_per_labels[class_index]))]]
             append(left, right, 0)
-            #yield left, right, 0
+            # yield left, right, 0
         else:
             # Selecting the 2 classes
             class_index = list()
@@ -219,7 +229,7 @@ def siamease_pairs_generator(input_data, input_labels):
             # Finding the right pair
             j = i
             # TODO: Lame solution. Fix this
-            while j < total_samples: # Here is a unidirectional search for the negative pair
+            while j < total_samples:  # Here is a unidirectional search for the negative pair
                 if left_possible_indexes[i] != right_possible_indexes[j]:
                     class_index.append(right_possible_indexes[j])
                     break
@@ -227,11 +237,12 @@ def siamease_pairs_generator(input_data, input_labels):
 
             if j < total_samples:
                 # Now selecting the samples for the pair
-                left = input_data[indexes_per_labels[class_index[0]][numpy.random.randint(len(indexes_per_labels[class_index[0]]))]]
-                right = input_data[indexes_per_labels[class_index[1]][numpy.random.randint(len(indexes_per_labels[class_index[1]]))]]
+                left = input_data[indexes_per_labels[class_index[0]][numpy.random.randint(
+                    len(indexes_per_labels[class_index[0]]))]]
+                right = input_data[indexes_per_labels[class_index[1]][numpy.random.randint(
+                    len(indexes_per_labels[class_index[1]]))]]
                 append(left, right, 1)
 
         genuine = not genuine
     return left_data, right_data, labels
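These hunks also make the pairing convention of the generator visible: genuine pairs (two samples of the same class) get target 0, impostor pairs (samples of two different classes) get target 1, and the genuine flag alternates between the two on every iteration. A toy illustration of that convention, assuming two classes with one sample each:

import numpy

input_data = numpy.array([[0.0], [1.0]])
indexes_per_labels = {0: numpy.array([0]), 1: numpy.array([1])}

genuine_pair = (input_data[0], input_data[0], 0)   # same class -> target 0
impostor_pair = (input_data[0], input_data[1], 1)  # different classes -> target 1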
@@ -296,3 +307,30 @@ def tf_repeat(tensor, repeats):
     tiled_tensor = tf.tile(expanded_tensor, multiples=multiples)
     repeated_tensor = tf.reshape(tiled_tensor, tf.shape(tensor) * repeats)
     return repeated_tensor
+
+
+def all_patches(image, label, key, size):
+    """Extracts all patches of an image
+
+    Parameters
+    ----------
+    image
+        The image should be channels_last format and already batched.
+    label
+        The label for the image
+    key
+        The key for the image
+    size : (int, int)
+        The height and width of the blocks.
+
+    Returns
+    -------
+    (blocks, label, key)
+        The non-overlapping blocks of the given size from image; labels and
+        keys are repeated accordingly.
+    """
+    blocks, n_blocks = blocks_tensorflow(image, size)
+
+    # duplicate label and key as n_blocks
+    label = tf_repeat(label, [n_blocks])
+    key = tf_repeat(key, [n_blocks])
+
+    return blocks, label, key
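A usage sketch for the new all_patches helper (assumed from its docstring; blocks_tensorflow is the block-splitting utility of this same module, and the tensor values below are placeholders):

import tensorflow as tf

image = tf.zeros([1, 112, 112, 3])        # one channels_last image, already batched
label = tf.constant([7], dtype=tf.int64)  # its label
key = tf.constant(["sample-key"])         # its key

blocks, labels, keys = all_patches(image, label, key, size=(28, 28))
# blocks holds the non-overlapping 28x28 patches; labels and keys are
# repeated once per patch so the three tensors stay aligned.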
 import six
 import tensorflow as tf
 from bob.bio.base import read_original_data
+import logging
+logger = logging.getLogger(__name__)
 
 
-def bio_generator(database, biofiles, load_data=None, biofile_to_label=None,
-                  multiple_samples=False, repeat=False):
-    """Returns a generator and its output types and shapes based on
-    bob.bio.base databases.
+class BioGenerator(object):
+    """A generator class which wraps bob.bio.base databases so that they can
+    be used with tf.data.Dataset.from_generator.
 
-    Parameters
+    Attributes
     ----------
-    database : :any:`bob.bio.base.database.BioDatabase`
-        The database that you want to use.
+    biofile_to_label : :obj:`object`, optional
+        A callable with the signature of ``label = biofile_to_label(biofile)``.
+        By default -1 is returned as label.
     biofiles : [:any:`bob.bio.base.database.BioFile`]
         The list of the bio files.
+    database : :any:`bob.bio.base.database.BioDatabase`
+        The database that you want to use.
+    epoch : int
+        The number of epochs that have been passed so far.
+    keys : [str]
+        The keys of samples obtained by calling ``biofile.make_path("", "")``
+    labels : [int]
+        The labels obtained by calling ``label = biofile_to_label(biofile)``
     load_data : :obj:`object`, optional
         A callable with the signature of
         ``data = load_data(database, biofile)``.
-        :any:`bob.bio.base.read_original_data` is used by default.
-    biofile_to_label : :obj:`object`, optional
-        A callable with the signature of ``label = biofile_to_label(biofile)``.
-        By default -1 is returned as label.
+        :any:`bob.bio.base.read_original_data` is wrapped to be used by
+        default.
     multiple_samples : bool, optional
         If true, it assumes that the bio database's samples actually contain
-        multiple samples. This is useful for when you want to treat video
-        databases as image databases.
-    repeat : bool, optional
-        If True, the samples are repeated forever.
-
-    Returns
-    -------
-    generator : object
-        A generator function that when called will return the samples. The
-        samples will be like ``(data, label, key)``.
+        multiple samples. This is useful, for example, when you want to treat
+        video databases as image databases.
     output_types : (object, object, object)
         The types of the returned samples.
     output_shapes : (tf.TensorShape, tf.TensorShape, tf.TensorShape)
         The shapes of the returned samples.
     """
-    if load_data is None:
-        def load_data(database, biofile):
-            data = read_original_data(
-                biofile,
-                database.original_directory,
-                database.original_extension)
-            return data
-    if biofile_to_label is None:
-        def biofile_to_label(biofile):
-            return -1
-
-    labels = (biofile_to_label(f) for f in biofiles)
-    keys = (str(f.make_path("", "")) for f in biofiles)
-
-    def generator():
-        while True:
-            for f, label, key in six.moves.zip(biofiles, labels, keys):
-                data = load_data(database, f)
-                # labels
-                if multiple_samples:
-                    for d in data:
-                        yield (d, label, key)
-                else:
-                    yield (data, label, key)
-            if not repeat:
-                break
-
-    # load one data to get its type and shape
-    data = load_data(database, biofiles[0])
-    if multiple_samples:
-        try:
-            data = data[0]
-        except TypeError:
-            # if the data is a generator
-            data = six.next(data)
-    data = tf.convert_to_tensor(data)
-    output_types = (data.dtype, tf.int64, tf.string)
-    output_shapes = (data.shape, tf.TensorShape([]), tf.TensorShape([]))
-
-    return (generator, output_types, output_shapes)
+    def __init__(self, database, biofiles, load_data=None,
+                 biofile_to_label=None, multiple_samples=False):
+        if load_data is None:
+            def load_data(database, biofile):
+                data = read_original_data(
+                    biofile,
+                    database.original_directory,
+                    database.original_extension)
+                return data
+        if biofile_to_label is None:
+            def biofile_to_label(biofile):
+                return -1
+        self.database = database
+        self.biofiles = list(biofiles)
+        self.load_data = load_data
+        self.biofile_to_label = biofile_to_label
+        self.multiple_samples = multiple_samples
+        self.epoch = 0
+
+        # load one data to get its type and shape
+        data = load_data(database, biofiles[0])
+        if multiple_samples:
+            try:
+                data = data[0]
+            except TypeError:
+                # if the data is a generator
+                data = six.next(data)
+        data = tf.convert_to_tensor(data)
+        self._output_types = (data.dtype, tf.int64, tf.string)
+        self._output_shapes = (
+            data.shape, tf.TensorShape([]), tf.TensorShape([]))
+
+        logger.info("Initializing a dataset with %d files and %s types "
+                    "and %s shapes", len(self.biofiles), self.output_types,
+                    self.output_shapes)
+
+    @property
+    def labels(self):
+        for f in self.biofiles:
+            yield self.biofile_to_label(f)
+
+    @property
+    def keys(self):
+        for f in self.biofiles:
+            yield str(f.make_path("", "")).encode('utf-8')
+
+    @property
+    def output_types(self):
+        return self._output_types
+
+    @property
+    def output_shapes(self):
+        return self._output_shapes
+
+    def __call__(self):
+        """A generator function that when called will return the samples.
+
+        Yields
+        ------
+        (data, label, key) : tuple
+            A tuple containing the data, label, and the key.
+        """
+        for f, label, key in six.moves.zip(
+                self.biofiles, self.labels, self.keys):
+            data = self.load_data(self.database, f)
+            # labels
+            if self.multiple_samples:
+                for d in data:
+                    yield (d, label, key)
+            else:
+                yield (data, label, key)
+        self.epoch += 1
+        logger.info("Elapsed %d epochs", self.epoch)
@@ -3,14 +3,6 @@
 # @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
 
 import tensorflow as tf
-import threading
-import os
-import bob.io.base
-import bob.core
-from tensorflow.core.framework import summary_pb2
-import time
-#logger = bob.core.log.setup("bob.learn.tensorflow")
-
 from bob.learn.tensorflow.network.utils import append_logits
 from tensorflow.python.estimator import estimator
 from bob.learn.tensorflow.utils import predict_using_tensors
@@ -28,102 +20,88 @@ class Logits(estimator.Estimator):
 
     The **architecture** function should follow the following pattern:
 
-    def my_beautiful_function(placeholder):
+    def my_beautiful_architecture(placeholder, **kwargs):
 
         end_points = dict()
         graph = convXX(placeholder)
         end_points['conv'] = graph
         ....
         return graph, end_points
 
     The **loss** function should follow the following pattern:
 
-    def my_beautiful_loss(logits, labels):
+    def my_beautiful_loss(logits, labels, **kwargs):
         return loss_set_of_ops(logits, labels)
 
-    Variables, scopes... from other models can be loaded by the model_fn.
-    For that, please wrap the path of the OTHER checkpoint and the list
-    of variables in a dictionary with the key "load_variable_from_checkpoint"
-    and provide them to the keyword `params`:
-
-    {"load_variable_from_checkpoint": {"checkpoint_path": "mypath",
-                                       "scopes": {"my_scope/": my_scope/}}}
-
-    **Parameters**
+    Parameters
+    ----------
 
     architecture:
         Pointer to a function that builds the graph.
 
     optimizer:
-        One of the tensorflow solvers (https://www.tensorflow.org/api_guides/python/train)
+        One of the tensorflow solvers
+        (https://www.tensorflow.org/api_guides/python/train)
         - tf.train.GradientDescentOptimizer
         - tf.train.AdagradOptimizer
         - ....
 
     config:
 
     n_classes:
-        Number of classes of your problem. The logits will be appended in this class
+        Number of classes of your problem. The logits will be appended in this
+        class
 
     loss_op:
        Pointer to a function that computes the loss.
 
     embedding_validation:
        Run the validation using embeddings?? [default: False]
 
     model_dir:
       Model path
 
    validation_batch_size:
      Size of the batch for validation. This value is used when the
      validation with embeddings is used. This is a hack.
 
    params:
-      Extra params for the model function (please see https://www.tensorflow.org/extend/estimators for more info)
+      Extra params for the model function (please see
+      https://www.tensorflow.org/extend/estimators for more info)
 
-   extra_checkpoint: dict()
+   extra_checkpoint: dict
       In case you want to use other model to initialize some variables.
       This argument should be in the following format
-      extra_checkpoint = {"checkpoint_path": <YOUR_CHECKPOINT>,
-                          "scopes": dict({"<SOURCE_SCOPE>/": "<TARGET_SCOPE>/"}),
-                          "is_trainable": <IF_THOSE_LOADED_VARIABLES_ARE_TRAINABLE>
-                         }
+      extra_checkpoint = {
+          "checkpoint_path": <YOUR_CHECKPOINT>,
+          "scopes": dict({"<SOURCE_SCOPE>/": "<TARGET_SCOPE>/"}),
+          "is_trainable": <IF_THOSE_LOADED_VARIABLES_ARE_TRAINABLE>
+      }
 
    """
 
    def __init__(self,
-                architecture=None,
-                optimizer=None,
+                architecture,
+                optimizer,
+                loss_op,
+                n_classes,
                 config=None,
-                n_classes=0,
-                loss_op=None,
                 embedding_validation=False,
                 model_dir="",
                 validation_batch_size=None,
                 params=None,
                 extra_checkpoint=None
                 ):
 
        self.architecture = architecture
-       self.optimizer=optimizer
-       self.n_classes=n_classes
-       self.loss_op=loss_op
+       self.optimizer = optimizer
+       self.n_classes = n_classes
+       self.loss_op = loss_op
        self.loss = None
        self.embedding_validation = embedding_validation
        self.extra_checkpoint = extra_checkpoint
 
-       if self.architecture is None:
-           raise ValueError("Please specify a function to build the architecture !!")
-
-       if self.optimizer is None:
-           raise ValueError("Please specify a optimizer (https://www.tensorflow.org/api_guides/python/train) !!")
-
-       if self.loss_op is None:
-           raise ValueError("Please specify a function to build the loss !!")
-
-       if self.n_classes <= 0:
-           raise ValueError("Number of classes must be greater than 0")
-
        def _model_fn(features, labels, mode, params, config):
 
            check_features(features)
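With this change, the four core arguments become required positionals, which makes the old runtime ValueError checks unnecessary. A constructor sketch under the new signature (my_architecture and my_loss are placeholder callables following the patterns documented in the docstring above):

import tensorflow as tf

logits_estimator = Logits(
    my_architecture,                          # builds the graph
    tf.train.GradientDescentOptimizer(1e-3),  # any tf.train solver
    my_loss,                                  # computes the loss from (logits, labels)
    10,                                       # n_classes
    model_dir="/tmp/logits_model")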
@@ -164,28 +142,36 @@ class Logits(estimator.Estimator):
 
                 # Compute the embeddings
                 embeddings = tf.nn.l2_normalize(prelogits, 1)
                 predictions = {
-                    "embeddings": embeddings
+                    "embeddings": embeddings,
+                    "key": key,
                 }
             else:
+                probabilities = tf.nn.softmax(logits, name="softmax_tensor")
                 predictions = {
                     # Generate predictions (for PREDICT and EVAL mode)
                     "classes": tf.argmax(input=logits, axis=1),
-                    # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
-                    # `logging_hook`.
-                    "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
+                    # Add `softmax_tensor` to the graph. It is used for PREDICT
+                    # and by the `logging_hook`.
+                    "probabilities": probabilities,
+                    "key": key,
                 }
 
             if mode == tf.estimator.ModeKeys.PREDICT:
-                return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+                return tf.estimator.EstimatorSpec(mode=mode,
+                                                  predictions=predictions)
 
             # IF Validation
             self.loss = self.loss_op(logits, labels)
 
             if self.embedding_validation:
-                predictions_op = predict_using_tensors(predictions["embeddings"], labels, num=validation_batch_size)
+                predictions_op = predict_using_tensors(
+                    predictions["embeddings"], labels,
+                    num=validation_batch_size)
                 eval_metric_ops = {"accuracy": tf