Implemented a normalizer

parent e42a6d27
bob/learn/tensorflow/datashuffler/Base.py

@@ -7,6 +7,7 @@ import numpy
 import tensorflow as tf
 import bob.ip.base
 import numpy
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear


 class Base(object):
@@ -16,7 +17,8 @@ class Base(object):
                  scale=True,
                  batch_size=1,
                  seed=10,
-                 data_augmentation=None):
+                 data_augmentation=None,
+                 normalizer=Linear()):
         """
         The class provide base functionalities to shuffle the data before to train a neural network
@@ -33,7 +35,7 @@ class Base(object):
         numpy.random.seed(seed)

         self.scale = scale
-        self.scale_value = 0.00390625
+        self.normalizer = normalizer
         self.input_dtype = input_dtype

         # TODO: Check if the bacth size is higher than the input data
@@ -153,10 +155,4 @@ class Base(object):
         For the time being I'm only scaling from 0-1
         """
-        if self.scale:
-            return x * self.scale_value
-        return x
+        return self.normalizer(x)
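Note: the upshot of the Base.py hunks is that the hard-coded scale_value of 0.00390625 (which is 1/256, mapping 8-bit pixel values into [0, 1)) is replaced by a pluggable callable. A minimal sketch of the equivalence, using the classes from the new Normalizer module introduced below:

    import numpy
    from bob.learn.tensorflow.datashuffler.Normalizer import ScaleFactor, Linear

    x = numpy.array([0., 128., 255.])
    # ScaleFactor() reproduces the old behavior: its default is 0.00390625 == 1/256
    assert (ScaleFactor()(x) == x * 0.00390625).all()
    # Linear() is the new default and is a no-op (identity)
    assert (Linear()(x) == x).all()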
bob/learn/tensorflow/datashuffler/Disk.py

@@ -11,6 +11,7 @@ import bob.core
 from .Base import Base

 logger = bob.core.log.setup("bob.learn.tensorflow")
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear


 class Disk(Base):
@@ -20,7 +21,8 @@ class Disk(Base):
                  scale=True,
                  batch_size=1,
                  seed=10,
-                 data_augmentation=None):
+                 data_augmentation=None,
+                 normalizer=Linear()):
         """
         This datashuffler deal with databases that are stored in the disk.
         The data is loaded on the fly,.
@@ -48,7 +50,8 @@ class Disk(Base):
             scale=scale,
             batch_size=batch_size,
             seed=seed,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )

         # Seting the seed
         numpy.random.seed(seed)
@@ -92,8 +95,7 @@ class Disk(Base):
             selected_data[i, ...] = data

             # Scaling
-            if self.scale:
-                selected_data[i, ...] *= self.scale_value
+            selected_data[i, ...] = self.normalize_sample(selected_data[i, ...])

         selected_labels = self.labels[indexes[0:self.batch_size]]
bob/learn/tensorflow/datashuffler/Memory.py

@@ -5,6 +5,7 @@
 import numpy
 from .Base import Base
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear
 import tensorflow as tf
@@ -16,7 +17,8 @@ class Memory(Base):
                  scale=True,
                  batch_size=1,
                  seed=10,
-                 data_augmentation=None):
+                 data_augmentation=None,
+                 normalizer=Linear()):
         """
         This datashuffler deal with databases that are stored in a :py:class`numpy.array`
@@ -37,7 +39,8 @@ class Memory(Base):
             scale=scale,
             batch_size=batch_size,
             seed=seed,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )

         # Seting the seed
         numpy.random.seed(seed)
bob/learn/tensorflow/datashuffler/Normalizer.py (new file)

+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+# @date: Mon 07 Nov 2016 09:39:36 CET
+
+
+class ScaleFactor(object):
+    """
+    Normalize a sample by a scale factor
+    """
+
+    def __init__(self, scale_factor=0.00390625):
+        self.scale_factor = scale_factor
+
+    def __call__(self, x):
+        return x * self.scale_factor
+
+
+class MeanOffset(object):
+    """
+    Normalize a sample by a mean offset
+    """
+
+    def __init__(self, mean_offset):
+        self.mean_offset = mean_offset
+
+    def __call__(self, x):
+        for i in range(len(self.mean_offset)):
+            x[:, i, :, :] = x[:, i, :, :] - self.mean_offset[i]
+
+        return x
+
+
+class Linear(object):
+
+    def __init__(self):
+        pass
+
+    def __call__(self, x):
+        return x
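Note: how a caller selects a strategy, sketched against the constructor signatures visible in the hunks above (constructor arguments not shown in this diff are omitted here; also note MeanOffset indexes x[:, i, :, :], so it assumes the channel axis is at position 1):

    import numpy
    from bob.learn.tensorflow.datashuffler import Memory, ScaleFactor, MeanOffset

    data = numpy.random.random_sample((100, 3, 28, 28))   # batch x channel x h x w
    labels = numpy.random.randint(0, 10, size=100)

    # Opt back in to the old 1/256 scaling:
    shuffler = Memory(data, labels, batch_size=16, normalizer=ScaleFactor())

    # Or subtract a per-channel mean instead:
    shuffler = Memory(data, labels, batch_size=16,
                      normalizer=MeanOffset(numpy.array([0.45, 0.45, 0.45])))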
bob/learn/tensorflow/datashuffler/SiameseDisk.py

@@ -10,6 +10,8 @@ logger = bob.core.log.setup("bob.learn.tensorflow")
 from .Disk import Disk
 from .Siamese import Siamese
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear
+

 class SiameseDisk(Siamese, Disk):
     def __init__(self, data, labels,
@@ -18,7 +20,8 @@ class SiameseDisk(Siamese, Disk):
                  scale=True,
                  batch_size=1,
                  seed=10,
-                 data_augmentation=None):
+                 data_augmentation=None,
+                 normalizer=Linear()):
         """
         Shuffler that deal with file list
@@ -45,7 +48,8 @@ class SiameseDisk(Siamese, Disk):
             scale=scale,
             batch_size=batch_size,
             seed=seed,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )

         # Seting the seed
         numpy.random.seed(seed)
bob/learn/tensorflow/datashuffler/SiameseMemory.py

@@ -8,7 +8,7 @@ import numpy
 from .Memory import Memory
 from .Siamese import Siamese
 import tensorflow as tf
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear

 class SiameseMemory(Siamese, Memory):
@@ -18,7 +18,8 @@ class SiameseMemory(Siamese, Memory):
                  scale=True,
                  batch_size=1,
                  seed=10,
-                 data_augmentation=None):
+                 data_augmentation=None,
+                 normalizer=Linear()):
         """
         Shuffler that deal with memory datasets
@@ -39,7 +40,8 @@ class SiameseMemory(Siamese, Memory):
             scale=scale,
             batch_size=batch_size,
             seed=seed,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )

         # Seting the seed
         numpy.random.seed(seed)
bob/learn/tensorflow/datashuffler/TripletDisk.py

@@ -14,7 +14,7 @@ import tensorflow as tf
 from .Disk import Disk
 from .Triplet import Triplet
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear

 class TripletDisk(Triplet, Disk):
     def __init__(self, data, labels,
@@ -23,7 +23,8 @@ class TripletDisk(Triplet, Disk):
                  scale=True,
                  batch_size=1,
                  seed=10,
-                 data_augmentation=None):
+                 data_augmentation=None,
+                 normalizer=Linear()):
         """
         Shuffler that deal with file list
@@ -49,7 +50,8 @@ class TripletDisk(Triplet, Disk):
             input_dtype=input_dtype,
             scale=scale,
             batch_size=batch_size,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )

         # Seting the seed
         numpy.random.seed(seed)
bob/learn/tensorflow/datashuffler/TripletMemory.py

@@ -8,7 +8,7 @@ import tensorflow as tf
 from .Memory import Memory
 from Triplet import Triplet
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear

 class TripletMemory(Triplet, Memory):
@@ -18,7 +18,8 @@ class TripletMemory(Triplet, Memory):
                  scale=True,
                  batch_size=1,
                  seed=10,
-                 data_augmentation=None):
+                 data_augmentation=None,
+                 normalizer=Linear()):
         """
         Shuffler that deal with memory datasets
@@ -39,7 +40,8 @@ class TripletMemory(Triplet, Memory):
             scale=scale,
             batch_size=batch_size,
             seed=seed,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )

         # Seting the seed
         numpy.random.seed(seed)
bob/learn/tensorflow/datashuffler/TripletWithFastSelectionDisk.py

@@ -13,7 +13,7 @@ from scipy.spatial.distance import euclidean, cdist
 import logging
 logger = logging.getLogger("bob.learn.tensorflow")
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear

 class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
     """
@@ -50,7 +50,8 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
                  batch_size=1,
                  seed=10,
                  data_augmentation=None,
-                 total_identities=10):
+                 total_identities=10,
+                 normalizer=Linear()):

         super(TripletWithFastSelectionDisk, self).__init__(
             data=data,
@@ -60,7 +61,8 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
             scale=scale,
             batch_size=batch_size,
             seed=seed,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )
         self.clear_variables()
         # Seting the seed
bob/learn/tensorflow/datashuffler/TripletWithSelectionDisk.py

@@ -10,10 +10,11 @@ from .Disk import Disk
 from .Triplet import Triplet
 from .OnlineSampling import OnLineSampling
 from scipy.spatial.distance import euclidean
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear

 import logging
 logger = logging.getLogger("bob.learn.tensorflow")
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear

 class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling):
     """
@@ -51,7 +52,8 @@ class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling):
                  batch_size=1,
                  seed=10,
                  data_augmentation=None,
-                 total_identities=10):
+                 total_identities=10,
+                 normalizer=Linear()):

         super(TripletWithSelectionDisk, self).__init__(
             data=data,
@@ -61,7 +63,8 @@ class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling):
             scale=scale,
             batch_size=batch_size,
             seed=seed,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )
         self.clear_variables()
         # Seting the seed
bob/learn/tensorflow/datashuffler/TripletWithSelectionMemory.py

@@ -10,6 +10,7 @@ from .Memory import Memory
 from .Triplet import Triplet
 from .OnlineSampling import OnLineSampling
 from scipy.spatial.distance import euclidean
+from bob.learn.tensorflow.datashuffler.Normalizer import Linear

 class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
@@ -48,7 +49,8 @@ class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
                  batch_size=1,
                  seed=10,
                  data_augmentation=None,
-                 total_identities=10):
+                 total_identities=10,
+                 normalizer=Linear()):

         super(TripletWithSelectionMemory, self).__init__(
             data=data,
@@ -58,7 +60,8 @@ class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
             scale=scale,
             batch_size=batch_size,
             seed=seed,
-            data_augmentation=data_augmentation
+            data_augmentation=data_augmentation,
+            normalizer=normalizer
         )
         self.clear_variables()
         # Seting the seed
@@ -141,10 +144,9 @@ class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
             data_n[i, ...] = d

         # Scaling
-        if self.scale:
-            data_a *= self.scale_value
-            data_p *= self.scale_value
-            data_n *= self.scale_value
+        data_a = self.normalize_sample(data_a)
+        data_p = self.normalize_sample(data_p)
+        data_n = self.normalize_sample(data_n)

         return data_a, data_p, data_n
bob/learn/tensorflow/datashuffler/__init__.py

@@ -19,5 +19,7 @@ from .TripletWithSelectionDisk import TripletWithSelectionDisk
 from .DataAugmentation import DataAugmentation
 from .ImageAugmentation import ImageAugmentation
+from .Normalizer import ScaleFactor, MeanOffset, Linear
+

 # gets sphinx autodoc done right - don't remove it
 __all__ = [_ for _ in dir() if not _.startswith('_')]
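Note: with this re-export in place, the normalizers become importable from the package namespace, equivalent to the module path used throughout the diff:

    from bob.learn.tensorflow.datashuffler import ScaleFactor, MeanOffset, Linear
    # same classes as:
    from bob.learn.tensorflow.datashuffler.Normalizer import ScaleFactor, MeanOffset, Linear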
bob/learn/tensorflow/initialization/Xavier.py

@@ -40,17 +40,18 @@ class Xavier(Initialization):
         initializer = tf.truncated_normal(shape, stddev=stddev, seed=self.seed)
         reuse = self.variable_exist(scope)

-        """
+        self.use_gpu = False
         with tf.variable_scope(scope, reuse=reuse):
             if self.use_gpu:
                 with tf.device("/gpu:0"):
                     return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
             else:
-                with tf.device("/cpu"):
-                    #return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
+                with tf.device("/cpu:0"):
                     return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
-        """

+        """
         try:
             with tf.variable_scope(scope):
                 if self.use_gpu:
@@ -68,3 +69,4 @@ class Xavier(Initialization):
                 with tf.device("/cpu"):
                     return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
+        """
\ No newline at end of file
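Note: two things change here. The previously quoted-out variable_scope path becomes the live code path (pinned to CPU via self.use_gpu = False), and the device string gains an explicit index. A sketch of the device-string form the fixed code relies on (TF 0.x API; scope and shape are illustrative):

    import tensorflow as tf

    # Device strings name a specific device as "/<type>:<index>";
    # "/cpu:0" is the canonical spelling used by the corrected line above.
    with tf.variable_scope("xavier_demo", reuse=False):
        with tf.device("/cpu:0"):
            w = tf.get_variable("w", initializer=tf.truncated_normal([3, 3],
                                                                     stddev=0.1,
                                                                     seed=10))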
bob/learn/tensorflow/layers/Conv2D.py

@@ -61,14 +61,23 @@ class Conv2D(Layer):
         n_channels = input_layer.get_shape().as_list()[3]

         if self.W is None:
-            self.W = self.weights_initialization(shape=[self.kernel_size, self.kernel_size, n_channels, self.filters],
-                                                 name="w_" + str(self.name),
-                                                 scope="w_" + str(self.name)
-                                                 )
-
-            self.b = self.bias_initialization(shape=[self.filters],
-                                              name="b_" + str(self.name) + "bias",
-                                              scope="b_" + str(self.name))
+            variable = "w_" + str(self.name)
+            if self.get_varible_by_name(variable) is not None:
+                self.W = self.get_varible_by_name(variable)
+            else:
+                self.W = self.weights_initialization(shape=[self.kernel_size, self.kernel_size, n_channels, self.filters],
+                                                     name=variable,
+                                                     scope=variable
+                                                     )
+
+            variable = "b_" + str(self.name) + "bias"
+            if self.get_varible_by_name(variable) is not None:
+                self.b = self.get_varible_by_name(variable)
+            else:
+                self.b = self.bias_initialization(shape=[self.filters],
+                                                  name=variable,
+                                                  scope="b_" + str(self.name))

     def get_graph(self, training_phase=True):
bob/learn/tensorflow/layers/FullyConnected.py

@@ -53,15 +53,24 @@ class FullyConnected(Layer):
         self.input_layer = input_layer
         if self.W is None:
             input_dim = reduce(mul, self.input_layer.get_shape().as_list()[1:])
-            self.W = self.weights_initialization(shape=[input_dim, self.output_dim],
-                                                 name="W_" + str(self.name),
-                                                 scope="W_" +str(self.name)
-                                                 )
+
+            variable = "W_" + str(self.name)
+            if self.get_varible_by_name(variable) is not None:
+                self.W = self.get_varible_by_name(variable)
+            else:
+                self.W = self.weights_initialization(shape=[input_dim, self.output_dim],
+                                                     name="W_" + str(self.name),
+                                                     scope="W_" +str(self.name)
+                                                     )

             # if self.activation is not None:
-            self.b = self.bias_initialization(shape=[self.output_dim],
-                                              name="b_" + str(self.name),
-                                              scope="b_" + str(self.name)
-                                              )
+            variable = "b_" + str(self.name)
+            if self.get_varible_by_name(variable) is not None:
+                self.b = self.get_varible_by_name(variable)
+            else:
+                self.b = self.bias_initialization(shape=[self.output_dim],
+                                                  name="b_" + str(self.name),
+                                                  scope="b_" + str(self.name)
+                                                  )

     def get_graph(self, training_phase=True):
bob/learn/tensorflow/layers/Layer.py

@@ -110,3 +110,14 @@ class Layer(object):
         normed = tf.nn.batch_normalization(x, mean, var, self.beta, self.gamma, 1e-3)
         return normed

+    def get_varible_by_name(self, var):
+        """
+        Doing this because of that https://github.com/tensorflow/tensorflow/issues/1325
+        """
+
+        for v in tf.all_variables():
+            if (len(v.name.split("/")) > 1) and (var in v.name.split("/")[1]):
+                return v
+
+        return None
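Note: get_varible_by_name (the identifier's spelling is as committed) works around the variable-reuse problem tracked in tensorflow/tensorflow#1325: instead of re-entering a scope with reuse=True, it scans tf.all_variables() and matches the second path component of each variable's name. A small sketch of the names it matches against (TF 0.x API; the scope/variable names are illustrative):

    import tensorflow as tf

    with tf.variable_scope("w_conv1"):
        w = tf.get_variable("w_conv1", initializer=tf.zeros([3, 3, 1, 16]))

    # Variables register as "<scope>/<name>:<output>", so for this variable
    # w.name.split("/") == ["w_conv1", "w_conv1:0"], and component [1] contains
    # the "w_conv1" substring that Conv2D and FullyConnected look up.
    print(w.name)  # w_conv1/w_conv1:0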
bob/learn/tensorflow/network/SequenceNetwork.py

@@ -303,8 +303,9 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
         open(path+"_sequence_net.pickle", 'w').write(self.pickle_architecture)
         return saver.save(session, path)

-    def load(self, session, path):
+    def load(self, session, path, clear_devices=False):
         self.sequence_net = pickle.loads(open(path+"_sequence_net.pickle").read())
+        #saver = tf.train.import_meta_graph(path + ".meta", clear_devices=clear_devices)
         saver = tf.train.import_meta_graph(path + ".meta")
         saver.restore(session, path)
         self.inference_graph = tf.get_collection("inference_graph")[0]
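Note: the new clear_devices parameter is accepted but, as committed, only reaches the commented-out line. tf.train.import_meta_graph takes a clear_devices flag that strips the device placements recorded in the MetaGraph, so a model saved on a GPU machine can be restored on a CPU-only one. A sketch of the intended body of load() (session and path are its arguments):

    # Inside load(); passing the flag through makes the checkpoint portable
    # across machines with different device sets.
    saver = tf.train.import_meta_graph(path + ".meta", clear_devices=clear_devices)
    saver.restore(session, path)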
bob/learn/tensorflow/network/VGG16_mod.py

@@ -62,7 +62,6 @@ class VGG16_mod(SequenceNetwork):
                  conv5_3_output=512,

                  fc6_output=4096,
-                 fc7_output=4096,

                  n_classes=10,
                  default_feature_layer="fc8",
@@ -132,7 +131,6 @@ class VGG16_mod(SequenceNetwork):
         self.conv5_3_output = conv5_3_output