Commit 3a91b4ee authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira

Implemented a normalizer

parent e42a6d27
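
In short, every datashuffler now accepts a `normalizer` callable (defaulting to the identity `Linear()`), which `normalize_sample` applies in place of the old hard-coded `scale_value` multiplication. A minimal usage sketch, assuming the `Memory` shuffler's `input_shape` argument and the random data below (both illustrative, not part of this commit):

import numpy
from bob.learn.tensorflow.datashuffler import Memory, ScaleFactor

# Illustrative data: 100 grayscale 28x28 samples, 10 classes (shapes assumed)
data = numpy.random.rand(100, 28, 28, 1).astype("float64")
labels = numpy.random.randint(0, 10, size=100)

# ScaleFactor() reproduces the old scale=True behaviour (x * 0.00390625, i.e. 1/256);
# MeanOffset(...) subtracts per-channel means, Linear() disables normalization.
data_shuffler = Memory(data, labels,
                       input_shape=[28, 28, 1],
                       batch_size=16,
                       normalizer=ScaleFactor())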
@@ -7,6 +7,7 @@ import numpy
import tensorflow as tf
import bob.ip.base
import numpy
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class Base(object):
@@ -16,7 +17,8 @@ class Base(object):
scale=True,
batch_size=1,
seed=10,
data_augmentation=None):
data_augmentation=None,
normalizer=Linear()):
"""
This class provides base functionality to shuffle the data before training a neural network
@@ -33,7 +35,7 @@ class Base(object):
numpy.random.seed(seed)
self.scale = scale
self.scale_value = 0.00390625
self.normalizer = normalizer
self.input_dtype = input_dtype
# TODO: Check if the batch size is larger than the input data
@@ -153,10 +155,4 @@ class Base(object):
Applies the configured normalizer to the sample
"""
if self.scale:
return x * self.scale_value
return x
return self.normalizer(x)
@@ -11,6 +11,7 @@ import bob.core
from .Base import Base
logger = bob.core.log.setup("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class Disk(Base):
@@ -20,7 +21,8 @@ class Disk(Base):
scale=True,
batch_size=1,
seed=10,
data_augmentation=None):
data_augmentation=None,
normalizer=Linear()):
"""
This datashuffler deals with databases that are stored on disk.
The data is loaded on the fly.
@@ -48,7 +50,8 @@ class Disk(Base):
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
# Setting the seed
numpy.random.seed(seed)
@@ -92,8 +95,7 @@ class Disk(Base):
selected_data[i, ...] = data
# Scaling
if self.scale:
selected_data[i, ...] *= self.scale_value
selected_data[i, ...] = self.normalize_sample(selected_data[i, ...])
selected_labels = self.labels[indexes[0:self.batch_size]]
......
@@ -5,6 +5,7 @@
import numpy
from .Base import Base
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
import tensorflow as tf
@@ -16,7 +17,8 @@ class Memory(Base):
scale=True,
batch_size=1,
seed=10,
data_augmentation=None):
data_augmentation=None,
normalizer=Linear()):
"""
This datashuffler deals with databases that are stored in a :py:class:`numpy.ndarray`
@@ -37,7 +39,8 @@ class Memory(Base):
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
# Setting the seed
numpy.random.seed(seed)
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Mon 07 Nov 2016 09:39:36 CET
class ScaleFactor(object):
"""
Normalize a sample by a scale factor
"""
def __init__(self, scale_factor=0.00390625):
self.scale_factor = scale_factor
def __call__(self, x):
return x * self.scale_factor
class MeanOffset(object):
"""
Normalize a sample by a mean offset
"""
def __init__(self, mean_offset):
self.mean_offset = mean_offset
def __call__(self, x):
for i in range(len(self.mean_offset)):
x[:, i, :, :] = x[:, i, :, :] - self.mean_offset[i]
return x
class Linear(object):
def __init__(self):
pass
def __call__(self, x):
return x
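
For reference, the three normalizers are plain callables on a batch; a small sketch of their behaviour (the 4D layout with channels on axis 1 follows `MeanOffset.__call__` above; the concrete shapes are assumptions):

import numpy
from bob.learn.tensorflow.datashuffler.Normalizer import ScaleFactor, MeanOffset, Linear

x = numpy.ones((4, 3, 28, 28))  # assumed batch: 4 samples, 3 channels

print(ScaleFactor()(x).max())                # 0.00390625 -- scaled by 1/256
print(MeanOffset([0.5, 0.5, 0.5])(x).max())  # 0.5 -- channel means subtracted in place
print(Linear()(x) is x)                      # True -- identity pass-through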
@@ -10,6 +10,8 @@ logger = bob.core.log.setup("bob.learn.tensorflow")
from .Disk import Disk
from .Siamese import Siamese
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class SiameseDisk(Siamese, Disk):
def __init__(self, data, labels,
@@ -18,7 +20,8 @@ class SiameseDisk(Siamese, Disk):
scale=True,
batch_size=1,
seed=10,
data_augmentation=None):
data_augmentation=None,
normalizer=Linear()):
"""
Shuffler that deals with file lists
@@ -45,7 +48,8 @@ class SiameseDisk(Siamese, Disk):
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
# Setting the seed
numpy.random.seed(seed)
......
@@ -8,7 +8,7 @@ import numpy
from .Memory import Memory
from .Siamese import Siamese
import tensorflow as tf
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class SiameseMemory(Siamese, Memory):
@@ -18,7 +18,8 @@ class SiameseMemory(Siamese, Memory):
scale=True,
batch_size=1,
seed=10,
data_augmentation=None):
data_augmentation=None,
normalizer=Linear()):
"""
Shuffler that deals with in-memory datasets
@@ -39,7 +40,8 @@ class SiameseMemory(Siamese, Memory):
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
# Setting the seed
numpy.random.seed(seed)
......
@@ -14,7 +14,7 @@ import tensorflow as tf
from .Disk import Disk
from .Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletDisk(Triplet, Disk):
def __init__(self, data, labels,
@@ -23,7 +23,8 @@ class TripletDisk(Triplet, Disk):
scale=True,
batch_size=1,
seed=10,
data_augmentation=None):
data_augmentation=None,
normalizer=Linear()):
"""
Shuffler that deals with file lists
@@ -49,7 +50,8 @@ class TripletDisk(Triplet, Disk):
input_dtype=input_dtype,
scale=scale,
batch_size=batch_size,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
# Setting the seed
numpy.random.seed(seed)
......
@@ -8,7 +8,7 @@ import tensorflow as tf
from .Memory import Memory
from .Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletMemory(Triplet, Memory):
@@ -18,7 +18,8 @@ class TripletMemory(Triplet, Memory):
scale=True,
batch_size=1,
seed=10,
data_augmentation=None):
data_augmentation=None,
normalizer=Linear()):
"""
Shuffler that deals with in-memory datasets
@@ -39,7 +40,8 @@ class TripletMemory(Triplet, Memory):
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
# Setting the seed
numpy.random.seed(seed)
......
@@ -13,7 +13,7 @@ from scipy.spatial.distance import euclidean, cdist
import logging
logger = logging.getLogger("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
"""
@@ -50,7 +50,8 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
batch_size=1,
seed=10,
data_augmentation=None,
total_identities=10):
total_identities=10,
normalizer=Linear()):
super(TripletWithFastSelectionDisk, self).__init__(
data=data,
@@ -60,7 +61,8 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
self.clear_variables()
# Setting the seed
......
@@ -10,10 +10,11 @@ from .Disk import Disk
from .Triplet import Triplet
from .OnlineSampling import OnLineSampling
from scipy.spatial.distance import euclidean
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
import logging
logger = logging.getLogger("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling):
"""
@@ -51,7 +52,8 @@ class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling):
batch_size=1,
seed=10,
data_augmentation=None,
total_identities=10):
total_identities=10,
normalizer=Linear()):
super(TripletWithSelectionDisk, self).__init__(
data=data,
@@ -61,7 +63,8 @@ class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling):
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
self.clear_variables()
# Setting the seed
......
@@ -10,6 +10,7 @@ from .Memory import Memory
from .Triplet import Triplet
from .OnlineSampling import OnLineSampling
from scipy.spatial.distance import euclidean
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
@@ -48,7 +49,8 @@ class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
batch_size=1,
seed=10,
data_augmentation=None,
total_identities=10):
total_identities=10,
normalizer=Linear()):
super(TripletWithSelectionMemory, self).__init__(
data=data,
@@ -58,7 +60,8 @@ class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation
data_augmentation=data_augmentation,
normalizer=normalizer
)
self.clear_variables()
# Setting the seed
@@ -141,10 +144,9 @@ class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
data_n[i, ...] = d
# Scaling
if self.scale:
data_a *= self.scale_value
data_p *= self.scale_value
data_n *= self.scale_value
data_a = self.normalize_sample(data_a)
data_p = self.normalize_sample(data_p)
data_n = self.normalize_sample(data_n)
return data_a, data_p, data_n
......
@@ -19,5 +19,7 @@ from .TripletWithSelectionDisk import TripletWithSelectionDisk
from .DataAugmentation import DataAugmentation
from .ImageAugmentation import ImageAugmentation
from .Normalizer import ScaleFactor, MeanOffset, Linear
# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
@@ -40,17 +40,18 @@ class Xavier(Initialization):
initializer = tf.truncated_normal(shape, stddev=stddev, seed=self.seed)
reuse = self.variable_exist(scope)
"""
self.use_gpu = False
with tf.variable_scope(scope, reuse=reuse):
if self.use_gpu:
with tf.device("/gpu:0"):
return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
else:
with tf.device("/cpu"):
with tf.device("/cpu:0"):
#return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
"""
"""
try:
with tf.variable_scope(scope):
if self.use_gpu:
@@ -68,3 +69,4 @@ class Xavier(Initialization):
with tf.device("/cpu"):
return tf.get_variable(name, initializer=initializer, dtype=tf.float32)
"""
\ No newline at end of file
@@ -61,13 +61,22 @@ class Conv2D(Layer):
n_channels = input_layer.get_shape().as_list()[3]
if self.W is None:
variable = "w_" + str(self.name)
if self.get_varible_by_name(variable) is not None:
self.W = self.get_varible_by_name(variable)
else:
self.W = self.weights_initialization(shape=[self.kernel_size, self.kernel_size, n_channels, self.filters],
name="w_" + str(self.name),
scope="w_" + str(self.name)
name=variable,
scope=variable
)
variable = "b_" + str(self.name) + "bias"
if self.get_varible_by_name(variable) is not None:
self.b = self.get_varible_by_name(variable)
else:
self.b = self.bias_initialization(shape=[self.filters],
name="b_" + str(self.name) + "bias",
name=variable,
scope="b_" + str(self.name))
def get_graph(self, training_phase=True):
......
@@ -53,11 +53,20 @@ class FullyConnected(Layer):
self.input_layer = input_layer
if self.W is None:
input_dim = reduce(mul, self.input_layer.get_shape().as_list()[1:])
variable = "W_" + str(self.name)
if self.get_varible_by_name(variable) is not None:
self.W = self.get_varible_by_name(variable)
else:
self.W = self.weights_initialization(shape=[input_dim, self.output_dim],
name="W_" + str(self.name),
scope="W_" +str(self.name)
)
# if self.activation is not None:
variable = "b_" + str(self.name)
if self.get_varible_by_name(variable) is not None:
self.b = self.get_varible_by_name(variable)
else:
self.b = self.bias_initialization(shape=[self.output_dim],
name="b_" + str(self.name),
scope="b_" + str(self.name)
......
@@ -110,3 +110,14 @@ class Layer(object):
normed = tf.nn.batch_normalization(x, mean, var, self.beta, self.gamma, 1e-3)
return normed
def get_varible_by_name(self, var):
"""
Doing this because of https://github.com/tensorflow/tensorflow/issues/1325
"""
for v in tf.all_variables():
if (len(v.name.split("/")) > 1) and (var in v.name.split("/")[1]):
return v
return None
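
A sketch of what this lookup amounts to, using the TF 0.x API the rest of this diff targets (the scope and variable names here are made up for the example):

import tensorflow as tf

with tf.variable_scope("w_conv1"):
    w = tf.get_variable("w_conv1", initializer=tf.zeros([3, 3, 1, 8]))

# w.name is "w_conv1/w_conv1:0"; get_varible_by_name("w_conv1") walks
# tf.all_variables() and matches the component after the first "/",
# returning `w` here and None when no such variable exists yet.
for v in tf.all_variables():
    parts = v.name.split("/")
    if len(parts) > 1 and "w_conv1" in parts[1]:
        print(v.name)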
@@ -303,8 +303,9 @@ class SequenceNetwork(six.with_metaclass(abc.ABCMeta, object)):
open(path+"_sequence_net.pickle", 'w').write(self.pickle_architecture)
return saver.save(session, path)
def load(self, session, path):
def load(self, session, path, clear_devices=False):
self.sequence_net = pickle.loads(open(path+"_sequence_net.pickle").read())
#saver = tf.train.import_meta_graph(path + ".meta", clear_devices=clear_devices)
saver = tf.train.import_meta_graph(path + ".meta")
saver.restore(session, path)
self.inference_graph = tf.get_collection("inference_graph")[0]
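
For context, a hedged sketch of the save/load round-trip this method belongs to (`save` and `load` match the hunks above; the `Lenet` architecture class and the paths are assumptions):

import tensorflow as tf
from bob.learn.tensorflow.network import Lenet  # assumed architecture class

session = tf.Session()
net = Lenet()
# ... build and train the network ...
net.save(session, "./temp/model")  # checkpoint plus "<path>_sequence_net.pickle"

# later, in a fresh session: load() unpickles the architecture, then
# import_meta_graph + saver.restore bring the variables back
new_session = tf.Session()
net.load(new_session, "./temp/model")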
......
@@ -62,7 +62,6 @@ class VGG16_mod(SequenceNetwork):
conv5_3_output=512,
fc6_output=4096,
fc7_output=4096,
n_classes=10,
default_feature_layer="fc8",
@@ -132,7 +131,6 @@ class VGG16_mod(SequenceNetwork):
self.conv5_3_output = conv5_3_output
self.fc6_output = fc6_output
self.fc7_output = fc7_output
self.n_classes = n_classes
# First convolutional
@@ -236,12 +234,6 @@ class VGG16_mod(SequenceNetwork):
))
self.add(AveragePooling(name="pooling5", strides=[1, 2, 2, 1]))
self.add(FullyConnected(name="fc6", output_dim=fc6_output,
activation=tf.nn.relu,
weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
bias_initialization=Constant(use_gpu=self.use_gpu)
))
self.add(Dropout(name="dropout", keep_prob=0.5))
self.add(FullyConnected(name="fc8", output_dim=n_classes,
activation=None,
weights_initialization=Xavier(seed=seed, use_gpu=self.use_gpu),
......
@@ -22,7 +22,7 @@ Some unit tests for the datashuffler
batch_size = 16
validation_batch_size = 400
iterations = 50
iterations = 200
seed = 10
@@ -39,7 +39,7 @@ def test_dnn_trainer():
directory = "./temp/dnn"
# Preparing the architecture
architecture = MLP(10, hidden_layers=[15, 20])
architecture = MLP(10, hidden_layers=[20, 40])
# Loss for the softmax
loss = BaseLoss(tf.nn.sparse_softmax_cross_entropy_with_logits, tf.reduce_mean)
......
@@ -74,6 +74,7 @@ setup(
# scripts should be declared using this entry:
'console_scripts': [
'compute_statistics.py = bob.learn.tensorflow.script.compute_statistics:main'
],
},