Commit 055b4354 authored by Tiago Pereira

Updating Triplet network

parent b8b71630
Pipeline #8654 failed with stages in 8 minutes and 50 seconds
@@ -75,7 +75,7 @@ class Disk(Base):
         numpy.random.seed(seed)

         # TODO: very bad solution to deal with bob shape images and tf shape images
-        self.bob_shape = tuple([input_shape[2]] + list(input_shape[0:2]))
+        self.bob_shape = tuple([input_shape[3]] + list(input_shape[1:3]))

     def load_from_file(self, file_name):
         d = bob.io.base.load(file_name)
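Context for the bob_shape change: Bob images are channel-first, (C, H, W), while TensorFlow tensors are channel-last, (N, H, W, C). With the new 4-D input_shape the channel index moves from 2 to 3 and the spatial slice from [0:2] to [1:3]. A minimal sketch of the conversion, with toy sizes assumed (not code from this repository):

    import numpy

    input_shape = [None, 112, 112, 3]                             # TensorFlow layout: (N, H, W, C)
    bob_shape = tuple([input_shape[3]] + list(input_shape[1:3]))  # -> (3, 112, 112), i.e. (C, H, W)

    tf_image = numpy.zeros((112, 112, 3))
    bob_image = numpy.transpose(tf_image, (2, 0, 1))              # channel-last -> channel-first
    assert bob_image.shape == bob_shape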
@@ -111,11 +111,13 @@ class Disk(Base):
         Correspondent labels
         """
+        shape = [self.batch_size] + list(self.input_shape[1:])
+
         # Shuffling samples
         indexes = numpy.array(range(self.data.shape[0]))
         numpy.random.shuffle(indexes)

-        selected_data = numpy.zeros(shape=self.shape)
+        selected_data = numpy.zeros(shape=shape)
         for i in range(self.batch_size):
             file_name = self.data[indexes[i]]
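The pattern introduced here recurs throughout this commit: input_shape now carries a symbolic batch dimension (None), so get_batch() rebuilds a concrete batch shape from batch_size before allocating buffers. A small illustration of the arithmetic, using assumed MNIST-like sizes:

    import numpy

    input_shape = [None, 28, 28, 1]   # leading None: batch size unknown at graph-definition time
    batch_size = 16

    shape = [batch_size] + list(input_shape[1:])   # -> [16, 28, 28, 1]
    selected_data = numpy.zeros(shape=shape)
    assert selected_data.shape == (16, 28, 28, 1)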
......
@@ -73,7 +73,7 @@ class SiameseDisk(Siamese, Disk):
         numpy.random.seed(seed)

         # TODO: very bad solution to deal with bob shape images and tf shape images
-        self.bob_shape = tuple([input_shape[2]] + list(input_shape[0:2]))
+        self.bob_shape = tuple([input_shape[3]] + list(input_shape[1:3]))

     def get_batch(self):
         """
@@ -84,13 +84,14 @@ class SiameseDisk(Siamese, Disk):
         **Return**
         """
+        shape = [self.batch_size] + list(self.input_shape[1:])
-        sample_l = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_r = numpy.zeros(shape=self.shape, dtype='float32')
-        labels_siamese = numpy.zeros(shape=self.shape[0], dtype='float32')
+        sample_l = numpy.zeros(shape=shape, dtype='float32')
+        sample_r = numpy.zeros(shape=shape, dtype='float32')
+        labels_siamese = numpy.zeros(shape=shape[0], dtype='float32')

         genuine = True
-        for i in range(self.shape[0]):
+        for i in range(shape[0]):
             file_name, file_name_p = self.get_genuine_or_not(self.data, self.labels, genuine=genuine)
             sample_l[i, ...] = self.normalize_sample(self.load_from_file(str(file_name)))
             sample_r[i, ...] = self.normalize_sample(self.load_from_file(str(file_name_p)))
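get_genuine_or_not is not expanded in this diff; judging from the genuine flag above, one plausible reading (an assumption, not the library's code) is that the flag is toggled per iteration so a batch alternates genuine and impostor pairs:

    import numpy

    batch_size = 8
    labels_siamese = numpy.zeros(shape=batch_size, dtype='float32')

    genuine = True
    for i in range(batch_size):
        labels_siamese[i] = 0.0 if genuine else 1.0   # hypothetical convention: 0 = genuine pair
        genuine = not genuine                         # alternate genuine/impostor pairs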
......
@@ -20,44 +20,23 @@ class Triplet(Base):
     def __init__(self, **kwargs):
         super(Triplet, self).__init__(**kwargs)

-        self.data2_placeholder = None
-        self.data3_placeholder = None
-
-    def set_placeholders(self, data, data2, data3):
-        self.data_placeholder = data
-        self.data2_placeholder = data2
-        self.data3_placeholder = data3
-
-    def get_placeholders(self, name=""):
-        """
-        Returns a place holder with the size of your batch
-        """
-        if self.data_placeholder is None:
-            self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_anchor")
-
-        if self.data2_placeholder is None:
-            self.data2_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_positive")
-
-        if self.data3_placeholder is None:
-            self.data3_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_negative")
-
-        return [self.data_placeholder, self.data2_placeholder, self.data3_placeholder]
-
-    def get_placeholders_forprefetch(self, name=""):
-        """
-        Returns a place holder with the size of your batch
-        """
-        if self.data_placeholder is None:
-            self.data_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name)
-
-        if self.data2_placeholder is None:
-            self.data2_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name)
-
-        if self.data3_placeholder is None:
-            self.data3_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name)
-
-        return [self.data_placeholder, self.data2_placeholder, self.data3_placeholder]
+    def create_placeholders(self):
+        """
+        Create placeholder instances
+
+        :return:
+        """
+        with tf.name_scope("Input"):
+            self.data_ph = {}
+            self.data_ph['anchor'] = tf.placeholder(tf.float32, shape=self.input_shape, name="anchor")
+            self.data_ph['positive'] = tf.placeholder(tf.float32, shape=self.input_shape, name="positive")
+            self.data_ph['negative'] = tf.placeholder(tf.float32, shape=self.input_shape, name="negative")
+
+            # If prefetch, setup the queue to feed data
+            if self.prefetch:
+                raise ValueError("There is no prefetch for siamese networks")

     def get_one_triplet(self, input_data, input_labels):
         # Getting a pair of clients
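The new create_placeholders replaces the three positional placeholders with a dict keyed by role. A self-contained sketch of the equivalent graph construction, assuming TensorFlow 1.x and MNIST-like sizes:

    import tensorflow as tf

    input_shape = [None, 28, 28, 1]   # batch dimension left open
    with tf.name_scope("Input"):
        data_ph = {}
        data_ph['anchor'] = tf.placeholder(tf.float32, shape=input_shape, name="anchor")
        data_ph['positive'] = tf.placeholder(tf.float32, shape=input_shape, name="positive")
        data_ph['negative'] = tf.placeholder(tf.float32, shape=input_shape, name="negative")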
......
@@ -77,7 +77,7 @@ class TripletDisk(Triplet, Disk):
         numpy.random.seed(seed)

         # TODO: very bad solution to deal with bob shape images and tf shape images
-        self.bob_shape = tuple([input_shape[2]] + list(input_shape[0:2]))
+        self.bob_shape = tuple([input_shape[3]] + list(input_shape[1:3]))

     def get_batch(self):
         """
@@ -89,11 +89,13 @@ class TripletDisk(Triplet, Disk):
         **Return**
         """
-        sample_a = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_p = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_n = numpy.zeros(shape=self.shape, dtype='float32')
+        shape = [self.batch_size] + list(self.input_shape[1:])
+        sample_a = numpy.zeros(shape=shape, dtype='float32')
+        sample_p = numpy.zeros(shape=shape, dtype='float32')
+        sample_n = numpy.zeros(shape=shape, dtype='float32')

-        for i in range(self.shape[0]):
+        for i in range(shape[0]):
             file_name_a, file_name_p, file_name_n = self.get_one_triplet(self.data, self.labels)
             sample_a[i, ...] = self.normalize_sample(self.load_from_file(str(file_name_a)))
             sample_p[i, ...] = self.normalize_sample(self.load_from_file(str(file_name_p)))
......
@@ -45,8 +45,8 @@ class TripletMemory(Triplet, Memory):
     """

     def __init__(self, data, labels,
-                 input_shape,
-                 input_dtype="float64",
+                 input_shape=[None, 28, 28, 1],
+                 input_dtype="float32",
                  batch_size=1,
                  seed=10,
                  data_augmentation=None,
@@ -77,11 +77,13 @@ class TripletMemory(Triplet, Memory):
         **Return**
         """
-        sample_a = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_p = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_n = numpy.zeros(shape=self.shape, dtype='float32')
+        shape = [self.batch_size] + list(self.input_shape[1:])
+        sample_a = numpy.zeros(shape=shape, dtype='float32')
+        sample_p = numpy.zeros(shape=shape, dtype='float32')
+        sample_n = numpy.zeros(shape=shape, dtype='float32')

-        for i in range(self.shape[0]):
+        for i in range(shape[0]):
             sample_a[i, ...], sample_p[i, ...], sample_n[i, ...] = self.get_one_triplet(self.data, self.labels)

         # Applying the data augmentation
......
@@ -99,11 +99,13 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnlineSampling):
         **Return**
         """
-        sample_a = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_p = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_n = numpy.zeros(shape=self.shape, dtype='float32')
+        shape = [self.batch_size] + list(self.input_shape[1:])
+        sample_a = numpy.zeros(shape=shape, dtype='float32')
+        sample_p = numpy.zeros(shape=shape, dtype='float32')
+        sample_n = numpy.zeros(shape=shape, dtype='float32')

-        for i in range(self.shape[0]):
+        for i in range(shape[0]):
             file_name_a, file_name_p, file_name_n = self.get_one_triplet(self.data, self.labels)
             sample_a[i, ...] = self.normalize_sample(self.load_from_file(str(file_name_a)))
             sample_p[i, ...] = self.normalize_sample(self.load_from_file(str(file_name_p)))
......
@@ -86,11 +86,13 @@ class TripletWithSelectionDisk(Triplet, Disk, OnlineSampling):
         **Return**
         """
-        sample_a = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_p = numpy.zeros(shape=self.shape, dtype='float32')
-        sample_n = numpy.zeros(shape=self.shape, dtype='float32')
+        shape = [self.batch_size] + list(self.input_shape[1:])
+        sample_a = numpy.zeros(shape=shape, dtype='float32')
+        sample_p = numpy.zeros(shape=shape, dtype='float32')
+        sample_n = numpy.zeros(shape=shape, dtype='float32')

-        for i in range(self.shape[0]):
+        for i in range(shape[0]):
             file_name_a, file_name_p, file_name_n = self.get_one_triplet(self.data, self.labels)
             sample_a[i, ...] = self.normalize_sample(self.load_from_file(str(file_name_a)))
             sample_p[i, ...] = self.normalize_sample(self.load_from_file(str(file_name_p)))
......
@@ -99,6 +99,8 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
         **Return**
         """
+        shape = [self.batch_size] + list(self.input_shape[1:])
+
         # Selecting the classes used in the selection
         indexes = numpy.random.choice(len(self.possible_labels), self.total_identities, replace=False)
         samples_per_identity = numpy.ceil(self.batch_size / float(self.total_identities))
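The surrounding logic draws total_identities classes and stacks roughly batch_size / total_identities anchors per class, trimming the overshoot. The arithmetic in isolation, with assumed toy values:

    import numpy

    possible_labels = numpy.array([0, 1, 2, 3, 4])   # toy label set
    total_identities = 4
    batch_size = 10

    indexes = numpy.random.choice(len(possible_labels), total_identities, replace=False)
    samples_per_identity = int(numpy.ceil(batch_size / float(total_identities)))   # ceil(10/4) = 3

    anchor_labels = numpy.hstack([numpy.ones(samples_per_identity) * possible_labels[i]
                                  for i in indexes])
    anchor_labels = anchor_labels[0:batch_size]      # 4 * 3 = 12 labels, trimmed back to 10
    assert anchor_labels.shape == (batch_size,)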
@@ -108,10 +110,10 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
             anchor_labels = numpy.hstack((anchor_labels, numpy.ones(samples_per_identity) * self.possible_labels[indexes[i]]))
         anchor_labels = anchor_labels[0:self.batch_size]

-        samples_a = numpy.zeros(shape=self.shape, dtype='float32')
+        samples_a = numpy.zeros(shape=shape, dtype='float32')

         # Computing the embedding
-        for i in range(self.shape[0]):
+        for i in range(shape[0]):
             samples_a[i, ...] = self.get_anchor(anchor_labels[i])
         embedding_a = self.project(samples_a)
@@ -136,7 +138,9 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
         """
         Get a random set of positive pairs
         """
+        shape = [self.batch_size] + list(self.input_shape[1:])
-        samples_p = numpy.zeros(shape=self.shape, dtype='float32')
+        samples_p = numpy.zeros(shape=shape, dtype='float32')
+
         for i in range(self.shape[0]):
             l = anchor_labels[i]
             indexes = numpy.where(self.labels == l)[0]
@@ -147,7 +151,7 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
         # Computing the distances
         d_anchor_positive = []
-        for i in range(self.shape[0]):
+        for i in range(shape[0]):
             d_anchor_positive.append(euclidean(embedding_a[i, :], embedding_p[i, :]))

         return samples_p, embedding_p, d_anchor_positive
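Each anchor is compared only to its own positive, so the distances form a 1-D list rather than a full pairwise matrix. In isolation, with toy embeddings:

    import numpy
    from scipy.spatial.distance import euclidean

    embedding_a = numpy.random.rand(4, 2)   # 4 anchors in an assumed 2-D embedding space
    embedding_p = numpy.random.rand(4, 2)   # their matching positives

    d_anchor_positive = [euclidean(embedding_a[i, :], embedding_p[i, :])
                         for i in range(embedding_a.shape[0])]
    assert len(d_anchor_positive) == 4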
@@ -157,6 +161,8 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
         Get the semi-hard negative
         """
+        shape = [self.batch_size] + list(self.input_shape[1:])
+
         # Shuffling all the dataset
         indexes = range(len(self.labels))
         numpy.random.shuffle(indexes)
@@ -167,9 +173,8 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
         indexes = indexes[0:negative_samples_search]

         # Loading samples for the semi-hard search
-        shape = tuple([len(indexes)] + list(self.shape[1:]))
         temp_samples_n = numpy.zeros(shape=shape, dtype='float32')
-        samples_n = numpy.zeros(shape=self.shape, dtype='float32')
+        samples_n = numpy.zeros(shape=shape, dtype='float32')

         for i in range(shape[0]):
             temp_samples_n[i, ...] = self.normalize_sample(self.data[indexes[i], ...])
@@ -180,7 +185,7 @@ class TripletWithSelectionMemory(Triplet, Memory, OnlineSampling):
         d_anchor_negative = cdist(embedding_a, embedding_temp_n, metric='euclidean')

         # Selecting the negative samples
-        for i in range(self.shape[0]):
+        for i in range(shape[0]):
             label = anchor_labels[i]
             possible_candidates = [d if d > d_anchor_positive[i] else numpy.inf for d in d_anchor_negative[i]]
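The selection rule above implements semi-hard mining: negatives closer to the anchor than the positive are masked with numpy.inf, and (presumably) the closest remaining candidate is taken, giving a negative that is hard yet still satisfies d(a, n) > d(a, p). A self-contained sketch under those assumptions:

    import numpy
    from scipy.spatial.distance import cdist

    embedding_a = numpy.random.rand(4, 2)      # anchor embeddings (toy sizes)
    embedding_n = numpy.random.rand(50, 2)     # candidate negative embeddings
    d_anchor_positive = numpy.random.rand(4)   # stand-in for the real anchor-positive distances

    d_anchor_negative = cdist(embedding_a, embedding_n, metric='euclidean')

    for i in range(embedding_a.shape[0]):
        # mask negatives that are closer than the positive, then take the nearest survivor
        possible_candidates = [d if d > d_anchor_positive[i] else numpy.inf
                               for d in d_anchor_negative[i]]
        semi_hard_index = numpy.argmin(possible_candidates)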
......
@@ -178,11 +178,13 @@ def test_tripletcnn_trainer():
     # Creating datashufflers
     train_data_shuffler = TripletMemory(train_data, train_labels,
-                                        input_shape=[28, 28, 1],
-                                        batch_size=batch_size)
+                                        input_shape=[None, 28, 28, 1],
+                                        batch_size=batch_size,
+                                        normalizer=ScaleFactor())
     validation_data_shuffler = TripletMemory(validation_data, validation_labels,
-                                             input_shape=[28, 28, 1],
-                                             batch_size=validation_batch_size)
+                                             input_shape=[None, 28, 28, 1],
+                                             batch_size=validation_batch_size,
+                                             normalizer=ScaleFactor())

     directory = "./temp/tripletcnn"
@@ -192,24 +194,27 @@ def test_tripletcnn_trainer():
     # Loss for the triplet
     loss = TripletLoss(margin=4.)

+    input_pl = train_data_shuffler("data")
+    graph = {}
+    graph['anchor'] = architecture(input_pl['anchor'])
+    graph['positive'] = architecture(input_pl['positive'])
+    graph['negative'] = architecture(input_pl['negative'])
+
     # One graph trainer
-    trainer = TripletTrainer(architecture=architecture,
-                             loss=loss,
+    trainer = TripletTrainer(train_data_shuffler,
                              iterations=iterations,
-                             prefetch=False,
                              analizer=None,
-                             learning_rate=constant(0.05, name="triplet_lr"),
-                             optimizer=tf.train.AdamOptimizer(name="adam_triplet"),
                              temp_dir=directory
                              )
+    trainer.create_network_from_scratch(graph=graph,
+                                        loss=loss,
+                                        learning_rate=constant(0.01, name="regular_lr"),
+                                        optimizer=tf.train.GradientDescentOptimizer(0.01),)
     trainer.train(train_data_shuffler)

     # Testing
-    eer = dummy_experiment(validation_data_shuffler, architecture)
-
-    # At least 80% of accuracy
-    assert eer < 0.25
+    embedding = Embedding(train_data_shuffler("data", from_queue=False)['anchor'], graph['anchor'])
+    eer = dummy_experiment(validation_data_shuffler, embedding)
+    assert eer < 0.15
     shutil.rmtree(directory)

     del architecture
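TripletLoss itself is not part of this diff; a common formulation with margin m is L = mean(max(0, d(a, p) - d(a, n) + m)), which may differ in detail from the library's implementation. A sketch in TensorFlow 1.x:

    import tensorflow as tf

    def triplet_loss(anchor, positive, negative, margin=4.0):
        # squared Euclidean distances between embeddings, per batch entry
        d_pos = tf.reduce_sum(tf.square(anchor - positive), 1)
        d_neg = tf.reduce_sum(tf.square(anchor - negative), 1)
        # hinge: push d_neg at least `margin` beyond d_pos
        return tf.reduce_mean(tf.maximum(0.0, d_pos - d_neg + margin))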
......
@@ -44,131 +44,96 @@ def test_memory_shuffler():
     train_data, train_labels, validation_data, validation_labels = load_mnist()
     train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))

-    batch_shape = [16, 28, 28, 1]
+    shape = [None, 28, 28, 1]

     data_shuffler = Memory(train_data, train_labels,
-                           input_shape=batch_shape[1:],
-                           batch_size=batch_shape[0])
+                           input_shape=shape,
+                           batch_size=16)

     batch = data_shuffler.get_batch()
     assert len(batch) == 2
-    assert batch[0].shape == tuple(batch_shape)
-    assert batch[1].shape[0] == batch_shape[0]
-
-    placeholders = data_shuffler.get_placeholders(name="train")
-    assert placeholders[0].get_shape().as_list() == batch_shape
-    assert placeholders[1].get_shape().as_list()[0] == batch_shape[0]
+    assert batch[0].shape == (16, 28, 28, 1)


 def test_siamesememory_shuffler():
     train_data, train_labels, validation_data, validation_labels = load_mnist()
     train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))

-    batch_shape = [16, 28, 28, 1]
+    batch_shape = [None, 28, 28, 1]

     data_shuffler = SiameseMemory(train_data, train_labels,
-                                  input_shape=batch_shape[1:],
-                                  batch_size=batch_shape[0])
+                                  input_shape=batch_shape,
+                                  batch_size=16)

     batch = data_shuffler.get_batch()
     assert len(batch) == 3
-    assert batch[0].shape == tuple(batch_shape)
-    assert batch[1].shape == tuple(batch_shape)
-    assert batch[2].shape[0] == batch_shape[0]
-
-    placeholders = data_shuffler.get_placeholders(name="train")
-    assert placeholders[0].get_shape().as_list() == batch_shape
-    assert placeholders[1].get_shape().as_list() == batch_shape
-    assert placeholders[2].get_shape().as_list()[0] == batch_shape[0]
+    assert batch[0].shape == (16, 28, 28, 1)
+    assert batch[1].shape == (16, 28, 28, 1)


 def test_tripletmemory_shuffler():
     train_data, train_labels, validation_data, validation_labels = load_mnist()
     train_data = numpy.reshape(train_data, (train_data.shape[0], 28, 28, 1))

-    batch_shape = [16, 28, 28, 1]
+    batch_shape = [None, 28, 28, 1]

     data_shuffler = TripletMemory(train_data, train_labels,
-                                  input_shape=batch_shape[1:],
-                                  batch_size=batch_shape[0])
+                                  input_shape=batch_shape,
+                                  batch_size=16)

     batch = data_shuffler.get_batch()
     assert len(batch) == 3
-    assert batch[0].shape == tuple(batch_shape)
-    assert batch[1].shape == tuple(batch_shape)
-    assert batch[2].shape == tuple(batch_shape)
-
-    placeholders = data_shuffler.get_placeholders(name="train")
-    assert placeholders[0].get_shape().as_list() == batch_shape
-    assert placeholders[1].get_shape().as_list() == batch_shape
-    assert placeholders[2].get_shape().as_list() == batch_shape
+    assert batch[0].shape == (16, 28, 28, 1)
+    assert batch[1].shape == (16, 28, 28, 1)
+    assert batch[2].shape == (16, 28, 28, 1)


 def test_disk_shuffler():
     train_data, train_labels = get_dummy_files()

-    batch_shape = [2, 125, 125, 3]
+    batch_shape = [None, 125, 125, 3]

     data_shuffler = Disk(train_data, train_labels,
-                         input_shape=batch_shape[1:],
-                         batch_size=batch_shape[0])
+                         input_shape=batch_shape,
+                         batch_size=2)

     batch = data_shuffler.get_batch()
     assert len(batch) == 2
-    assert batch[0].shape == tuple(batch_shape)
-    assert batch[1].shape[0] == batch_shape[0]
-
-    placeholders = data_shuffler.get_placeholders(name="train")
-    assert placeholders[0].get_shape().as_list() == batch_shape
-    assert placeholders[1].get_shape().as_list()[0] == batch_shape[0]
+    assert batch[0].shape == (2, 125, 125, 3)


 def test_siamesedisk_shuffler():
     train_data, train_labels = get_dummy_files()

-    batch_shape = [2, 125, 125, 3]
+    batch_shape = [None, 125, 125, 3]

     data_shuffler = SiameseDisk(train_data, train_labels,
-                                input_shape=batch_shape[1:],
-                                batch_size=batch_shape[0])
+                                input_shape=batch_shape,
+                                batch_size=2)

     batch = data_shuffler.get_batch()
     assert len(batch) == 3
-    assert batch[0].shape == tuple(batch_shape)
-    assert batch[1].shape == tuple(batch_shape)
-    assert batch[2].shape[0] == batch_shape[0]
-
-    placeholders = data_shuffler.get_placeholders(name="train")
-    assert placeholders[0].get_shape().as_list() == batch_shape
-    assert placeholders[1].get_shape().as_list() == batch_shape
-    assert placeholders[2].get_shape().as_list()[0] == batch_shape[0]
+    assert batch[0].shape == (2, 125, 125, 3)
+    assert batch[1].shape == (2, 125, 125, 3)


 def test_tripletdisk_shuffler():
     train_data, train_labels = get_dummy_files()

-    batch_shape = [1, 125, 125, 3]
+    batch_shape = [None, 125, 125, 3]

     data_shuffler = TripletDisk(train_data, train_labels,
-                                input_shape=batch_shape[1:],
-                                batch_size=batch_shape[0])
+                                input_shape=batch_shape,
+                                batch_size=1)

     batch = data_shuffler.get_batch()
     assert len(batch) == 3
-    assert batch[0].shape == tuple(batch_shape)
-    assert batch[1].shape == tuple(batch_shape)
-    assert batch[2].shape == tuple(batch_shape)
-
-    placeholders = data_shuffler.get_placeholders(name="train")
-    assert placeholders[0].get_shape().as_list() == batch_shape
-    assert placeholders[1].get_shape().as_list() == batch_shape
-    assert placeholders[2].get_shape().as_list() == batch_shape
+    assert batch[0].shape == (1, 125, 125, 3)
+    assert batch[1].shape == (1, 125, 125, 3)
+    assert batch[2].shape == (1, 125, 125, 3)


 def test_triplet_fast_selection_disk_shuffler():
@@ -188,11 +153,6 @@ def test_triplet_fast_selection_disk_shuffler():
     assert batch[1].shape == tuple(batch_shape)
     assert batch[2].shape == tuple(batch_shape)

-    placeholders = data_shuffler.get_placeholders(name="train")
-    assert placeholders[0].get_shape().as_list() == batch_shape
-    assert placeholders[1].get_shape().as_list() == batch_shape
-    assert placeholders[2].get_shape().as_list() == batch_shape

 def test_triplet_selection_disk_shuffler():
     train_data, train_labels = get_dummy_files()
......
@@ -212,4 +212,3 @@ class SiameseTrainer(Trainer):
         tf.summary.scalar('within_class_loss', self.predictor[2])
         tf.summary.scalar('lr', self.learning_rate)
         return tf.summary.merge_all()
-
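The summary setup shown here relies on tf.summary.merge_all() collecting every summary registered so far in the graph. A minimal standalone illustration, assuming TensorFlow 1.x:

    import tensorflow as tf

    loss_value = tf.constant(0.5)
    tf.summary.scalar('loss', loss_value)   # register a scalar summary
    merged = tf.summary.merge_all()         # bundle all registered summaries into one op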
@@ -9,8 +9,10 @@ import threading
 from ..analyzers import ExperimentAnalizer
-from ..network import SequenceNetwork
 from .Trainer import Trainer
+from ..analyzers import SoftmaxAnalizer
 import os
+from bob.learn.tensorflow.utils.session import Session
 import bob.core
+import logging
 logger = logging.getLogger("bob.learn")
@@ -61,192 +63,124 @@ class TripletTrainer(Trainer):
     """

     def __init__(self,
-                 architecture,
-                 optimizer=tf.train.AdamOptimizer(),
-                 use_gpu=False,
-                 loss=None,
-                 temp_dir="cnn",
-
-                 # Learning rate
-                 learning_rate=None,
+                 train_data_shuffler,

                  ###### training options ##########
-                 convergence_threshold=0.01,
                  iterations=5000,
                  snapshot=500,
+                 validation_snapshot=100,
-                 prefetch=False,

                  ## Analizer
-                 analizer=ExperimentAnalizer(),
+                 analizer=SoftmaxAnalizer(),

                  model_from_file="",

+                 # Temporary dir
+                 temp_dir="siamese_cnn",

                  verbosity_level=2
                  ):

         super(TripletTrainer, self).__init__(
-            architecture=architecture,
-            optimizer=optimizer,
-            use_gpu=use_gpu,
-            loss=loss,
-            temp_dir=temp_dir,
-
-            # Learning rate
-            learning_rate=learning_rate,
+            train_data_shuffler,

             ###### training options ##########
-            convergence_threshold=convergence_threshold,
             iterations=iterations,
             snapshot=snapshot,
+            validation_snapshot=validation_snapshot,
-            prefetch=prefetch,