Redesigning the datashuffler

parent be59f56f
......@@ -5,9 +5,10 @@
import numpy
import tensorflow as tf
import bob.ip.base
class BaseDataShuffler(object):
class Base(object):
def __init__(self, data, labels,
input_shape,
input_dtype="float64",
......@@ -36,7 +37,6 @@ class BaseDataShuffler(object):
self.shape = tuple([batch_size] + input_shape)
self.input_shape = tuple(input_shape)
self.labels = labels
self.possible_labels = list(set(self.labels))
......@@ -47,51 +47,9 @@ class BaseDataShuffler(object):
self.indexes = numpy.array(range(self.n_samples))
numpy.random.shuffle(self.indexes)
# TODO: Reorganize the data shufflers for siamese and triplets
self.data_placeholder = None
self.data2_placeholder = None
self.data3_placeholder = None
self.label_placeholder = None
def get_placeholders_forprefetch(self, name=""):
"""
Return the data and label placeholders used for prefetching.

The data placeholder is float32; its first dimension is left free (None)
and the remaining dimensions are taken from self.shape[1:]. The label
placeholder is int64 with shape [None]. `name` is only forwarded to the
data placeholder.
"""
# NOTE(review): indentation is lost in this view, so it is unclear whether the
# label placeholder below is also guarded by this check - confirm before editing.
if self.data_placeholder is None:
self.data_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name)
self.label_placeholder = tf.placeholder(tf.int64, shape=[None, ])
return self.data_placeholder, self.label_placeholder
def get_placeholders_pair_forprefetch(self, name=""):
    """
    Return the two data placeholders and the label placeholder for prefetching.

    Every placeholder is created only while its attribute is still None, so
    repeated calls hand back the same objects. Both data placeholders are
    float32 with a free (None) first dimension followed by self.shape[1:],
    and both receive the same `name`. The label placeholder is int64 with
    shape [None] and gets no explicit name.
    """
    # The shape is built inside the guard so that nothing is computed once
    # the placeholder already exists.
    for attribute in ("data_placeholder", "data2_placeholder"):
        if getattr(self, attribute) is None:
            setattr(self, attribute,
                    tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name))

    if self.label_placeholder is None:
        self.label_placeholder = tf.placeholder(tf.int64, shape=[None, ])

    return self.data_placeholder, self.data2_placeholder, self.label_placeholder
def get_placeholders_triplet_forprefetch(self, name=""):
    """
    Return the three data placeholders of a triplet for prefetching.

    Each placeholder is created only while its attribute is still None, so
    repeated calls hand back the same objects. All three are float32 with a
    free (None) first dimension followed by self.shape[1:], and all receive
    the same `name`. No label placeholder is involved.
    """
    triplet_attributes = ("data_placeholder", "data2_placeholder", "data3_placeholder")

    for attribute in triplet_attributes:
        if getattr(self, attribute) is None:
            setattr(self, attribute,
                    tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name))

    return self.data_placeholder, self.data2_placeholder, self.data3_placeholder
def get_placeholders(self, name=""):
"""
Returns a place holder with the size of your batch
......@@ -103,91 +61,59 @@ class BaseDataShuffler(object):
if self.label_placeholder is None:
self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0])
return self.data_placeholder, self.label_placeholder
return [self.data_placeholder, self.label_placeholder]
def get_placeholders_pair(self, name=""):
def get_placeholders_forprefetch(self, name=""):
"""
Returns a place holder with the size of your batch
"""
if self.data_placeholder is None:
self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_right")
if self.data2_placeholder is None:
self.data2_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_left")
if self.label_placeholder is None:
self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0], name=name+"_label")
return self.data_placeholder, self.data2_placeholder, self.label_placeholder
self.data_placeholder = tf.placeholder(tf.float32, shape=tuple([None] + list(self.shape[1:])), name=name)
self.label_placeholder = tf.placeholder(tf.int64, shape=[None, ])
return [self.data_placeholder, self.label_placeholder]
def get_placeholders_triplet(self, name=""):
def bob2skimage(self, bob_image):
"""
Returns a place holder with the size of your batch
Convert bob color image to the skcit image
"""
if self.data_placeholder is None:
self.data_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_anchor")
if self.data2_placeholder is None:
self.data2_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_positive")
if self.data3_placeholder is None:
self.data3_placeholder = tf.placeholder(tf.float32, shape=self.shape, name=name+"_negative")
skimage = numpy.zeros(shape=(bob_image.shape[1], bob_image.shape[2], 3))
return self.data_placeholder, self.data2_placeholder, self.data3_placeholder
skimage[:, :, 0] = bob_image[0, :, :] # Copying red
skimage[:, :, 1] = bob_image[1, :, :] # Copying green
skimage[:, :, 2] = bob_image[2, :, :] # Copying blue
def get_genuine_or_not(self, input_data, input_labels, genuine=True):
return skimage
if genuine:
# Getting a client
index = numpy.random.randint(len(self.possible_labels))
index = int(self.possible_labels[index])
# Getting the indexes of the data from a particular client
indexes = numpy.where(input_labels == index)[0]
numpy.random.shuffle(indexes)
# Picking a pair
data = input_data[indexes[0], ...]
data_p = input_data[indexes[1], ...]
def rescale(self, data):
"""
Reescale a single sample with input_shape
"""
# if self.input_shape != data.shape:
if self.bob_shape != data.shape:
# TODO: Implement a better way to do this reescaling
# If it is gray scale
if self.input_shape[2] == 1:
copy = data[:, :, 0].copy()
dst = numpy.zeros(shape=self.input_shape[0:2])
bob.ip.base.scale(copy, dst)
dst = numpy.reshape(dst, self.input_shape)
else:
# dst = numpy.resize(data, self.bob_shape) # Scaling with numpy, because bob is c,w,d instead of w,h,c
dst = numpy.zeros(shape=self.bob_shape)
# TODO: LAME SOLUTION
if data.shape[0] != 3: # GRAY SCALE IMAGES IN A RGB DATABASE
step_data = numpy.zeros(shape=(3, data.shape[0], data.shape[1]))
step_data[0, ...] = data[:, :]
step_data[1, ...] = data[:, :]
step_data[2, ...] = data[:, :]
data = step_data
bob.ip.base.scale(data, dst)
return dst
else:
# Picking a pair of labels from different clients
index = numpy.random.choice(len(self.possible_labels), 2, replace=False)
index[0] = self.possible_labels[int(index[0])]
index[1] = self.possible_labels[int(index[1])]
# Getting the indexes of the two clients
indexes = numpy.where(input_labels == index[0])[0]
indexes_p = numpy.where(input_labels == index[1])[0]
numpy.random.shuffle(indexes)
numpy.random.shuffle(indexes_p)
# Picking a pair
data = input_data[indexes[0], ...]
data_p = input_data[indexes_p[0], ...]
return data, data_p
def get_one_triplet(self, input_data, input_labels):
    """
    Draw one (anchor, positive, negative) triplet at random.

    Two distinct entries of self.possible_labels are drawn. The anchor and
    the positive sample are two different samples carrying the first label;
    the negative sample carries the second label.

    **Parameters**

    input_data: Samples, indexed along the first axis
    input_labels: Label of each sample, compared element-wise with the drawn labels

    **Return**

    The tuple (data_anchor, data_positive, data_negative)
    """
    # Two distinct positions in `possible_labels`, replaced in place by the labels themselves
    clients = numpy.random.choice(len(self.possible_labels), 2, replace=False)
    clients[0] = self.possible_labels[clients[0]]
    clients[1] = self.possible_labels[clients[1]]

    # Anchor and positive come from the first client
    same_client = numpy.where(input_labels == clients[0])[0]
    numpy.random.shuffle(same_client)
    data_anchor, data_positive = input_data[same_client[0], ...], input_data[same_client[1], ...]

    # The negative comes from the second client
    other_client = numpy.where(input_labels == clients[1])[0]
    numpy.random.shuffle(other_client)
    data_negative = input_data[other_client[0], ...]

    return data_anchor, data_positive, data_negative
return data
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import numpy
import bob.io.base
import bob.io.image
import bob.ip.base
import bob.core
from .Base import Base
logger = bob.core.log.setup("bob.learn.tensorflow")
class Disk(Base):
    """
    Shuffler whose samples are files that are loaded from disk batch by batch.
    """

    def __init__(self, data, labels,
                 input_shape,
                 input_dtype="float64",
                 scale=True,
                 batch_size=1):
        """
        Shuffler that deals with a list of files

        **Parameters**

        data: File names of the samples; a list is converted to a numpy array
        labels: Label of each file; a list is converted to a numpy array
        input_shape: Shape of the input; its first two entries and its third entry
                     are used to build `bob_shape`
        input_dtype: Forwarded unchanged to the base class
        scale: Forwarded unchanged to the base class
        batch_size: Forwarded unchanged to the base class
        """
        data = numpy.array(data) if isinstance(data, list) else data
        labels = numpy.array(labels) if isinstance(labels, list) else labels

        base_arguments = dict(data=data,
                              labels=labels,
                              input_shape=input_shape,
                              input_dtype=input_dtype,
                              scale=scale,
                              batch_size=batch_size)
        super(Disk, self).__init__(**base_arguments)

        # TODO: very bad solution to deal with bob shaped images and tf shaped images.
        # The third entry of input_shape is moved to the front.
        self.bob_shape = (input_shape[2],) + tuple(input_shape[0:2])

    def load_from_file(self, file_name, shape):
        """
        Load one file and return it rescaled.

        `shape` is accepted but not used. A warning is logged when the
        resulting array contains NaN values.
        """
        loaded = bob.io.base.load(file_name)

        is_gray_scale = loaded.shape[0] != 3 and self.input_shape[2] != 3
        if is_gray_scale:
            # Append a trailing axis of size one before rescaling
            expanded = numpy.zeros(shape=(loaded.shape[0], loaded.shape[1], 1))
            expanded[:, :, 0] = loaded
            data = self.rescale(expanded)
        else:
            data = self.bob2skimage(self.rescale(loaded))

        # Checking NaN
        if numpy.isnan(data).any():
            logger.warning("######### Image {0} has noise #########".format(file_name))

        return data

    def get_batch(self):
        """
        Load `batch_size` randomly chosen files.

        Returns the list [data as float32, labels as int64]. Each sample is
        multiplied by `scale_value` when `scale` is set; both attributes are
        defined outside this class.
        """
        # Shuffling samples
        shuffled = numpy.array(range(self.data.shape[0]))
        numpy.random.shuffle(shuffled)

        selected_data = numpy.zeros(shape=self.shape)
        for slot in range(self.batch_size):
            sample = self.load_from_file(self.data[shuffled[slot]], self.shape)
            selected_data[slot, ...] = sample
            if self.scale:
                selected_data[slot, ...] *= self.scale_value

        selected_labels = self.labels[shuffled[0:self.batch_size]]

        return [selected_data.astype("float32"), selected_labels.astype("int64")]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import numpy
from .Base import Base
import tensorflow as tf
class Memory(Base):
    """
    Shuffler whose whole dataset is already held in memory.
    """

    def __init__(self, data, labels,
                 input_shape,
                 input_dtype="float64",
                 scale=True,
                 batch_size=1):
        """
        Shuffler that deals with memory datasets

        **Parameters**

        data: Samples; forwarded to the base class, then cast to `input_dtype`
        labels: Label of each sample; forwarded to the base class
        input_shape: Forwarded unchanged to the base class
        input_dtype: dtype the stored data is cast to
        scale: Forwarded to the base class; when `self.scale` is set afterwards,
               the stored data is multiplied in place by `self.scale_value`
        batch_size: Forwarded unchanged to the base class
        """
        base_arguments = dict(data=data,
                              labels=labels,
                              input_shape=input_shape,
                              input_dtype=input_dtype,
                              scale=scale,
                              batch_size=batch_size)
        super(Memory, self).__init__(**base_arguments)

        # Cast once here; the scaling below is applied in place to this copy
        self.data = self.data.astype(input_dtype)
        if self.scale:
            self.data *= self.scale_value

    def get_batch(self):
        """
        Pick `batch_size` samples at random.

        Returns the list [data as float32, labels as int64]. The data is
        indexed with four axes.
        """
        # Shuffling samples
        shuffled = numpy.array(range(self.data.shape[0]))
        numpy.random.shuffle(shuffled)
        chosen = shuffled[0:self.batch_size]

        selected_data = self.data[chosen, :, :, :]
        selected_labels = self.labels[chosen]

        return [selected_data.astype("float32"), selected_labels.astype("int64")]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import numpy
from .Base import Base
import tensorflow as tf
class Siamese(Base):
    """
    Siamese Shuffler base class
    """

    def __init__(self, **kwargs):
        """
        Forward every keyword argument to the parent class and start without
        a second data placeholder.
        """
        super(Siamese, self).__init__(**kwargs)
        self.data2_placeholder = None

    def get_placeholders(self, name=""):
        """
        Return the list [right data, left data, label] of placeholders.

        Each placeholder is created only while its attribute is still None.
        The data placeholders are float32 with shape self.shape and are named
        name + "_right" and name + "_left". The label placeholder is int64,
        shaped by self.shape[0] and named name + "_label".
        """
        for attribute, suffix in (("data_placeholder", "_right"), ("data2_placeholder", "_left")):
            if getattr(self, attribute) is None:
                setattr(self, attribute, tf.placeholder(tf.float32, shape=self.shape, name=name + suffix))

        if self.label_placeholder is None:
            self.label_placeholder = tf.placeholder(tf.int64, shape=self.shape[0], name=name + "_label")

        return [self.data_placeholder, self.data2_placeholder, self.label_placeholder]

    def get_genuine_or_not(self, input_data, input_labels, genuine=True):
        """
        Draw one pair of samples at random.

        **Parameters**

        input_data: Samples, indexed along the first axis
        input_labels: Label of each sample
        genuine: When true both samples carry the same label, otherwise they
                 carry two different labels

        **Return**

        The tuple (data, data_p)
        """
        if not genuine:
            # Picking a pair of labels from different clients: two distinct
            # positions, replaced in place by the labels themselves
            clients = numpy.random.choice(len(self.possible_labels), 2, replace=False)
            clients[0] = self.possible_labels[int(clients[0])]
            clients[1] = self.possible_labels[int(clients[1])]

            # Getting the indexes of the two clients
            first_pool = numpy.where(input_labels == clients[0])[0]
            second_pool = numpy.where(input_labels == clients[1])[0]
            numpy.random.shuffle(first_pool)
            numpy.random.shuffle(second_pool)

            # Picking a pair
            return input_data[first_pool[0], ...], input_data[second_pool[0], ...]

        # Getting a client
        position = numpy.random.randint(len(self.possible_labels))
        client = int(self.possible_labels[position])

        # Getting the indexes of the data from a particular client
        pool = numpy.where(input_labels == client)[0]
        numpy.random.shuffle(pool)

        # Picking a pair
        return input_data[pool[0], ...], input_data[pool[1], ...]
......@@ -7,6 +7,9 @@ import numpy
import bob.io.base
import bob.io.image
import bob.ip.base
import bob.core
logger = bob.core.log.setup("bob.learn.tensorflow")
import tensorflow as tf
from .BaseDataShuffler import BaseDataShuffler
......@@ -64,6 +67,10 @@ class TextDataShuffler(BaseDataShuffler):
d = self.rescale(d)
data = self.bob2skimage(d)
# Checking NaN
if numpy.sum(numpy.isnan(data)) > 0:
logger.warning("######### Image {0} has noise #########".format(file_name))
return data
def bob2skimage(self, bob_image):
......
......@@ -4,18 +4,13 @@
# @date: Wed 11 May 2016 09:39:36 CEST
import numpy
import tensorflow as tf
from .BaseDataShuffler import BaseDataShuffler
def scale_mean_norm(data, scale=0.00390625):
    """
    Subtract the global mean from `data` and multiply the result by `scale`.

    **Parameters**

    data: Values to normalize; the mean is taken over all elements
    scale: Factor applied after the mean is removed

    **Return**

    The tuple (normalized data, mean that was subtracted)
    """
    center = numpy.mean(data)
    return (data - center) * scale, center
from .Memory import Memory
from .Siamese import Siamese
import tensorflow as tf
class MemoryDataShuffler(BaseDataShuffler):
class SiameseMemory(Siamese, Memory):
def __init__(self, data, labels,
input_shape,
......@@ -34,7 +29,7 @@ class MemoryDataShuffler(BaseDataShuffler):
validation_batch_size:
"""
super(MemoryDataShuffler, self).__init__(
super(SiameseMemory, self).__init__(
data=data,
labels=labels,
input_shape=input_shape,
......@@ -47,18 +42,7 @@ class MemoryDataShuffler(BaseDataShuffler):
if self.scale:
self.data *= self.scale_value
def get_batch(self):
    """
    Pick `batch_size` samples at random.

    Returns the tuple (data, labels as int64). The data is indexed with four
    axes and is returned without any dtype conversion.
    """
    # Shuffling samples
    shuffled = numpy.array(range(self.data.shape[0]))
    numpy.random.shuffle(shuffled)
    chosen = shuffled[0:self.batch_size]

    selected_data = self.data[chosen, :, :, :]
    selected_labels = self.labels[chosen]

    return selected_data, selected_labels.astype("int64")
def get_pair(self, zero_one_labels=True):
def get_batch(self, zero_one_labels=True):
"""
Get a random pair of samples
......@@ -80,24 +64,4 @@ class MemoryDataShuffler(BaseDataShuffler):
labels_siamese[i] = -1 if genuine else +1
genuine = not genuine
return data, data_p, labels_siamese
def get_random_triplet(self):
    """
    Get a batch of random triplets

    One triplet is drawn with `get_one_triplet` for every row of the batch
    (self.shape[0] rows), using self.data and self.labels.

    **Return**

    The tuple (anchors, positives, negatives); each is a float32 array of
    shape self.shape
    """
    data_a, data_p, data_n = (numpy.zeros(shape=self.shape, dtype='float32') for _ in range(3))

    for row in range(self.shape[0]):
        anchor, positive, negative = self.get_one_triplet(self.data, self.labels)
        data_a[row, ...] = anchor
        data_p[row, ...] = positive
        data_n[row, ...] = negative

    return data_a, data_p, data_n
return [data, data_p, labels_siamese]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import numpy
from .Base import Base
class Triplet(Base):
    """
    Triplet Shuffler base class
    """

    def __init__(self, **kwargs):
        """
        Forward every keyword argument to the parent class and start without
        the second and third data placeholders.
        """
        super(Triplet, self).__init__(**kwargs)
        self.data2_placeholder = None
        self.data3_placeholder = None

    def get_one_triplet(self, input_data, input_labels):
        """
        Draw one (anchor, positive, negative) triplet at random.

        Two distinct entries of self.possible_labels are drawn. The anchor and
        the positive sample are two different samples carrying the first
        label; the negative sample carries the second label.

        **Parameters**

        input_data: Samples, indexed along the first axis
        input_labels: Label of each sample

        **Return**

        The tuple (data_anchor, data_positive, data_negative)
        """
        # Getting a pair of clients: two distinct positions, replaced in place by the labels
        clients = numpy.random.choice(len(self.possible_labels), 2, replace=False)
        clients[0] = self.possible_labels[clients[0]]
        clients[1] = self.possible_labels[clients[1]]

        # Picking a positive pair from the first client
        same_client = numpy.where(input_labels == clients[0])[0]
        numpy.random.shuffle(same_client)
        data_anchor, data_positive = input_data[same_client[0], ...], input_data[same_client[1], ...]

        # Picking a negative sample from the second client
        other_client = numpy.where(input_labels == clients[1])[0]
        numpy.random.shuffle(other_client)
        data_negative = input_data[other_client[0], ...]

        return data_anchor, data_positive, data_negative
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import numpy
import bob.io.base
import bob.io.image
import bob.ip.base
import bob.core
logger = bob.core.log.setup("bob.learn.tensorflow")
import tensorflow as tf
from .BaseDataShuffler import BaseDataShuffler
#def scale_mean_norm(data, scale=0.00390625):
# mean = numpy.mean(data)
# data = (data - mean) * scale
# return data, mean
class TextDataShuffler(BaseDataShuffler):
def __init__(self, data, labels,
             input_shape,
             input_dtype="float64",
             scale=True,
             batch_size=1):
    """
    Shuffler that deals with a list of files

    **Parameters**

    data: File names of the samples; a list is converted to a numpy array
    labels: Label of each file; a list is converted to a numpy array
    input_shape: Shape of the input; its first two entries and its third entry
                 are used to build `bob_shape`
    input_dtype: Forwarded unchanged to the base class
    scale: Forwarded unchanged to the base class
    batch_size: Forwarded unchanged to the base class
    """
    data = numpy.array(data) if isinstance(data, list) else data
    labels = numpy.array(labels) if isinstance(labels, list) else labels

    base_arguments = dict(data=data,
                          labels=labels,
                          input_shape=input_shape,
                          input_dtype=input_dtype,
                          scale=scale,
                          batch_size=batch_size)
    super(TextDataShuffler, self).__init__(**base_arguments)

    # TODO: very bad solution to deal with bob shaped images and tf shaped images.
    # The third entry of input_shape is moved to the front.
    self.bob_shape = (input_shape[2],) + tuple(input_shape[0:2])
def load_from_file(self, file_name, shape):
    """
    Load one file and return it rescaled.

    `shape` is accepted but not used. A warning is logged when the resulting
    array contains NaN values.
    """
    loaded = bob.io.base.load(file_name)

    is_gray_scale = loaded.shape[0] != 3 and self.input_shape[2] != 3
    if is_gray_scale:
        # Append a trailing axis of size one before rescaling
        expanded = numpy.zeros(shape=(loaded.shape[0], loaded.shape[1], 1))
        expanded[:, :, 0] = loaded
        data = self.rescale(expanded)
    else:
        data = self.bob2skimage(self.rescale(loaded))

    # Checking NaN
    if numpy.isnan(data).any():
        logger.warning("######### Image {0} has noise #########".format(file_name))

    return data
def bob2skimage(self, bob_image):
    """
    Convert a bob color image to the scikit-image layout.

    The first axis of `bob_image` is read as the color channel. The result is
    a new array of shape (bob_image.shape[1], bob_image.shape[2], 3) in the
    default dtype of numpy.zeros, with the channel moved to the last axis.
    Only the first three channels are copied.
    """
    rows, columns = bob_image.shape[1], bob_image.shape[2]
    skimage = numpy.zeros(shape=(rows, columns, 3))

    # Copying red, green and blue, in that order
    for channel in range(3):
        skimage[:, :, channel] = bob_image[channel, :, :]

    return skimage
def get_batch(self):
    """
    Load `batch_size` randomly chosen files.

    Returns the tuple (data as float32, labels). The labels are returned
    without any dtype conversion. Each sample is multiplied by `scale_value`
    when `scale` is set; both attributes are defined outside this method.
    """
    # Shuffling samples
    shuffled = numpy.array(range(self.data.shape[0]))
    numpy.random.shuffle(shuffled)

    selected_data = numpy.zeros(shape=self.shape)
    for slot in range(self.batch_size):
        sample = self.load_from_file(self.data[shuffled[slot]], self.shape)
        selected_data[slot, ...] = sample
        if self.scale:
            selected_data[slot, ...] *= self.scale_value

    selected_labels = self.labels[shuffled[0:self.batch_size]]

    return selected_data.astype("float32"), selected_labels
def rescale(self, data):
"""
Reescale a single sample with input_shape
"""
#if self.input_shape != data.shape:
if self.bob_shape != data.shape:
# TODO: Implement a better way to do this reescaling
# If it is gray scale
if self.input_shape[2] == 1:
copy = data[:, :, 0].copy()
dst = numpy.zeros(shape=self.input_shape[0:2])
bob.ip.base.scale(copy, dst)
dst = numpy.reshape(dst, self.input_shape)
else:
#dst = numpy.resize(data, self.bob_shape) # Scaling with numpy, because bob is c,w,d instead of w,h,c
dst = numpy.zeros(shape=self.bob_shape)
# TODO: LAME SOLUTION
if data.shape[0] != 3: # GRAY SCALE IMAGES IN A RGB DATABASE
step_data = numpy.zeros(shape=(3, data.shape[0], data.shape[1]))
step_data[0, ...] = data[:, :]
step_data[1, ...] = data[:, :]
step_data[2, ...] = data[:, :]