Commit 70408dca authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira Committed by Amir MOHAMMADI

Removing legacy code [skip ci]

parent 97e81067
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Tue 09 Aug 2016 15:33 CEST
"""
Neural net work error rates analizer
"""
import numpy
import bob.measure
from tensorflow.core.framework import summary_pb2
from scipy.spatial.distance import cosine
from bob.learn.tensorflow.datashuffler import Memory, Disk
class ExperimentAnalizer:
"""
Analizer.
I don't know if this is the best way to do, but what this class do is the following.
As an enrollment sample, averare all the TRAINING samples for one particular class.
The probing is done with the validation set
"""
def __init__(self, convergence_threshold=0.01,
convergence_reference='eer'):
"""
Use the CNN as feature extractor for a n-class classification
** Parameters **
convergence_threshold:
convergence_reference:
"""
self.data_shuffler = None
self.network = None
self.session = None
# Statistics
self.eer = []
self.far10 = []
self.far100 = []
self.far1000 = []
    def __call__(self, data_shuffler, network, session):
# Getting the base class. Recipe extracted from
# http://stackoverflow.com/questions/5516263/creating-an-object-from-a-base-class-object-in-python/5516330#5516330
        shuffler_class = Memory if isinstance(data_shuffler, Memory) else Disk
        base_data_shuffler = object.__new__(shuffler_class)
        base_data_shuffler.__dict__ = data_shuffler.__dict__.copy()
# Extracting features for enrollment
enroll_data, enroll_labels = base_data_shuffler.get_batch()
enroll_features = network(enroll_data)
del enroll_data
# Extracting features for probing
probe_data, probe_labels = base_data_shuffler.get_batch()
probe_features = network(probe_data)
del probe_data
# Creating models
models = []
        for i in range(len(base_data_shuffler.possible_labels)):
            indexes_model = numpy.where(
                enroll_labels == base_data_shuffler.possible_labels[i])[0]
models.append(
numpy.mean(enroll_features[indexes_model, :], axis=0))
# Probing
positive_scores = numpy.zeros(shape=0)
negative_scores = numpy.zeros(shape=0)
        for i in range(len(base_data_shuffler.possible_labels)):
# Positive scoring
indexes = probe_labels == base_data_shuffler.possible_labels[i]
positive_data = probe_features[indexes, :]
p = [
cosine(models[i], positive_data[j])
for j in range(positive_data.shape[0])
]
positive_scores = numpy.hstack((positive_scores, p))
            # Negative scoring
indexes = probe_labels != base_data_shuffler.possible_labels[i]
negative_data = probe_features[indexes, :]
n = [
cosine(models[i], negative_data[j])
for j in range(negative_data.shape[0])
]
negative_scores = numpy.hstack((negative_scores, n))
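        # Note: scipy's ``cosine`` is a distance (smaller means more similar);
        # the scores are negated below so that larger values mean a better
        # match, which is the convention bob.measure expects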
return self.__compute_tensorflow_summary((-1) * negative_scores,
(-1) * positive_scores)
def __compute_tensorflow_summary(self, negative_scores, positive_scores):
"""
Compute some stats with the scores, such as:
- EER
- FAR 10
- FAR 100
- FAR 1000
- RANK 1
- RANK 10
**Parameters**
negative_scores:
positive_scores:
"""
summaries = []
# Compute EER
threshold = bob.measure.eer_threshold(negative_scores, positive_scores)
far, frr = bob.measure.farfrr(negative_scores, positive_scores,
threshold)
eer = (far + frr) / 2.
summaries.append(
summary_pb2.Summary.Value(tag="EER", simple_value=eer))
self.eer.append(eer)
# Computing FAR 10
threshold = bob.measure.far_threshold(
negative_scores, positive_scores, far_value=0.1)
far, frr = bob.measure.farfrr(negative_scores, positive_scores,
threshold)
summaries.append(
summary_pb2.Summary.Value(tag="FAR 10", simple_value=frr))
self.far10.append(frr)
# Computing FAR 100
threshold = bob.measure.far_threshold(
negative_scores, positive_scores, far_value=0.01)
far, frr = bob.measure.farfrr(negative_scores, positive_scores,
threshold)
summaries.append(
summary_pb2.Summary.Value(tag="FAR 100", simple_value=frr))
self.far100.append(frr)
# Computing FAR 1000
threshold = bob.measure.far_threshold(
negative_scores, positive_scores, far_value=0.001)
far, frr = bob.measure.farfrr(negative_scores, positive_scores,
threshold)
summaries.append(
summary_pb2.Summary.Value(tag="FAR 1000", simple_value=frr))
self.far1000.append(frr)
return summary_pb2.Summary(value=summaries)
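# A minimal usage sketch (hedged; ``shuffler``, ``net`` and ``sess`` are
# hypothetical stand-ins for the datashuffler, network and tf.Session that a
# trainer would normally supply):
#
#   analyzer = ExperimentAnalizer()
#   summary = analyzer(shuffler, net, sess)  # a summary_pb2.Summary protobuf
#   # summary carries the EER / FAR 10 / FAR 100 / FAR 1000 scalar values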
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Tue 09 Aug 2016 15:33 CEST
"""
Neural net work error rates analizer
"""
import numpy
from tensorflow.core.framework import summary_pb2
class SoftmaxAnalizer(object):
"""
Analizer.
"""
def __init__(self):
"""
Softmax analizer
"""
pass
def __call__(self, data_shuffler, network, session):
data, labels = data_shuffler.get_batch()
predictions = numpy.argmax(
session.run(
network.inference_graph,
feed_dict={
network.inference_placeholder: data[:]
}), 1)
accuracy = 100. * numpy.sum(
predictions == labels) / predictions.shape[0]
summaries = [(summary_pb2.Summary.Value(
tag="accuracy_validation", simple_value=float(accuracy)))]
return summary_pb2.Summary(value=summaries)
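# A minimal usage sketch (hedged; names are illustrative stand-ins for the
# objects a trainer would supply):
#
#   analyzer = SoftmaxAnalizer()
#   summary = analyzer(shuffler, network, session)
#   # summary holds a single "accuracy_validation" scalar, in percent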
from .ExperimentAnalizer import ExperimentAnalizer
from .SoftmaxAnalizer import SoftmaxAnalizer
# gets sphinx autodoc done right - don't remove it
def __appropriate__(*args):
"""Says object was actually declared here, an not on the import module.
Parameters:
*args: An iterable of objects to modify
    Resolves `Sphinx referencing issues
    <https://github.com/sphinx-doc/sphinx/issues/3048>`_
"""
for obj in args:
obj.__module__ = __name__
__appropriate__(
ExperimentAnalizer,
SoftmaxAnalizer,
)
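# With the patch above, e.g. ``ExperimentAnalizer.__module__`` reports this
# package instead of the defining submodule, so Sphinx cross-references resolve.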
__all__ = [_ for _ in dir() if not _.startswith('_')]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import numpy
import tensorflow as tf
import bob.ip.base
import six
class Base(object):
"""
The class provide base functionalities to shuffle the data to train a neural network
**Parameters**
data:
Input data to be trainer
labels:
Labels. These labels should be set from 0..1
input_shape:
The shape of the inputs
input_dtype:
The type of the data,
batch_size:
Batch size
seed:
The seed of the random number generator
data_augmentation:
The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer:
The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
prefetch:
Do prefetch?
prefetch_capacity:
"""
def __init__(self,
data,
labels,
input_shape=[None, 28, 28, 1],
input_dtype="float32",
batch_size=32,
seed=10,
data_augmentation=None,
normalizer=None,
prefetch=False,
prefetch_capacity=50,
prefetch_threads=5):
# Setting the seed for the pseudo random number generator
self.seed = seed
numpy.random.seed(seed)
self.normalizer = normalizer
self.input_dtype = input_dtype
        # TODO: Check if the batch size is larger than the number of input samples
self.batch_size = batch_size
# Preparing the inputs
self.data = data
self.input_shape = tuple(input_shape)
self.labels = labels
self.possible_labels = list(set(self.labels))
        # Counting the data samples
self.n_samples = len(self.labels)
# Shuffling all the indexes
self.indexes = numpy.array(range(self.n_samples))
numpy.random.shuffle(self.indexes)
        # Use data augmentation?
self.data_augmentation = data_augmentation
# Preparing placeholders
self.data_ph = None
self.label_ph = None
# Prefetch variables
self.prefetch = prefetch
self.prefetch_capacity = prefetch_capacity
self.prefetch_threads = prefetch_threads
self.data_ph_from_queue = None
self.label_ph_from_queue = None
self.batch_generator = None
self.epoch = 0
def create_placeholders(self):
"""
Create place holder instances
:return:
"""
with tf.name_scope("Input"):
self.data_ph = tf.placeholder(
tf.float32, shape=self.input_shape, name="data")
self.label_ph = tf.placeholder(
tf.int64, shape=[None], name="label")
# If prefetch, setup the queue to feed data
if self.prefetch:
queue = tf.FIFOQueue(
capacity=self.prefetch_capacity,
dtypes=[tf.float32, tf.int64],
shapes=[self.input_shape[1:], []])
# Fetching the place holders from the queue
self.enqueue_op = queue.enqueue_many(
[self.data_ph, self.label_ph])
self.data_ph_from_queue, self.label_ph_from_queue = queue.dequeue_many(
self.batch_size)
else:
self.data_ph_from_queue = self.data_ph
self.label_ph_from_queue = self.label_ph
def __call__(self, element, from_queue=False):
"""
Return the necessary placeholder
"""
        if element not in ["data", "label"]:
            raise ValueError(
                "Value '{0}' invalid. Options available are {1}".format(
                    element, ["data", "label"]))
# If None, create the placeholders from scratch
if self.data_ph is None:
self.create_placeholders()
if element == "data":
if from_queue:
return self.data_ph_from_queue
else:
return self.data_ph
else:
if from_queue:
return self.label_ph_from_queue
else:
return self.label_ph
def bob2skimage(self, bob_image):
"""
Convert bob color image to the skcit image
"""
skimage = numpy.zeros(
shape=(bob_image.shape[1], bob_image.shape[2], bob_image.shape[0]))
for i in range(bob_image.shape[0]):
skimage[:, :, i] = bob_image[i, :, :]
return skimage
def skimage2bob(self, sk_image):
"""
Convert bob color image to the skcit image
"""
bob_image = numpy.zeros(
shape=(sk_image.shape[2], sk_image.shape[0], sk_image.shape[1]))
for i in range(bob_image.shape[0]):
            bob_image[i, :, :] = sk_image[:, :, i]  # Copy each channel
return bob_image
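    # Note: the two helpers above are plain axis permutations; they are
    # equivalent to ``numpy.transpose(img, (1, 2, 0))`` and
    # ``numpy.transpose(img, (2, 0, 1))`` respectively.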
def rescale(self, data):
"""
Reescale a single sample with input_shape
"""
# if self.input_shape != data.shape:
if self.bob_shape != data.shape:
# TODO: Implement a better way to do this reescaling
# If it is gray scale
if self.input_shape[3] == 1:
copy = data[:, :, 0].copy()
dst = numpy.zeros(shape=self.input_shape[1:3])
bob.ip.base.scale(copy, dst)
dst = numpy.reshape(dst, self.input_shape[1:4])
else:
#dst = numpy.resize(data, self.bob_shape) # Scaling with numpy, because bob is c,w,d instead of w,h,c
dst = numpy.zeros(shape=(data.shape[0], data.shape[1], 3))
dst[:, :, 0] = data[:, :, 0]
dst[:, :, 1] = data[:, :, 0]
dst[:, :, 2] = data[:, :, 0]
# TODO: LAME SOLUTION
#if data.shape[0] != 3: # GRAY SCALE IMAGES IN A RGB DATABASE
# step_data = numpy.zeros(shape=(3, data.shape[0], data.shape[1]))
#step_data = numpy.zeros(shape=(3, data.shape[0], data.shape[1]))
#step_data[0, ...] = data[:, :, 0]
#step_data[1, ...] = data[:, :, 0]
#step_data[2, ...] = data[:, :, 0]
#data = step_data
#dst = numpy.zeros(shape=(self.bob_shape))
#bob.ip.base.scale(data, dst)
return dst
else:
return data
def normalize_sample(self, x):
"""
Normalize the sample.
For the time being I'm only scaling from 0-1
"""
if self.normalizer is None:
return x
else:
return self.normalizer(x)
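    # Example of a compatible normalizer (hypothetical; any callable taking and
    # returning a sample works):
    #
    #   def scale_to_unit(x):
    #       return x / 255.  # map 8-bit pixel values to [0, 1]
    #
    #   shuffler = Memory(data, labels, normalizer=scale_to_unit)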
    def _aggregate_batch(self, data_holder, use_list=False):
        """
        Stack a list of [data, label] samples into per-component batch arrays
        """
size = len(data_holder[0])
result = []
for k in range(size):
if use_list:
result.append([x[k] for x in data_holder])
else:
dt = data_holder[0][k]
if type(dt) in [int, bool]:
tp = 'int64'
elif type(dt) == float:
tp = self.input_dtype
else:
                    try:
                        tp = dt.dtype
                    except AttributeError:
                        raise TypeError("Unsupported type to batch: {}".format(
                            type(dt)))
                try:
                    result.append(
                        numpy.asarray([x[k] for x in data_holder], dtype=tp))
                except KeyboardInterrupt:
                    raise
                except Exception:
                    raise RuntimeError(
                        "Cannot batch data. Perhaps they are of inconsistent shape?")
return result
def get_batch(self):
"""
Shuffle the Memory dataset and get a random batch.
** Returns **
data:
Selected samples
labels:
Correspondent labels
"""
if self.batch_generator is None:
self.batch_generator = self._fetch_batch()
holder = []
try:
for i in range(self.batch_size):
data = six.next(self.batch_generator)
holder.append(data)
if len(holder) == self.batch_size:
return self._aggregate_batch(holder, False)
except StopIteration:
self.batch_generator = None
self.epoch += 1
# If we have left data in the epoch, return
if len(holder) > 0:
return self._aggregate_batch(holder, False)
else:
self.batch_generator = self._fetch_batch()
data = six.next(self.batch_generator)
holder.append(data)
return self._aggregate_batch(holder, False)
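# Minimal consumption sketch (hedged; assumes a concrete subclass such as
# Memory and in-memory ``data``/``labels`` arrays):
#
#   shuffler = Memory(data, labels, input_shape=[None, 28, 28, 1], batch_size=32)
#   while shuffler.epoch < 10:  # the epoch counter advances on StopIteration
#       batch_data, batch_labels = shuffler.get_batch()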
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# @date: Wed 11 May 2016 09:39:36 CEST
import numpy
import bob.io.base
import bob.io.image
import bob.ip.base
import bob.core
from .Base import Base
logger = bob.core.log.setup("bob.learn.tensorflow")
class Disk(Base):
"""
This datashuffler deal with databases that are stored in the disk.
The data is loaded on the fly,.
**Parameters**
data:
Input data to be trainer
labels:
Labels. These labels should be set from 0..1
input_shape:
The shape of the inputs
input_dtype:
The type of the data,
batch_size:
Batch size
seed:
The seed of the random number generator
data_augmentation:
The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer:
The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self,
data,
labels,
input_shape,
input_dtype="float32",
batch_size=1,
seed=10,
data_augmentation=None,
normalizer=None,
prefetch=False,
prefetch_capacity=10,
prefetch_threads=5):
if isinstance(data, list):
data = numpy.array(data)
if isinstance(labels, list):
labels = numpy.array(labels)
super(Disk, self).__init__(
data=data,
labels=labels,
input_shape=input_shape,
input_dtype=input_dtype,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation,
normalizer=normalizer,
prefetch=prefetch,
prefetch_capacity=prefetch_capacity,
prefetch_threads=prefetch_threads)
        # Setting the seed
        numpy.random.seed(seed)
        # TODO: very bad solution to deal with bob shape images and tf shape images
self.bob_shape = tuple([input_shape[3]] + list(input_shape[1:3]))
def load_from_file(self, file_name):
d = bob.io.base.load(file_name)
# Applying the data augmentation
if self.data_augmentation is not None:
d = self.data_augmentation(d)
        if d.shape[0] != 3 and self.input_shape[3] != 3:  # GRAY SCALE IMAGE
data = numpy.zeros(shape=(d.shape[0], d.shape[1], 1))
data[:, :, 0] = d
data = self.rescale(data)
else:
d = self.rescale(d)
data = self.bob2skimage(d)
# Checking NaN
        if numpy.sum(numpy.isnan(data)) > 0:
            logger.warning(
                "######### Sample {0} contains NaN values #########".format(
                    file_name))
return data
def _fetch_batch(self):
"""
Shuffle the Disk dataset, get a random batch and load it on the fly.
** Returns **
data:
Selected samples
labels:
            Corresponding labels
"""
        # Shuffling samples
        indexes = numpy.array(range(self.data.shape[0]))
        numpy.random.shuffle(indexes)
for i in indexes:
file_name = self.data[i]
data = self.load_from_file(file_name)
if self.data_augmentation is not None:
data = self.skimage2bob(data)
data = self.data_augmentation(data)
data = self.bob2skimage(data)
            # normalize_sample is a bound method (never None); it already
            # handles the case where no normalizer is configured
            data = self.normalize_sample(data)
data = data.astype(self.input_dtype)
label = self.labels[i]
yield [data, label]
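# Hedged usage sketch (file names and shape are illustrative only):
#
#   files = ["/path/img_0.png", "/path/img_1.png"]
#   labels = [0, 1]
#   shuffler = Disk(files, labels, input_shape=[None, 125, 125, 3])
#   batch_data, batch_labels = shuffler.get_batch()  # images loaded on the fly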
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Pavel Korshunov <pavel.korshunov@idiap.ch>
# @date: Wed 19 Oct 23:43:22 2016
import numpy