Documenting

parent c02d5c12
# see https://docs.python.org/3/library/pkgutil.html # see https://docs.python.org/3/library/pkgutil.html
from pkgutil import extend_path from pkgutil import extend_path
__path__ = extend_path(__path__, __name__) __path__ = extend_path(__path__, __name__)
from bob.learn.tensorflow import analyzers
from bob.learn.tensorflow import datashuffler
from bob.learn.tensorflow import initialization
from bob.learn.tensorflow import layers
from bob.learn.tensorflow import loss
from bob.learn.tensorflow import network
from bob.learn.tensorflow import trainers
# see https://docs.python.org/3/library/pkgutil.html
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
from .ExperimentAnalizer import ExperimentAnalizer from .ExperimentAnalizer import ExperimentAnalizer
from .SoftmaxAnalizer import SoftmaxAnalizer from .SoftmaxAnalizer import SoftmaxAnalizer
# gets sphinx autodoc done right - don't remove it # gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')] def __appropriate__(*args):
"""Says object was actually declared here, and not in the import module.
Parameters:
*args: An iterable of objects to modify
Resolves `Sphinx referencing issues
<https://github.com/sphinx-doc/sphinx/issues/3048>`_
"""
for obj in args: obj.__module__ = __name__
__appropriate__(
ExperimentAnalizer,
SoftmaxAnalizer,
)
__all__ = [_ for _ in dir() if not _.startswith('_')]
...@@ -11,30 +11,30 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear ...@@ -11,30 +11,30 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class Base(object): class Base(object):
"""
This class provides the base functionality to shuffle the data used to train a neural network.
**Parameters**
data: Input data to be trained on
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The type of the data
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels, def __init__(self, data, labels,
input_shape, input_shape,
input_dtype="float64", input_dtype="float64",
scale=True,
batch_size=1, batch_size=1,
seed=10, seed=10,
data_augmentation=None, data_augmentation=None,
normalizer=Linear()): normalizer=Linear()):
"""
The class provide base functionalities to shuffle the data before to train a neural network
**Parameters**
data:
labels:
perc_train:
scale:
train_batch_size:
validation_batch_size:
seed: Seed for the random number generator
"""
self.seed = seed self.seed = seed
numpy.random.seed(seed) numpy.random.seed(seed)
self.scale = scale
self.normalizer = normalizer self.normalizer = normalizer
self.input_dtype = input_dtype self.input_dtype = input_dtype
...@@ -65,6 +65,12 @@ class Base(object): ...@@ -65,6 +65,12 @@ class Base(object):
self.data_placeholder = data self.data_placeholder = data
self.label_placeholder = label self.label_placeholder = label
def get_batch(self):
"""
Shuffle dataset and get a random batch.
"""
raise NotImplementedError("Method not implemented in this level. You should use one of the derived classes.")
def get_placeholders(self, name=""): def get_placeholders(self, name=""):
""" """
Returns a place holder with the size of your batch Returns a place holder with the size of your batch
......
...@@ -15,26 +15,29 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear ...@@ -15,26 +15,29 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class Disk(Base): class Disk(Base):
"""
This datashuffler deals with databases that are stored on disk.
The data is loaded on the fly.
**Parameters**
data: Input data to be trained on
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The type of the data
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels, def __init__(self, data, labels,
input_shape, input_shape,
input_dtype="float64", input_dtype="float64",
scale=True,
batch_size=1, batch_size=1,
seed=10, seed=10,
data_augmentation=None, data_augmentation=None,
normalizer=Linear()): normalizer=Linear()):
"""
This datashuffler deal with databases that are stored in the disk.
The data is loaded on the fly,.
**Parameters**
data:
labels:
input_shape: Shape of the input. `input_shape != data.shape`, the data will be reshaped
input_dtype="float64":
scale=True:
batch_size=1:
"""
if isinstance(data, list): if isinstance(data, list):
data = numpy.array(data) data = numpy.array(data)
...@@ -47,7 +50,6 @@ class Disk(Base): ...@@ -47,7 +50,6 @@ class Disk(Base):
labels=labels, labels=labels,
input_shape=input_shape, input_shape=input_shape,
input_dtype=input_dtype, input_dtype=input_dtype,
scale=scale,
batch_size=batch_size, batch_size=batch_size,
seed=seed, seed=seed,
data_augmentation=data_augmentation, data_augmentation=data_augmentation,
...@@ -80,7 +82,15 @@ class Disk(Base): ...@@ -80,7 +82,15 @@ class Disk(Base):
return data return data
def get_batch(self, noise=False): def get_batch(self):
"""
Shuffle the Disk dataset, get a random batch and load it on the fly.
**Returns**
data: Selected samples
labels: Corresponding labels
"""
# Shuffling samples # Shuffling samples
indexes = numpy.array(range(self.data.shape[0])) indexes = numpy.array(range(self.data.shape[0]))
......
...@@ -11,32 +11,33 @@ import tensorflow as tf ...@@ -11,32 +11,33 @@ import tensorflow as tf
class Memory(Base): class Memory(Base):
"""
This datashuffler deals with memory databases that are stored in a :py:class:`numpy.array`
**Parameters**
data: Input data to be trained on
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The type of the data
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels, def __init__(self, data, labels,
input_shape, input_shape,
input_dtype="float64", input_dtype="float64",
scale=True,
batch_size=1, batch_size=1,
seed=10, seed=10,
data_augmentation=None, data_augmentation=None,
normalizer=Linear()): normalizer=Linear()):
"""
This datashuffler deal with databases that are stored in a :py:class`numpy.array`
**Parameters**
data:
labels:
perc_train:
scale:
train_batch_size:
validation_batch_size:
"""
super(Memory, self).__init__( super(Memory, self).__init__(
data=data, data=data,
labels=labels, labels=labels,
input_shape=input_shape, input_shape=input_shape,
input_dtype=input_dtype, input_dtype=input_dtype,
scale=scale,
batch_size=batch_size, batch_size=batch_size,
seed=seed, seed=seed,
data_augmentation=data_augmentation, data_augmentation=data_augmentation,
...@@ -47,7 +48,14 @@ class Memory(Base): ...@@ -47,7 +48,14 @@ class Memory(Base):
self.data = self.data.astype(input_dtype) self.data = self.data.astype(input_dtype)
def get_batch(self): def get_batch(self):
"""
Shuffle the Memory dataset and get a random batch.
**Returns**
data: Selected samples
labels: Corresponding labels
"""
# Shuffling samples # Shuffling samples
indexes = numpy.array(range(self.data.shape[0])) indexes = numpy.array(range(self.data.shape[0]))
numpy.random.shuffle(indexes) numpy.random.shuffle(indexes)
......
...@@ -15,6 +15,7 @@ class Siamese(Base): ...@@ -15,6 +15,7 @@ class Siamese(Base):
The first two are the batch data, and the last is the label. Either `0` for samples from the same class or `1` The first two are the batch data, and the last is the label. Either `0` for samples from the same class or `1`
for samples from different class. for samples from different class.
""" """
def __init__(self, **kwargs): def __init__(self, **kwargs):
......
...@@ -14,25 +14,27 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear ...@@ -14,25 +14,27 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class SiameseDisk(Siamese, Disk): class SiameseDisk(Siamese, Disk):
"""
This :py:class:`bob.learn.tensorflow.datashuffler.Siamese` datashuffler deals with databases that are stored on disk.
The data is loaded on the fly.
**Parameters**
data: Input data to be trained on
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The type of the data
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels, def __init__(self, data, labels,
input_shape, input_shape,
input_dtype="float64", input_dtype="float64",
scale=True,
batch_size=1, batch_size=1,
seed=10, seed=10,
data_augmentation=None, data_augmentation=None,
normalizer=Linear()): normalizer=Linear()):
"""
Shuffler that deal with file list
**Parameters**
data:
labels:
input_shape: Shape of the input. `input_shape != data.shape`, the data will be reshaped
input_dtype="float64":
scale=True:
batch_size=1:
"""
if isinstance(data, list): if isinstance(data, list):
data = numpy.array(data) data = numpy.array(data)
...@@ -45,7 +47,6 @@ class SiameseDisk(Siamese, Disk): ...@@ -45,7 +47,6 @@ class SiameseDisk(Siamese, Disk):
labels=labels, labels=labels,
input_shape=input_shape, input_shape=input_shape,
input_dtype=input_dtype, input_dtype=input_dtype,
scale=scale,
batch_size=batch_size, batch_size=batch_size,
seed=seed, seed=seed,
data_augmentation=data_augmentation, data_augmentation=data_augmentation,
......
...@@ -10,34 +10,36 @@ from .Siamese import Siamese ...@@ -10,34 +10,36 @@ from .Siamese import Siamese
import tensorflow as tf import tensorflow as tf
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class SiameseMemory(Siamese, Memory): class SiameseMemory(Siamese, Memory):
"""
This :py:class:`bob.learn.tensorflow.datashuffler.Siamese` datashuffler deals with databases that are held in memory.
The data is loaded on the fly.
**Parameters**
data: Input data to be trained on
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The type of the data
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels, def __init__(self, data, labels,
input_shape, input_shape,
input_dtype="float", input_dtype="float",
scale=True,
batch_size=1, batch_size=1,
seed=10, seed=10,
data_augmentation=None, data_augmentation=None,
normalizer=Linear()): normalizer=Linear()):
"""
Shuffler that deal with memory datasets
**Parameters**
data:
labels:
perc_train:
scale:
train_batch_size:
validation_batch_size:
"""
super(SiameseMemory, self).__init__( super(SiameseMemory, self).__init__(
data=data, data=data,
labels=labels, labels=labels,
input_shape=input_shape, input_shape=input_shape,
input_dtype=input_dtype, input_dtype=input_dtype,
scale=scale,
batch_size=batch_size, batch_size=batch_size,
seed=seed, seed=seed,
data_augmentation=data_augmentation, data_augmentation=data_augmentation,
......
...@@ -15,6 +15,7 @@ class Triplet(Base): ...@@ -15,6 +15,7 @@ class Triplet(Base):
The first element is the batch for the anchor, the second one is the batch for the positive class, w.r.t the The first element is the batch for the anchor, the second one is the batch for the positive class, w.r.t the
anchor, and the last one is the batch for the negative class , w.r.t the anchor. anchor, and the last one is the batch for the negative class , w.r.t the anchor.
""" """
def __init__(self, **kwargs): def __init__(self, **kwargs):
......
...@@ -16,26 +16,30 @@ from .Disk import Disk ...@@ -16,26 +16,30 @@ from .Disk import Disk
from .Triplet import Triplet from .Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletDisk(Triplet, Disk): class TripletDisk(Triplet, Disk):
"""
This :py:class:`bob.learn.tensorflow.datashuffler.Triplet` datashuffler deals with databases that are stored on disk.
The data is loaded on the fly.
**Parameters**
data: Input data to be trained on
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The type of the data
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels, def __init__(self, data, labels,
input_shape, input_shape,
input_dtype="float64", input_dtype="float64",
scale=True,
batch_size=1, batch_size=1,
seed=10, seed=10,
data_augmentation=None, data_augmentation=None,
normalizer=Linear()): normalizer=Linear()):
"""
Shuffler that deal with file list
**Parameters**
data:
labels:
input_shape: Shape of the input. `input_shape != data.shape`, the data will be reshaped
input_dtype="float64":
scale=True:
batch_size=1:
"""
if isinstance(data, list): if isinstance(data, list):
data = numpy.array(data) data = numpy.array(data)
...@@ -48,7 +52,6 @@ class TripletDisk(Triplet, Disk): ...@@ -48,7 +52,6 @@ class TripletDisk(Triplet, Disk):
labels=labels, labels=labels,
input_shape=input_shape, input_shape=input_shape,
input_dtype=input_dtype, input_dtype=input_dtype,
scale=scale,
batch_size=batch_size, batch_size=batch_size,
data_augmentation=data_augmentation, data_augmentation=data_augmentation,
normalizer=normalizer normalizer=normalizer
......
...@@ -10,34 +10,36 @@ from .Memory import Memory ...@@ -10,34 +10,36 @@ from .Memory import Memory
from Triplet import Triplet from Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletMemory(Triplet, Memory): class TripletMemory(Triplet, Memory):
"""
This :py:class:`bob.learn.tensorflow.datashuffler.Triplet` datashuffler deals with databases that are stored in memory.
The data is loaded on the fly.
**Parameters**
data: Input data to be trained on
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The type of the data
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels, def __init__(self, data, labels,
input_shape, input_shape,
input_dtype="float64", input_dtype="float64",
scale=True,
batch_size=1, batch_size=1,
seed=10, seed=10,
data_augmentation=None, data_augmentation=None,
normalizer=Linear()): normalizer=Linear()):
"""
Shuffler that deal with memory datasets
**Parameters**
data:
labels:
perc_train:
scale:
train_batch_size:
validation_batch_size:
"""
super(TripletMemory, self).__init__( super(TripletMemory, self).__init__(
data=data, data=data,
labels=labels, labels=labels,
input_shape=input_shape, input_shape=input_shape,
input_dtype=input_dtype, input_dtype=input_dtype,
scale=scale,
batch_size=batch_size, batch_size=batch_size,
seed=seed, seed=seed,
data_augmentation=data_augmentation, data_augmentation=data_augmentation,
......
...@@ -6,18 +6,20 @@ ...@@ -6,18 +6,20 @@
import numpy import numpy
import tensorflow as tf import tensorflow as tf
from .Disk import Disk from Disk import Disk
from .Triplet import Triplet from Triplet import Triplet
from .OnlineSampling import OnLineSampling from OnlineSampling import OnLineSampling
from scipy.spatial.distance import euclidean, cdist from scipy.spatial.distance import euclidean, cdist
import logging import logging
logger = logging.getLogger("bob.learn.tensorflow") logger = logging.getLogger("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling): class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
""" """
This data shuffler generates triplets from :py:class:`bob.learn.tensorflow.datashuffler.Memory` shufflers. This data shuffler generates triplets from :py:class:`bob.learn.tensorflow.datashuffler.Triplet` and
:py:class:`bob.learn.tensorflow.datashuffler.Disk` shufflers.
The selection of the triplets is inspired in the paper: The selection of the triplets is inspired in the paper:
...@@ -33,20 +35,19 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling): ...@@ -33,20 +35,19 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
argmin(||f(x_a) - f(x_p)||^2 < ||f(x_a) - f(x_n)||^2 argmin(||f(x_a) - f(x_p)||^2 < ||f(x_a) - f(x_n)||^2
**Parameters** **Parameters**
data: data: Input data to be trainer
labels: labels: Labels. These labels should be set from 0..1
perc_train: input_shape: The shape of the inputs
scale: input_dtype: The type of the data,
train_batch_size: batch_size: Batch size
validation_batch_size: seed: The seed of the random number generator
data_augmentation: data_augmentation: The algorithm used for data augmentation. Look :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
total_identities: Number of identities inside of the batch normalizer: The algorithm used for feature scaling. Look :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
""" """
def __init__(self, data, labels, def __init__(self, data, labels,
input_shape, input_shape,
input_dtype="float64", input_dtype="float64",
scale=True,
batch_size=1, batch_size=1,
seed=10, seed=10,
data_augmentation=None, data_augmentation=None,
...@@ -58,7 +59,6 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling): ...@@ -58,7 +59,6 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
labels=labels, labels=labels,
input_shape=input_shape, input_shape=input_shape,
input_dtype=input_dtype, input_dtype=input_dtype,
scale=scale,
batch_size=batch_size, batch_size=batch_size,
seed=seed, seed=seed,
data_augmentation=data_augmentation, data_augmentation=data_augmentation,
......
...@@ -16,33 +16,21 @@ import logging ...@@ -16,33 +16,21 @@ import logging
logger = logging.getLogger("bob.learn.tensorflow") logger = logging.getLogger("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling): class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling):
""" """
This data shuffler generates triplets from :py:class:`bob.learn.tensorflow.datashuffler.Memory` shufflers. This data shuffler generates triplets from :py:class:`bob.learn.tensorflow.datashuffler.Triplet` shufflers.
The selection of the triplets is random.
The selection of the triplets is inspired in the paper:
Schroff, Florian, Dmitry Kalenichenko, and James Philbin.
"Facenet: A unified embedding for face recognition and clustering." Proceedings of the IEEE Conference on
Computer Vision and Pattern Recognition. 2015.
In this shuffler, the triplets are selected as the following:
1. Select M identities