Documenting

parent c02d5c12
# see https://docs.python.org/3/library/pkgutil.html
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
from bob.learn.tensorflow import analyzers
from bob.learn.tensorflow import datashuffler
from bob.learn.tensorflow import initialization
from bob.learn.tensorflow import layers
from bob.learn.tensorflow import loss
from bob.learn.tensorflow import network
from bob.learn.tensorflow import trainers
# see https://docs.python.org/3/library/pkgutil.html
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
from .ExperimentAnalizer import ExperimentAnalizer
from .SoftmaxAnalizer import SoftmaxAnalizer
# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
def __appropriate__(*args):
"""Says object was actually declared here, an not on the import module.
Parameters:
*args: An iterable of objects to modify
Resolves `Sphinx referencing issues
<https://github.com/sphinx-doc/sphinx/issues/3048>`_
"""
for obj in args: obj.__module__ = __name__
__appropriate__(
ExperimentAnalizer,
SoftmaxAnalizer,
)
__all__ = [_ for _ in dir() if not _.startswith('_')]
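# Illustrative sketch (assumed interactive session, not part of the file):
# the __appropriate__ trick above re-homes each class so Sphinx resolves it
# to the package instead of its defining submodule.
#
#   >>> from bob.learn.tensorflow.analyzers import SoftmaxAnalizer
#   >>> SoftmaxAnalizer.__module__
#   'bob.learn.tensorflow.analyzers'  # and not '...analyzers.SoftmaxAnalizer'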
@@ -11,30 +11,30 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class Base(object):
"""
This class provides base functionality to shuffle the data used to train a neural network
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels,
input_shape,
input_dtype="float64",
scale=True,
batch_size=1,
seed=10,
data_augmentation=None,
normalizer=Linear()):
"""
Provides base functionality to shuffle the data before training a neural network.
**Parameters**
data: Input data used for training
labels: Labels for each input sample
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
scale: If True, scale the input data
batch_size: Batch size
seed: Seed for the random number generator
"""
self.seed = seed
numpy.random.seed(seed)
self.scale = scale
self.normalizer = normalizer
self.input_dtype = input_dtype
@@ -65,6 +65,12 @@ class Base(object):
self.data_placeholder = data
self.label_placeholder = label
def get_batch(self):
"""
Shuffle the dataset and get a random batch.
"""
raise NotImplementedError("Method not implemented at this level. You should use one of the derived classes.")
def get_placeholders(self, name=""):
"""
Returns a placeholder with the size of your batch
......
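# A minimal sketch of a derived shuffler, assuming the constructor shown
# above and that Base stores `self.data`, `self.labels` and
# `self.batch_size`; `ToyShuffler` is a hypothetical name, not part of the
# package.
import numpy
from bob.learn.tensorflow.datashuffler import Base

class ToyShuffler(Base):
    def get_batch(self):
        # Draw `batch_size` random indexes and return the matching samples
        indexes = numpy.random.permutation(self.data.shape[0])[0:self.batch_size]
        return self.data[indexes], self.labels[indexes]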
@@ -15,26 +15,29 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class Disk(Base):
"""
This datashuffler deals with databases that are stored on disk.
The data is loaded on the fly.
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels,
input_shape,
input_dtype="float64",
scale=True,
batch_size=1,
seed=10,
data_augmentation=None,
normalizer=Linear()):
"""
This datashuffler deals with databases that are stored on disk.
The data is loaded on the fly.
**Parameters**
data: Paths of the files to be loaded on the fly
labels: Labels for each input sample
input_shape: Shape of the input. If `input_shape != data.shape`, the data will be reshaped
input_dtype="float64": Data type of the input
scale=True: If True, scale the input data
batch_size=1: Batch size
"""
if isinstance(data, list):
data = numpy.array(data)
@@ -47,7 +50,6 @@ class Disk(Base):
labels=labels,
input_shape=input_shape,
input_dtype=input_dtype,
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation,
@@ -80,7 +82,15 @@ class Disk(Base):
return data
def get_batch(self):
"""
Shuffle the Disk dataset, get a random batch and load it on the fly.
**Returns**
data: Selected samples
labels: Corresponding labels
"""
# Shuffling samples
indexes = numpy.array(range(self.data.shape[0]))
......
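# Hedged usage sketch for Disk: it receives a list of file paths and loads
# each sample on the fly. The paths, labels and shape below are made up for
# illustration.
from bob.learn.tensorflow.datashuffler import Disk

train_data_shuffler = Disk(
    data=['./images/sample_0.png', './images/sample_1.png'],  # hypothetical files
    labels=[0, 1],
    input_shape=[28, 28, 1],
    batch_size=2)
data, labels = train_data_shuffler.get_batch()  # the files are only read here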
@@ -11,32 +11,33 @@ import tensorflow as tf
class Memory(Base):
"""
This datashuffler deals with databases that are stored in memory as a :py:class:`numpy.ndarray`
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels,
input_shape,
input_dtype="float64",
scale=True,
batch_size=1,
seed=10,
data_augmentation=None,
normalizer=Linear()):
"""
This datashuffler deals with databases that are stored in memory as a :py:class:`numpy.ndarray`
**Parameters**
data: Input data used for training
labels: Labels for each input sample
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
scale: If True, scale the input data
batch_size: Batch size
"""
super(Memory, self).__init__(
data=data,
labels=labels,
input_shape=input_shape,
input_dtype=input_dtype,
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation,
@@ -47,7 +48,14 @@ class Memory(Base):
self.data = self.data.astype(input_dtype)
def get_batch(self):
"""
Shuffle the Memory dataset and get a random batch.
**Returns**
data: Selected samples
labels: Corresponding labels
"""
# Shuffling samples
indexes = numpy.array(range(self.data.shape[0]))
numpy.random.shuffle(indexes)
......
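# Standalone sketch of the shuffle-and-slice logic that get_batch starts
# above; the tail of the method is elided in this hunk, so the slicing step
# below is an assumption.
import numpy

def sample_batch(data, labels, batch_size):
    indexes = numpy.array(range(data.shape[0]))
    numpy.random.shuffle(indexes)
    selected = indexes[0:batch_size]
    return data[selected], labels[selected]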
@@ -15,6 +15,7 @@ class Siamese(Base):
The first two are the batch data, and the last is the label: either `0` for samples from the same class or `1`
for samples from different classes.
"""
def __init__(self, **kwargs):
......
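# Hedged sketch of the pair/label convention described above: two samples
# plus a 0/1 label (0 = same class, 1 = different class). `make_pair` is a
# hypothetical helper, not part of the API.
import numpy

def make_pair(data, labels, same_class):
    left = numpy.random.randint(len(labels))
    while True:
        right = numpy.random.randint(len(labels))
        if right != left and (labels[left] == labels[right]) == same_class:
            return data[left], data[right], 0 if same_class else 1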
@@ -14,25 +14,27 @@ from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class SiameseDisk(Siamese, Disk):
"""
This :py:class:`bob.learn.tensorflow.datashuffler.Siamese` datashuffler deals with databases that are stored on disk.
The data is loaded on the fly.
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels,
input_shape,
input_dtype="float64",
scale=True,
batch_size=1,
seed=10,
data_augmentation=None,
normalizer=Linear()):
"""
Shuffler that deals with file lists
**Parameters**
data: Paths of the files to be loaded on the fly
labels: Labels for each input sample
input_shape: Shape of the input. If `input_shape != data.shape`, the data will be reshaped
input_dtype="float64": Data type of the input
scale=True: If True, scale the input data
batch_size=1: Batch size
"""
if isinstance(data, list):
data = numpy.array(data)
@@ -45,7 +47,6 @@ class SiameseDisk(Siamese, Disk):
labels=labels,
input_shape=input_shape,
input_dtype=input_dtype,
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation,
......
@@ -10,34 +10,36 @@ from .Siamese import Siamese
import tensorflow as tf
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class SiameseMemory(Siamese, Memory):
"""
This :py:class:`bob.learn.tensorflow.datashuffler.Siamese` datashuffler deals with databases that are stored in memory.
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels,
input_shape,
input_dtype="float",
scale=True,
batch_size=1,
seed=10,
data_augmentation=None,
normalizer=Linear()):
"""
Shuffler that deals with in-memory datasets
**Parameters**
data: Input data used for training
labels: Labels for each input sample
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
scale: If True, scale the input data
batch_size: Batch size
"""
super(SiameseMemory, self).__init__(
data=data,
labels=labels,
input_shape=input_shape,
input_dtype=input_dtype,
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation,
......
@@ -15,6 +15,7 @@ class Triplet(Base):
The first element is the batch for the anchor, the second one is the batch for the positive class, w.r.t. the
anchor, and the last one is the batch for the negative class, w.r.t. the anchor.
"""
def __init__(self, **kwargs):
......
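# Hedged sketch of the triplet convention described above: anchor and
# positive share a class, the negative comes from a different one.
# `make_triplet` is a hypothetical helper and assumes `labels` is a
# numpy array.
import numpy

def make_triplet(data, labels):
    anchor = numpy.random.randint(len(labels))
    positives = numpy.where(labels == labels[anchor])[0]
    positives = positives[positives != anchor]  # do not pair the anchor with itself
    negatives = numpy.where(labels != labels[anchor])[0]
    return (data[anchor],
            data[numpy.random.choice(positives)],
            data[numpy.random.choice(negatives)])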
@@ -16,26 +16,30 @@ from .Disk import Disk
from .Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletDisk(Triplet, Disk):
"""
This :py:class:`bob.learn.tensorflow.datashuffler.Triplet` datashuffler deals with databases that are stored on disk.
The data is loaded on the fly.
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels,
input_shape,
input_dtype="float64",
scale=True,
batch_size=1,
seed=10,
data_augmentation=None,
normalizer=Linear()):
"""
Shuffler that deals with file lists
**Parameters**
data: Paths of the files to be loaded on the fly
labels: Labels for each input sample
input_shape: Shape of the input. If `input_shape != data.shape`, the data will be reshaped
input_dtype="float64": Data type of the input
scale=True: If True, scale the input data
batch_size=1: Batch size
"""
if isinstance(data, list):
data = numpy.array(data)
@@ -48,7 +52,6 @@ class TripletDisk(Triplet, Disk):
labels=labels,
input_shape=input_shape,
input_dtype=input_dtype,
scale=scale,
batch_size=batch_size,
data_augmentation=data_augmentation,
normalizer=normalizer
......
@@ -10,34 +10,36 @@ from .Memory import Memory
from .Triplet import Triplet
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletMemory(Triplet, Memory):
"""
This :py:class:`bob.learn.tensorflow.datashuffler.Triplet` datashuffler deals with databases that are stored in memory.
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
"""
def __init__(self, data, labels,
input_shape,
input_dtype="float64",
scale=True,
batch_size=1,
seed=10,
data_augmentation=None,
normalizer=Linear()):
"""
Shuffler that deals with in-memory datasets
**Parameters**
data: Input data used for training
labels: Labels for each input sample
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
scale: If True, scale the input data
batch_size: Batch size
"""
super(TripletMemory, self).__init__(
data=data,
labels=labels,
input_shape=input_shape,
input_dtype=input_dtype,
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation,
......
@@ -6,18 +6,20 @@
import numpy
import tensorflow as tf
from .Disk import Disk
from .Triplet import Triplet
from .OnlineSampling import OnLineSampling
from scipy.spatial.distance import euclidean, cdist
import logging
logger = logging.getLogger("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
"""
This data shuffler generates triplets from :py:class:`bob.learn.tensorflow.datashuffler.Triplet` and
:py:class:`bob.learn.tensorflow.datashuffler.Disk` shufflers.
The selection of the triplets is inspired by the paper:
@@ -33,20 +35,19 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
argmin_{x_n} ||f(x_a) - f(x_n)||^2, subject to ||f(x_a) - f(x_p)||^2 < ||f(x_a) - f(x_n)||^2
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
total_identities: Number of identities inside the batch
"""
def __init__(self, data, labels,
input_shape,
input_dtype="float64",
scale=True,
batch_size=1,
seed=10,
data_augmentation=None,
@@ -58,7 +59,6 @@ class TripletWithFastSelectionDisk(Triplet, Disk, OnLineSampling):
labels=labels,
input_shape=input_shape,
input_dtype=input_dtype,
scale=scale,
batch_size=batch_size,
seed=seed,
data_augmentation=data_augmentation,
......
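# Hedged sketch of the semi-hard rule above, using scipy's cdist as the
# shuffler itself does: among negatives farther from the anchor than the
# positive, pick the closest one. Function and argument names are
# illustrative, not the class's internals.
import numpy
from scipy.spatial.distance import cdist

def semi_hard_negative(f_anchor, f_positive, f_negatives):
    d_ap = numpy.sum((f_anchor - f_positive) ** 2)
    d_an = cdist(f_anchor[None, :], f_negatives, 'sqeuclidean')[0]
    candidates = numpy.where(d_an > d_ap)[0]
    if candidates.size == 0:
        return int(numpy.argmax(d_an))  # no semi-hard negative: fall back to the farthest
    return int(candidates[numpy.argmin(d_an[candidates])])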
@@ -16,33 +16,21 @@ import logging
logger = logging.getLogger("bob.learn.tensorflow")
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletWithSelectionDisk(Triplet, Disk, OnLineSampling):
"""
This data shuffler generates triplets from :py:class:`bob.learn.tensorflow.datashuffler.Triplet` shufflers.
The selection of the triplets is random.
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
total_identities: Number of identities inside the batch
"""
def __init__(self, data, labels,
......
@@ -6,16 +6,17 @@
import numpy
import tensorflow as tf
from .Memory import Memory
from .Triplet import Triplet
from .OnlineSampling import OnLineSampling
from scipy.spatial.distance import euclidean
from bob.learn.tensorflow.datashuffler.Normalizer import Linear
class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
"""
This data shuffler generates triplets from :py:class:`bob.learn.tensorflow.datashuffler.Triplet` and
:py:class:`bob.learn.tensorflow.datashuffler.Memory` shufflers.
The selection of the triplets is inspired by the paper:
@@ -30,16 +31,15 @@ class TripletWithSelectionMemory(Triplet, Memory, OnLineSampling):
3. For each pair anchor-positive, find the "semi-hard" negative samples such that
argmin_{x_n} ||f(x_a) - f(x_n)||^2, subject to ||f(x_a) - f(x_p)||^2 < ||f(x_a) - f(x_n)||^2
**Parameters**
data: Input data used for training
labels: Labels. These labels should be set from 0..1
input_shape: The shape of the inputs
input_dtype: The data type of the inputs
batch_size: Batch size
seed: The seed of the random number generator
data_augmentation: The algorithm used for data augmentation. See :py:class:`bob.learn.tensorflow.datashuffler.DataAugmentation`
normalizer: The algorithm used for feature scaling. See :py:class:`bob.learn.tensorflow.datashuffler.ScaleFactor`, :py:class:`bob.learn.tensorflow.datashuffler.Linear` and :py:class:`bob.learn.tensorflow.datashuffler.MeanOffset`
total_identities: Number of identities inside the batch
"""
def __init__(self, data, labels,
......
# see https://docs.python.org/3/library/pkgutil.html
from pkgutil import extend_path
__path__ = extend_path(__path__, __name__)
from .Base import Base
from .Siamese import Siamese
from .Triplet import Triplet
from .Memory import Memory
from .Disk import Disk
from .OnlineSampling import OnLineSampling
from .SiameseMemory import SiameseMemory
from .TripletMemory import TripletMemory
@@ -21,5 +20,38 @@ from .ImageAugmentation import ImageAugmentation
from .Normalizer import ScaleFactor, MeanOffset, Linear
# gets sphinx autodoc done right - don't remove it
__all__ = [_ for _ in dir() if not _.startswith('_')]
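# Hedged sketch of what the three feature-scaling callables imported above
# could look like; the real classes live in .Normalizer and may differ in
# detail (the 1/256 default for ScaleFactor is an assumption).
class LinearSketch(object):
    """Identity normalization: returns the input unchanged."""
    def __call__(self, x):
        return x

class ScaleFactorSketch(object):
    """Multiplies the input by a fixed factor (e.g. 1/256 for 8-bit images)."""
    def __init__(self, scale_factor=0.00390625):
        self.scale_factor = scale_factor
    def __call__(self, x):
        return x * self.scale_factor

class MeanOffsetSketch(object):
    """Subtracts a precomputed mean from the input."""
    def __init__(self, mean_offset):
        self.mean_offset = mean_offset
    def __call__(self, x):
        return x - self.mean_offset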
def __appropriate__(*args):
"""Says object was actually declared here, an not on the import module.
Parameters:
*args: An iterable of objects to modify
Resolves `Sphinx referencing issues
<https://github.com/sphinx-doc/sphinx/issues/3048>`_
"""
for obj in args: obj.__module__ = __name__
__appropriate__(
Base,
Siamese,
Triplet,
Memory,
Disk,
OnLineSampling,
SiameseMemory,
TripletMemory,
TripletWithSelectionMemory,
TripletWithFastSelectionDisk,
SiameseDisk,
TripletDisk,
TripletWithSelectionDisk,
DataAugmentation,
ImageAugmentation,
ScaleFactor, MeanOffset, Linear
)
__all__ = [_ for _ in dir() if not _.startswith('_')]
\ No newline at end of file
@@ -12,7 +12,14 @@ import tensorflow as tf
class Gaussian(Initialization):
"""
Implements Gaussian initialization
**Parameters**
mean: Mean of the Gaussian
std: Standard deviation
seed: Seed of the random number generator
use_gpu: If True, place the variables on the GPU
"""
def __init__(self, mean=0.,
@@ -25,6 +32,15 @@ class Gaussian(Initialization):
super(Gaussian, self).__init__(seed, use_gpu=use_gpu)
def __call__(self, shape, name, scope):
"""
Create Gaussian-initialized variables
**Parameters**
shape: Shape of the variable
name: Name of the variable
scope: TensorFlow scope name
"""
if len(shape) == 4:
in_out = shape[0] * shape[1] * shape[2] + shape[3]
......
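# Hedged sketch of what a Gaussian-initialized variable looks like in the
# TensorFlow API of this era; the exact stddev handling inside Gaussian may
# differ, since the rest of the method is elided above.
import tensorflow as tf

def gaussian_variable(shape, name, mean=0.0, std=1.0, seed=10):
    initializer = tf.random_normal(shape, mean=mean, stddev=std, seed=seed)
    return tf.Variable(initializer, name=name)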
@@ -27,6 +27,15 @@ class SimplerXavier(Initialization):
super(SimplerXavier, self).__init__(seed, use_gpu=use_gpu)
def __call__(self, shape, name, scope):
"""
Create Xavier-initialized variables
**Parameters**
shape: Shape of the variable
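# Hedged sketch of a "simpler" Xavier scheme that scales the variance by the
# fan-in only; the exact constant and distribution used by SimplerXavier are
# not shown in this hunk, so sqrt(3/fan_in) with a uniform draw is an
# assumption.
import math
import tensorflow as tf

def simpler_xavier_variable(shape, name, seed=10):
    # Fan-in: for 4D convolution kernels, kernel_h * kernel_w * in_channels
    fan_in = shape[0] if len(shape) != 4 else shape[0] * shape[1] * shape[2]
    limit = math.sqrt(3.0 / fan_in)
    initializer = tf.random_uniform(shape, minval=-limit, maxval=limit, seed=seed)
    return tf.Variable(initializer, name=name)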