Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: bob/bob.learn.em

Showing 1620 additions and 1036 deletions
@@ -17,7 +17,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;} /*
static auto ML_GMMTrainer_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".ML_GMMTrainer",
"This class implements the maximum likelihood M-step of the expectation-maximisation algorithm for a GMM Machine."
"This class implements the maximum likelihood M-step (:ref:`MLE <mle>`) of the expectation-maximisation algorithm for a GMM Machine."
).add_constructor(
bob::extension::FunctionDoc(
"__init__",
@@ -15,159 +15,179 @@ from bob.io.base.test_utils import datafile
from bob.learn.em import KMeansMachine, KMeansTrainer
def equals(x, y, epsilon):
    return (abs(x - y) < epsilon).all()
def kmeans_plus_plus(machine, data, seed):
    """Python implementation of K-Means++ (initialization)"""
    n_data = data.shape[0]
    rng = bob.core.random.mt19937(seed)
    u = bob.core.random.uniform('int32', 0, n_data - 1)
    index = u(rng)
    machine.set_mean(0, data[index, :])
    weights = numpy.zeros(shape=(n_data,), dtype=numpy.float64)

    for m in range(1, machine.dim_c):
        for s in range(n_data):
            s_cur = data[s, :]
            w_cur = machine.get_distance_from_mean(s_cur, 0)
            for i in range(m):
                w_cur = min(machine.get_distance_from_mean(s_cur, i), w_cur)
            weights[s] = w_cur
        weights *= weights
        weights /= numpy.sum(weights)
        d = bob.core.random.discrete('int32', weights)
        index = d(rng)
        machine.set_mean(m, data[index, :])
def NormalizeStdArray(path):
    array = bob.io.base.load(path).astype('float64')
    std = array.std(axis=0)
    return (array / std, std)
def multiplyVectorsByFactors(matrix, vector):
    for i in range(0, matrix.shape[0]):
        for j in range(0, matrix.shape[1]):
            matrix[i, j] *= vector[j]
def flipRows(array):
    if len(array.shape) == 2:
        return numpy.array([numpy.array(array[1, :]), numpy.array(array[0, :])], 'float64')
    elif len(array.shape) == 1:
        return numpy.array([array[1], array[0]], 'float64')
    else:
        raise Exception('Input type not supported by flipRows')
if hasattr(KMeansTrainer, 'KMEANS_PLUS_PLUS'):
    def test_kmeans_plus_plus():
        # Tests the K-Means++ initialization
        dim_c = 5
        dim_d = 7
        n_samples = 150
        data = numpy.random.randn(n_samples, dim_d)
        seed = 0

        # C++ implementation
        machine = KMeansMachine(dim_c, dim_d)
        trainer = KMeansTrainer()
        trainer.rng = bob.core.random.mt19937(seed)
        trainer.initialization_method = 'KMEANS_PLUS_PLUS'
        trainer.initialize(machine, data)

        # Python implementation
        py_machine = KMeansMachine(dim_c, dim_d)
        kmeans_plus_plus(py_machine, data, seed)
        assert equals(machine.means, py_machine.means, 1e-8)


def test_kmeans_noduplicate():
    # Data/dimensions
    dim_c = 2
    dim_d = 3
    seed = 0
    data = numpy.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [4, 5, 6.]])

    # Defines machine and trainer
    machine = KMeansMachine(dim_c, dim_d)
    trainer = KMeansTrainer()
    rng = bob.core.random.mt19937(seed)
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    trainer.initialize(machine, data, rng)

    # Makes sure that the two initial mean vectors selected are different
    assert equals(machine.get_mean(0), machine.get_mean(1), 1e-8) == False
def test_kmeans_a():
    # Trains a KMeansMachine
    # This file contains draws from two 1D Gaussian distributions:
    #   * 100 samples from N(-10,1)
    #   * 100 samples from N(10,1)
    data = bob.io.base.load(datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/"))

    machine = KMeansMachine(2, 1)

    trainer = KMeansTrainer()
    # trainer.train(machine, data)
    bob.learn.em.train(trainer, machine, data)

    [variances, weights] = machine.get_variances_and_weights_for_each_cluster(data)
    variances_b = numpy.ndarray(shape=(2, 1), dtype=numpy.float64)
    weights_b = numpy.ndarray(shape=(2,), dtype=numpy.float64)
    machine.__get_variances_and_weights_for_each_cluster_init__(variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_acc__(data, variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_fin__(variances_b, weights_b)
    m1 = machine.get_mean(0)
    m2 = machine.get_mean(1)

    # Check means [-10,10] / variances [1,1] / weights [0.5,0.5]
    if (m1 < m2):
        means = numpy.array(([m1[0], m2[0]]), 'float64')
    else:
        means = numpy.array(([m2[0], m1[0]]), 'float64')
    assert equals(means, numpy.array([-10., 10.]), 2e-1)
    assert equals(variances, numpy.array([1., 1.]), 2e-1)
    assert equals(weights, numpy.array([0.5, 0.5]), 1e-3)

    assert equals(variances, variances_b, 1e-8)
    assert equals(weights, weights_b, 1e-8)
def test_kmeans_b():
    # Trains a KMeansMachine
    (arStd, std) = NormalizeStdArray(datafile("faithful.torch3.hdf5", __name__, path="../data/"))

    machine = KMeansMachine(2, 2)

    trainer = KMeansTrainer()
    # trainer.seed = 1337
    bob.learn.em.train(trainer, machine, arStd, convergence_threshold=0.001)

    [variances, weights] = machine.get_variances_and_weights_for_each_cluster(arStd)

    means = numpy.array(machine.means)
    variances = numpy.array(variances)

    multiplyVectorsByFactors(means, std)
    multiplyVectorsByFactors(variances, std ** 2)

    gmmWeights = bob.io.base.load(datafile('gmm.init_weights.hdf5', __name__, path="../data/"))
    gmmMeans = bob.io.base.load(datafile('gmm.init_means.hdf5', __name__, path="../data/"))
    gmmVariances = bob.io.base.load(datafile('gmm.init_variances.hdf5', __name__, path="../data/"))

    if (means[0, 0] < means[1, 0]):
        means = flipRows(means)
        variances = flipRows(variances)
        weights = flipRows(weights)

    assert equals(means, gmmMeans, 1e-3)
    assert equals(weights, gmmWeights, 1e-3)
    assert equals(variances, gmmVariances, 1e-3)

    # Check that there is no duplicate means during initialization
    machine = KMeansMachine(2, 1)
    trainer = KMeansTrainer()
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    data = numpy.array([[1.], [1.], [1.], [1.], [1.], [1.], [2.], [3.]])
    bob.learn.em.train(trainer, machine, data)
    assert (numpy.isnan(machine.means).any()) == False


def test_trainer_execption():
    from nose.tools import assert_raises

    # Testing Inf
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, 1.], [2, 5.], [numpy.inf, 1.0]])
    trainer = KMeansTrainer()
    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)

    # Testing Nan
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, numpy.nan], [2, 5.], [2.0, 1.0]])
    trainer = KMeansTrainer()
    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
@@ -7,112 +7,125 @@
import numpy
import bob.learn.em
import logging
logger = logging.getLogger('bob.learn.em')
def train(trainer, machine, data, max_iterations=50, convergence_threshold=None, initialize=True, rng=None,
          check_inputs=True):
    """
    Trains a machine given a trainer and the proper data

    **Parameters**:

      trainer : one of :py:class:`KMeansTrainer`, :py:class:`MAP_GMMTrainer`, :py:class:`ML_GMMTrainer`, :py:class:`ISVTrainer`, :py:class:`IVectorTrainer`, :py:class:`PLDATrainer`, :py:class:`EMPCATrainer`
        A trainer mechanism
      machine : one of :py:class:`KMeansMachine`, :py:class:`GMMMachine`, :py:class:`ISVBase`, :py:class:`IVectorMachine`, :py:class:`PLDAMachine`, :py:class:`bob.learn.linear.Machine`
        A container machine
      data : array_like <float, 2D>
        The data to be trained
      max_iterations : int
        The maximum number of iterations to train a machine
      convergence_threshold : float
        The convergence threshold to train a machine. If None, the training procedure will stop with the iterations criteria
      initialize : bool
        If True, runs the initialization procedure
      rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loop
      check_inputs : bool
        Shallow checks on the inputs; raises a ValueError if `data` contains inf or NaN
    """

    if check_inputs and type(data) is numpy.ndarray:
        if numpy.isinf(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.inf detected in `data` ")

        if numpy.isnan(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.nan detected in `data` ")

    # Initialization
    if initialize:
        if rng is not None:
            trainer.initialize(machine, data, rng)
        else:
            trainer.initialize(machine, data)

    trainer.e_step(machine, data)
    average_output = 0
    average_output_previous = 0

    if hasattr(trainer, "compute_likelihood"):
        average_output = trainer.compute_likelihood(machine)

    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        average_output_previous = average_output
        trainer.m_step(machine, data)
        trainer.e_step(machine, data)

        if hasattr(trainer, "compute_likelihood"):
            average_output = trainer.compute_likelihood(machine)

            if type(machine) is bob.learn.em.KMeansMachine:
                logger.info("average euclidean distance = %f", average_output)
            else:
                logger.info("log likelihood = %f", average_output)

            convergence_value = abs((average_output_previous - average_output) / average_output_previous)
            logger.info("convergence value = %f", convergence_value)

            # Terminates if converged (and likelihood computation is set)
            if convergence_threshold != None and convergence_value <= convergence_threshold:
                break

    if hasattr(trainer, "finalize"):
        trainer.finalize(machine, data)
def train_jfa(trainer, jfa_base, data, max_iterations=10, initialize=True, rng=None):
    """
    Trains a :py:class:`bob.learn.em.JFABase` given a :py:class:`bob.learn.em.JFATrainer` and the proper data

    **Parameters**:

      trainer : :py:class:`bob.learn.em.JFATrainer`
        A JFA trainer mechanism
      jfa_base : :py:class:`bob.learn.em.JFABase`
        A container machine
      data : [[:py:class:`bob.learn.em.GMMStats`]]
        The data to be trained
      max_iterations : int
        The maximum number of iterations to train a machine
      initialize : bool
        If True, runs the initialization procedure
      rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loops
    """

    if initialize:
        if rng is not None:
            trainer.initialize(jfa_base, data, rng)
        else:
            trainer.initialize(jfa_base, data)

    # V Subspace
    logger.info("V subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_v(jfa_base, data)
        trainer.m_step_v(jfa_base, data)
    trainer.finalize_v(jfa_base, data)

    # U subspace
    logger.info("U subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_u(jfa_base, data)
        trainer.m_step_u(jfa_base, data)
    trainer.finalize_u(jfa_base, data)

    # D subspace
    logger.info("D subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_d(jfa_base, data)
        trainer.m_step_d(jfa_base, data)
    trainer.finalize_d(jfa_base, data)
@@ -12,7 +12,7 @@
/*** zt_norm ***/
bob::extension::FunctionDoc zt_norm = bob::extension::FunctionDoc(
"ztnorm",
"Normalise raw scores with ZT-Norm."
"Normalise raw scores with :ref:`ZT-Norm <ztnorm>`."
"Assume that znorm and tnorm have no common subject id.",
0,
true
@@ -72,7 +72,7 @@ PyObject* PyBobLearnEM_ztNorm(PyObject*, PyObject* args, PyObject* kwargs) {
/*** t_norm ***/
bob::extension::FunctionDoc t_norm = bob::extension::FunctionDoc(
"tnorm",
"Normalise raw scores with T-Norm",
"Normalise raw scores with :ref:`T-Norm <tnorm>`",
0,
true
)
@@ -109,7 +109,7 @@ PyObject* PyBobLearnEM_tNorm(PyObject*, PyObject* args, PyObject* kwargs) {
/*** z_norm ***/
bob::extension::FunctionDoc z_norm = bob::extension::FunctionDoc(
"znorm",
"Normalise raw scores with Z-Norm",
"Normalise raw scores with :ref:`Z-Norm <znorm>`",
0,
true
)
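
As background for the normalisation docstrings above: Z-norm standardises each raw score with the mean and standard deviation of impostor scores obtained against the same model [Auckenthaler2000]_. A rough NumPy sketch of that statistic (the arrays below are hypothetical; the bob.learn.em.znorm binding takes the raw-score and Z-score matrices directly):

import numpy

raw_scores = numpy.random.normal(0.0, 5.0, (200, 10))    # probes x models
z_scores = numpy.random.normal(-5.0, 5.0, (200, 10))     # impostor probes x models

mu = z_scores.mean(axis=0)      # per-model impostor mean
sigma = z_scores.std(axis=0)    # per-model impostor standard deviation
z_normed = (raw_scores - mu) / sigma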
##############################################################################
#
# Copyright (c) 2006 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Bootstrap a buildout-based project
Simply run this script in a directory containing a buildout.cfg.
The script accepts buildout command-line options, so you can
use the -c option to specify an alternate configuration file.
"""
import os
import shutil
import sys
import tempfile
from optparse import OptionParser
__version__ = '2015-07-01'
# See zc.buildout's changelog if this version is up to date.
tmpeggs = tempfile.mkdtemp(prefix='bootstrap-')
usage = '''\
[DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options]
Bootstraps a buildout-based project.
Simply run this script in a directory containing a buildout.cfg, using the
Python that you want bin/buildout to use.
Note that by using --find-links to point to local resources, you can keep
this script from going over the network.
'''
parser = OptionParser(usage=usage)
parser.add_option("--version",
action="store_true", default=False,
help=("Return bootstrap.py version."))
parser.add_option("-t", "--accept-buildout-test-releases",
dest='accept_buildout_test_releases',
action="store_true", default=False,
help=("Normally, if you do not specify a --version, the "
"bootstrap script and buildout gets the newest "
"*final* versions of zc.buildout and its recipes and "
"extensions for you. If you use this flag, "
"bootstrap and buildout will get the newest releases "
"even if they are alphas or betas."))
parser.add_option("-c", "--config-file",
help=("Specify the path to the buildout configuration "
"file to be used."))
parser.add_option("-f", "--find-links",
help=("Specify a URL to search for buildout releases"))
parser.add_option("--allow-site-packages",
action="store_true", default=False,
help=("Let bootstrap.py use existing site packages"))
parser.add_option("--buildout-version",
help="Use a specific zc.buildout version")
parser.add_option("--setuptools-version",
help="Use a specific setuptools version")
parser.add_option("--setuptools-to-dir",
help=("Allow for re-use of existing directory of "
"setuptools versions"))
options, args = parser.parse_args()
if options.version:
    print("bootstrap.py version %s" % __version__)
    sys.exit(0)
######################################################################
# load/install setuptools
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

ez = {}
if os.path.exists('ez_setup.py'):
    exec(open('ez_setup.py').read(), ez)
else:
    exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez)
if not options.allow_site_packages:
    # ez_setup imports site, which adds site packages
    # this will remove them from the path to ensure that incompatible versions
    # of setuptools are not in the path
    import site
    # inside a virtualenv, there is no 'getsitepackages'.
    # We can't remove these reliably
    if hasattr(site, 'getsitepackages'):
        for sitepackage_path in site.getsitepackages():
            # Strip all site-packages directories from sys.path that
            # are not sys.prefix; this is because on Windows
            # sys.prefix is a site-package directory.
            if sitepackage_path != sys.prefix:
                sys.path[:] = [x for x in sys.path
                               if sitepackage_path not in x]
setup_args = dict(to_dir=tmpeggs, download_delay=0)
if options.setuptools_version is not None:
    setup_args['version'] = options.setuptools_version
if options.setuptools_to_dir is not None:
    setup_args['to_dir'] = options.setuptools_to_dir
ez['use_setuptools'](**setup_args)
import setuptools
import pkg_resources
# This does not (always?) update the default working set. We will
# do it.
for path in sys.path:
    if path not in pkg_resources.working_set.entries:
        pkg_resources.working_set.add_entry(path)
######################################################################
# Install buildout
ws = pkg_resources.working_set
setuptools_path = ws.find(
pkg_resources.Requirement.parse('setuptools')).location
# Fix sys.path here as easy_install.pth added before PYTHONPATH
cmd = [sys.executable, '-c',
'import sys; sys.path[0:0] = [%r]; ' % setuptools_path +
'from setuptools.command.easy_install import main; main()',
'-mZqNxd', tmpeggs]
find_links = os.environ.get(
'bootstrap-testing-find-links',
options.find_links or
('http://downloads.buildout.org/'
if options.accept_buildout_test_releases else None)
)
if find_links:
    cmd.extend(['-f', find_links])
requirement = 'zc.buildout'
version = options.buildout_version
if version is None and not options.accept_buildout_test_releases:
    # Figure out the most recent final version of zc.buildout.
    import setuptools.package_index
    _final_parts = '*final-', '*final'

    def _final_version(parsed_version):
        try:
            return not parsed_version.is_prerelease
        except AttributeError:
            # Older setuptools
            for part in parsed_version:
                if (part[:1] == '*') and (part not in _final_parts):
                    return False
            return True

    index = setuptools.package_index.PackageIndex(
        search_path=[setuptools_path])
    if find_links:
        index.add_find_links((find_links,))
    req = pkg_resources.Requirement.parse(requirement)
    if index.obtain(req) is not None:
        best = []
        bestv = None
        for dist in index[req.project_name]:
            distv = dist.parsed_version
            if _final_version(distv):
                if bestv is None or distv > bestv:
                    best = [dist]
                    bestv = distv
                elif distv == bestv:
                    best.append(dist)
        if best:
            best.sort()
            version = best[-1].version

if version:
    requirement = '=='.join((requirement, version))
cmd.append(requirement)
import subprocess
if subprocess.call(cmd) != 0:
    raise Exception(
        "Failed to execute command:\n%s" % repr(cmd)[1:-1])
######################################################################
# Import and run buildout
ws.add_entry(tmpeggs)
ws.require(requirement)
import zc.buildout.buildout
if not [a for a in args if '=' not in a]:
    args.append('bootstrap')

# if -c was provided, we push it back into args for buildout's main function
if options.config_file is not None:
    args[0:0] = ['-c', options.config_file]
zc.buildout.buildout.main(args)
shutil.rmtree(tmpeggs)
@@ -25,6 +25,7 @@ extensions = [
'sphinx.ext.intersphinx',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'matplotlib.sphinxext.plot_directive'
]
import sphinx
@@ -231,7 +232,6 @@ autodoc_member_order = 'bysource'
autodoc_default_flags = [
'members',
'undoc-members',
'inherited-members',
'show-inheritance',
]
.. vim: set fileencoding=utf-8 :
.. Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
.. Tue 17 Feb 2015 13:50:06 CET
..
.. Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
.. _bob.learn.em:
@@ -10,12 +6,12 @@
Expectation Maximization Machine Learning Tools
================================================
The EM algorithm is an iterative method that estimates parameters for statistical models, where the model depends on unobserved latent variables. The EM iteration alternates between performing an expectation (E) step, which creates a function for the expectation of the log-likelihood evaluated using the current estimate for the parameters, and a maximization (M) step, which computes parameters maximizing the expected log-likelihood found on the E step. These parameter-estimates are then used to determine the distribution of the latent variables in the next E step [WikiEM]_.
This package is a part of Bob_. It implements a general EM algorithm and
includes implementations of the following algorithms (a minimal usage sketch
follows the list below):

- K-Means
- Maximum Likelihood (ML)
- Maximum a Posteriori (MAP)
- Inter Session Variability Modelling (ISV)
- Joint Factor Analysis (JFA)
- Total Variability Modeling (iVectors)
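
As a first taste of the API (the array below is only a stand-in for real training data; complete, reproducible examples live in the user guide), every algorithm follows the same pattern of a machine, a trainer and the generic ``bob.learn.em.train`` loop:

.. code-block:: python

   import numpy
   import bob.learn.em

   data = numpy.random.randn(1000, 2)          # any 2D float array
   kmeans = bob.learn.em.KMeansMachine(3, 2)   # 3 clusters, 2 dimensions
   trainer = bob.learn.em.KMeansTrainer()
   bob.learn.em.train(trainer, kmeans, data,
                      max_iterations=200, convergence_threshold=1e-5)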
@@ -31,7 +27,7 @@ Documentation
guide
py_api
References
-----------
@@ -47,7 +43,9 @@ References
.. [Roweis1998] Roweis, Sam. "EM algorithms for PCA and SPCA." Advances in neural information processing systems (1998): 626-632.
.. [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
.. [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
.. [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
.. [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE signal processing letters 12.7 (2005): 532-535.
Indices and tables
@@ -12,6 +12,7 @@
.. _blitz++: http://www.oonumerics.org/blitz
.. _bob's idiap guide: https://gitlab.idiap.ch/bob/bob/wikis/Using-Bob-at-Idiap
.. _bob's website: https://www.idiap.ch/software/bob
.. _bob: https://www.idiap.ch/software/bob
.. _boost: http://www.boost.org
.. _buildbot: http://trac.buildbot.net
.. _buildout: http://pypi.python.org/pypi/zc.buildout/
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is firstly used to used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def isv_train(features, ubm):
"""
Train U matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats` organized by class
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
user_stats = []
for f in user:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
ubm.acc_statistics(f, s)
user_stats.append(s)
stats.append(user_stats)
relevance_factor = 4
subspace_dimension_of_u = 1
isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
trainer = bob.learn.em.ISVTrainer(relevance_factor)
# trainer.rng = bob.core.random.mt19937(int(self.init_seed))
bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)
return isvbase
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
isvbase = isv_train([setosa, versicolor, virginica], ubm)
# Variability direction
u0 = isvbase.u[0:2, 0] / numpy.linalg.norm(isvbase.u[0:2, 0])
u1 = isvbase.u[2:4, 0] / numpy.linalg.norm(isvbase.u[2:4, 0])
u2 = isvbase.u[4:6, 0] / numpy.linalg.norm(isvbase.u[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
# marker=".", label="within class varibility", s=0.01)
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend()
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is firstly used to used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def jfa_train(features, ubm):
"""
Trains U and V matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats` organized by class
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
user_stats = []
for f in user:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
ubm.acc_statistics(f, s)
user_stats.append(s)
stats.append(user_stats)
subspace_dimension_of_u = 1
subspace_dimension_of_v = 1
jfa_base = bob.learn.em.JFABase(
ubm, subspace_dimension_of_u, subspace_dimension_of_v)
trainer = bob.learn.em.JFATrainer()
# trainer.rng = bob.core.random.mt19937(int(self.init_seed))
bob.learn.em.train_jfa(trainer, jfa_base, stats, max_iterations=50)
return jfa_base
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
jfa_base = jfa_train([setosa, versicolor, virginica], ubm)
# Variability direction U
u0 = jfa_base.u[0:2, 0] / numpy.linalg.norm(jfa_base.u[0:2, 0])
u1 = jfa_base.u[2:4, 0] / numpy.linalg.norm(jfa_base.u[2:4, 0])
u2 = jfa_base.u[4:6, 0] / numpy.linalg.norm(jfa_base.u[4:6, 0])
# Variability direction V
v0 = jfa_base.v[0:2, 0] / numpy.linalg.norm(jfa_base.v[0:2, 0])
v1 = jfa_base.v[2:4, 0] / numpy.linalg.norm(jfa_base.v[2:4, 0])
v2 = jfa_base.v[4:6, 0] / numpy.linalg.norm(jfa_base.v[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
# marker=".", label="within class varibility", s=0.01)
# U
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
# V
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], v0[0], v0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], v1[0], v1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], v2[0], v2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + v0[0], ubm.means[0, 1] +
v0[1] - 0.1, r'$\mathbf{V}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + v1[0], ubm.means[1, 1] +
v1[1] - 0.1, r'$\mathbf{V}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + v2[0], ubm.means[2, 1] +
v2[1] - 0.1, r'$\mathbf{V}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.tight_layout()
# plt.show()
import matplotlib.pyplot as plt
import bob.db.iris
import bob.learn.em
import numpy
numpy.random.seed(10)
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# A GMM with three gaussian components and a feature dimensionality of two
mle_machine = bob.learn.em.GMMMachine(3, 2)
mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
# MAP-adapting the prior (MLE) machine to the data
map_machine = bob.learn.em.GMMMachine(3, 2)
map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine with MAP
figure, ax = plt.subplots()
# plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1],
c="dimgrey", label="virginica")
plt.scatter(mle_machine.means[:, 0],
mle_machine.means[:, 1], c="blue", marker="x",
label="prior centroids - mle", s=60)
plt.scatter(map_machine.means[:, 0], map_machine.means[:, 1], c="red",
marker="^", label="adapted centroids - map", s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
plt.show()
import bob.learn.em
import bob.db.iris
import numpy
import matplotlib.pyplot as plt
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# A GMM with three gaussian components and a feature dimensionality of two
machine = bob.learn.em.GMMMachine(3, 2)
trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
bob.learn.em.train(trainer, machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1],
c="dimgrey", label="virginica")
plt.scatter(machine.means[:, 0],
machine.means[:, 1], c="blue", marker="x", label="centroids", s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
t_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
# T - Normalizing
t_norm_impostors = bob.learn.em.tnorm(impostor_scores, t_scores)
t_norm_genuine = bob.learn.em.tnorm(genuine_scores, t_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("T-norm scores", fontsize=8)
plt.hist(t_norm_impostors.reshape(n_scores_per_client * n_clients),
label='T-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(t_norm_genuine.reshape(n_scores_per_client * n_clients),
label='T-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
t_scores = numpy.random.normal(-6., 5, (n_scores_per_client, n_clients))
# T-normalizing the Z-scores
zt_scores = bob.learn.em.tnorm(z_scores, t_scores)
# ZT - Normalizing
zt_norm_impostors = bob.learn.em.ztnorm(
impostor_scores, z_scores, t_scores, zt_scores)
zt_norm_genuine = bob.learn.em.ztnorm(
genuine_scores, z_scores, t_scores, zt_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("T-norm scores", fontsize=8)
plt.hist(zt_norm_impostors.reshape(n_scores_per_client * n_clients),
label='T-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(zt_norm_genuine.reshape(n_scores_per_client * n_clients),
label='T-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
# Z - Normalizing
z_norm_impostors = bob.learn.em.znorm(impostor_scores, z_scores)
z_norm_genuine = bob.learn.em.znorm(genuine_scores, z_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("Z-norm scores", fontsize=8)
plt.hist(z_norm_impostors.reshape(n_scores_per_client * n_clients),
label='Z-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(z_norm_genuine.reshape(n_scores_per_client * n_clients),
label='Z-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.yticks([], [])
plt.legend(fontsize=8)
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is firstly used to used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def ivector_train(features, ubm):
"""
Trains T matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats`
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
for f in user:
ubm.acc_statistics(f, s)
stats.append(s)
subspace_dimension_of_t = 2
ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
ivector_machine = bob.learn.em.IVectorMachine(
ubm, subspace_dimension_of_t, 10e-5)
# train IVector model
bob.learn.em.train(ivector_trainer, ivector_machine, stats, 500)
return ivector_machine
def acc_stats(data, gmm):
    gmm_stats = []
    for d in data:
        s = bob.learn.em.GMMStats(gmm.shape[0], gmm.shape[1])
        gmm.acc_statistics(d, s)
        gmm_stats.append(s)

    return gmm_stats
def compute_ivectors(gmm_stats, ivector_machine):
"""
Given :py:class:`bob.learn.em.GMMStats` and an T matrix, get the iVectors.
"""
ivectors = []
for g in gmm_stats:
ivectors.append(ivector_machine(g))
return numpy.array(ivectors)
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
ivector_machine = ivector_train([setosa, versicolor, virginica], ubm)
# Variability direction U
# t0 = T[0:2, 0] / numpy.linalg.norm(T[0:2, 0])
# t1 = T[2:4, 0] / numpy.linalg.norm(T[2:4, 0])
# t2 = T[4:6, 0] / numpy.linalg.norm(T[4:6, 0])
# figure, ax = plt.subplots()
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw fetures")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
# plt.grid(True)
# plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.xticks([], [])
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
ivector_setosa = compute_ivectors(acc_stats(setosa, ubm), ivector_machine)
ivector_versicolor = compute_ivectors(
acc_stats(versicolor, ubm), ivector_machine)
ivector_virginica = compute_ivectors(
acc_stats(virginica, ubm), ivector_machine)
# Whitening iVectors
whitening_trainer = bob.learn.linear.WhiteningTrainer()
whitener_machine = bob.learn.linear.Machine(
ivector_setosa.shape[1], ivector_setosa.shape[1])
whitening_trainer.train(numpy.vstack(
(ivector_setosa, ivector_versicolor, ivector_virginica)), whitener_machine)
ivector_setosa = whitener_machine(ivector_setosa)
ivector_versicolor = whitener_machine(ivector_versicolor)
ivector_virginica = whitener_machine(ivector_virginica)
# LDA ivectors
lda_trainer = bob.learn.linear.FisherLDATrainer()
lda_machine = bob.learn.linear.Machine(
ivector_setosa.shape[1], ivector_setosa.shape[1])
lda_trainer.train([ivector_setosa, ivector_versicolor,
ivector_virginica], lda_machine)
ivector_setosa = lda_machine(ivector_setosa)
ivector_versicolor = lda_machine(ivector_versicolor)
ivector_virginica = lda_machine(ivector_virginica)
# WCCN ivectors
# wccn_trainer = bob.learn.linear.WCCNTrainer()
# wccn_machine = bob.learn.linear.Machine(
# ivector_setosa.shape[1], ivector_setosa.shape[1])
# wccn_trainer.train([ivector_setosa, ivector_versicolor,
# ivector_virginica], wccn_machine)
# ivector_setosa = wccn_machine(ivector_setosa)
# ivector_versicolor = wccn_machine(ivector_versicolor)
# ivector_virginica = wccn_machine(ivector_virginica)
plt.title("First two ivectors")
plt.scatter(ivector_setosa[:, 0],
ivector_setosa[:, 1], c="darkcyan", label="setosa",
marker="x")
plt.scatter(ivector_versicolor[:, 0],
ivector_versicolor[:, 1], c="goldenrod", label="versicolor",
marker="x")
plt.scatter(ivector_virginica[:, 0],
ivector_virginica[:, 1], c="dimgrey", label="virginica",
marker="x")
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
# plt.xlabel('Sepal length')
# plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.tight_layout()
plt.show()
import bob.learn.em
import bob.db.iris
import numpy
import matplotlib.pyplot as plt
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# Training KMeans
# Three clusters with a feature dimensionality of two
machine = bob.learn.em.KMeansMachine(3, 2)
trainer = bob.learn.em.KMeansTrainer()
bob.learn.em.train(trainer, machine, data, max_iterations=200,
convergence_threshold=1e-5) # Train the KMeansMachine
# Plotting
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0],
setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0],
versicolor[:, 1], c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0],
virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(machine.means[:, 0],
machine.means[:, 1], c="blue", marker="x", label="centroids",
s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
@@ -17,21 +17,21 @@ Trainers
........
.. autosummary::

   bob.learn.em.KMeansTrainer
   bob.learn.em.ML_GMMTrainer
   bob.learn.em.MAP_GMMTrainer
   bob.learn.em.ISVTrainer
   bob.learn.em.JFATrainer
   bob.learn.em.IVectorTrainer
   bob.learn.em.PLDATrainer
   bob.learn.em.EMPCATrainer


Machines
........

.. autosummary::

   bob.learn.em.KMeansMachine
   bob.learn.em.Gaussian
   bob.learn.em.GMMStats
@@ -43,7 +43,7 @@ Machines
   bob.learn.em.IVectorMachine
   bob.learn.em.PLDABase
   bob.learn.em.PLDAMachine
Functions
---------
.. autosummary::