diff --git a/.gitignore b/.gitignore
index dae95ef0644140d1c14bdc82855252e51625a8de..155a08205421586e9ff5f5a80fc6957b2bd715d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,5 +15,6 @@ dist
 .nfs*
 .gdb_history
 build
+.DS_Store
 *.egg
 src/
diff --git a/bob/learn/em/gmm_machine.cpp b/bob/learn/em/gmm_machine.cpp
index 061f39f7958f6b668d4640b476caadc14c4ef2eb..17a76a612c97ac04fe5ad20b234b94fc9dc2ea69 100644
--- a/bob/learn/em/gmm_machine.cpp
+++ b/bob/learn/em/gmm_machine.cpp
@@ -15,7 +15,8 @@
 
 static auto GMMMachine_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".GMMMachine",
-  "This class implements a multivariate diagonal Gaussian distribution.",
+  "This class implements the statistical model for multivariate diagonal mixture Gaussian distribution (GMM)."
+  "A GMM is defined as :math:`\\sum_{c=0}^{C} \\omega_c \\mathcal{N}(x | \\mu_c, \\sigma_c)`, where :math:`C` is the number of Gaussian components :math:`\\mu_c`, :math:`\\sigma_c` and :math:`\\omega_c` are respectively the the mean, variance and the weight of each gaussian component :math:`c`.",
   "See Section 2.3.9 of Bishop, \"Pattern recognition and machine learning\", 2006"
 ).add_constructor(
   bob::extension::FunctionDoc(
@@ -744,7 +745,7 @@ static PyObject* PyBobLearnEMGMMMachine_loglikelihood_(PyBobLearnEMGMMMachineObj
 /*** acc_statistics ***/
 static auto acc_statistics = bob::extension::FunctionDoc(
   "acc_statistics",
-  "Accumulate the GMM statistics for this sample(s). Inputs are checked.",
+  "Accumulate the GMM statistics (:py:class:`bob.learn.em.GMMStats)` for this sample(s). Inputs are checked.",
   "",
   true
 )
@@ -780,7 +781,7 @@ static PyObject* PyBobLearnEMGMMMachine_accStatistics(PyBobLearnEMGMMMachineObje
 /*** acc_statistics_ ***/
 static auto acc_statistics_ = bob::extension::FunctionDoc(
   "acc_statistics_",
-  "Accumulate the GMM statistics for this sample(s). Inputs are NOT checked.",
+  "Accumulate the GMM statistics (:py:class:`bob.learn.em.GMMStats)` for this sample(s). Inputs are NOT checked.",
   "",
   true
 )
@@ -853,7 +854,7 @@ static PyObject* PyBobLearnEMGMMMachine_setVarianceThresholds_method(PyBobLearnE
 /*** get_gaussian ***/
 static auto get_gaussian = bob::extension::FunctionDoc(
   "get_gaussian",
-  "Get the specified Gaussian component.",
+  "Get the specified Gaussian (:py:class:`bob.learn.em.Gaussian`) component.",
   ".. note:: An exception is thrown if i is out of range.",
   true
 )
diff --git a/bob/learn/em/include/bob.learn.em/KMeansTrainer.h b/bob/learn/em/include/bob.learn.em/KMeansTrainer.h
index 8845b50984d28a83d47483f122b62a0e57f6ac6e..58bf1bedaff51a3413c5b75cf52a1314e7626a52 100644
--- a/bob/learn/em/include/bob.learn.em/KMeansTrainer.h
+++ b/bob/learn/em/include/bob.learn.em/KMeansTrainer.h
@@ -40,7 +40,7 @@ class KMeansTrainer
     /**
      * @brief Constructor
      */
-    KMeansTrainer(InitializationMethod=RANDOM);
+    KMeansTrainer(InitializationMethod=RANDOM_NO_DUPLICATE);
 
     /**
      * @brief Virtualize destructor
diff --git a/bob/learn/em/isv_base.cpp b/bob/learn/em/isv_base.cpp
index a3f09559a52c1e27a9a8f962db7c76c274a10e98..ac6947422ac5c0c3c7db172d3d401146793a5fa9 100644
--- a/bob/learn/em/isv_base.cpp
+++ b/bob/learn/em/isv_base.cpp
@@ -184,7 +184,7 @@ static auto supervector_length = bob::extension::VariableDoc(
   "int",
   "Returns the supervector length.",
   "NGaussians x NInputs: Number of Gaussian components by the feature dimensionality"
-  "WARNING An exception is thrown if no Universal Background Model has been set yet."
+  "An exception is thrown if no Universal Background Model has been set yet."
   ""
 );
 PyObject* PyBobLearnEMISVBase_getSupervectorLength(PyBobLearnEMISVBaseObject* self, void*) {
diff --git a/bob/learn/em/isv_trainer.cpp b/bob/learn/em/isv_trainer.cpp
index 02f453bbbc1715bdc2375d4c42aa36ee8f7836b0..dbf310d1ee27aa999ad019aa27fef29d720b4036 100644
--- a/bob/learn/em/isv_trainer.cpp
+++ b/bob/learn/em/isv_trainer.cpp
@@ -88,7 +88,7 @@ int list_as_vector(PyObject* list, std::vector<blitz::Array<double,N> >& vec)
 static auto ISVTrainer_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".ISVTrainer",
   "ISVTrainer"
-  "References: [Vogt2008,McCool2013]",
+  "Train Intersession varibility modeling :ref:`ISV <isv>`.",
   ""
 ).add_constructor(
   bob::extension::FunctionDoc(
diff --git a/bob/learn/em/ivector_machine.cpp b/bob/learn/em/ivector_machine.cpp
index da4de2d89526d8d099431d219568e596d09ee328..b101be1debb3fa77b2d23399b25d6ee633ce5899 100644
--- a/bob/learn/em/ivector_machine.cpp
+++ b/bob/learn/em/ivector_machine.cpp
@@ -15,8 +15,8 @@
 
 static auto IVectorMachine_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".IVectorMachine",
-  "An IVectorMachine consists of a Total Variability subspace :math:`T` and allows the extraction of IVector"
-  "References: [Dehak2010]_",
+  "Statistical model for the Total Variability training (:ref:`iVectors <ivector>`)"
+  "",
   ""
 ).add_constructor(
   bob::extension::FunctionDoc(
@@ -189,7 +189,7 @@ static auto supervector_length = bob::extension::VariableDoc(
 
   "Returns the supervector length.",
   "NGaussians x NInputs: Number of Gaussian components by the feature dimensionality"
-  "@warning An exception is thrown if no Universal Background Model has been set yet."
+  "An exception is thrown if no Universal Background Model has been set yet."
   ""
 );
 PyObject* PyBobLearnEMIVectorMachine_getSupervectorLength(PyBobLearnEMIVectorMachineObject* self, void*) {
diff --git a/bob/learn/em/ivector_trainer.cpp b/bob/learn/em/ivector_trainer.cpp
index 33193af77d2eb791cc152c00a3ae99996d2b3c1d..96f2c3af971959710c26fea302f9556321433e14 100644
--- a/bob/learn/em/ivector_trainer.cpp
+++ b/bob/learn/em/ivector_trainer.cpp
@@ -36,9 +36,9 @@ static int extract_GMMStats_1d(PyObject *list,
 static auto IVectorTrainer_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".IVectorTrainer",
   "IVectorTrainer"
-  "An IVectorTrainer to learn a Total Variability subspace :math:`$T$`"
-  " (and eventually a covariance matrix :math:`$\\Sigma$`).",
-  " References: [Dehak2010]"
+  "Trains the Total Variability subspace :math:`$T$` to generate :ref:`iVectors <ivector>`."
+  "",
+  ""
 ).add_constructor(
   bob::extension::FunctionDoc(
     "__init__",
diff --git a/bob/learn/em/jfa_base.cpp b/bob/learn/em/jfa_base.cpp
index 8a92d65405cf438c36bf95454df8fea52dba9058..aeec4a0367f758e18ba109bd13473afc25cea9cf 100644
--- a/bob/learn/em/jfa_base.cpp
+++ b/bob/learn/em/jfa_base.cpp
@@ -15,8 +15,8 @@
 
 static auto JFABase_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".JFABase",
-  "A JFABase instance can be seen as a container for :math:`U`, :math:`V` and :math:`D` when performing Joint Factor Analysis (JFA).\n\n"
-  "References: [Vogt2008]_ [McCool2013]_",
+  "Container for :math:`U`, :math:`V` and :math:`D` when performing Joint Factor Analysis (:ref:`JFA <jfa>`).\n\n"
+  "",
   ""
 ).add_constructor(
   bob::extension::FunctionDoc(
@@ -192,7 +192,7 @@ static auto supervector_length = bob::extension::VariableDoc(
 
   "Returns the supervector length.",
   "NGaussians x NInputs: Number of Gaussian components by the feature dimensionality"
-  "@warning An exception is thrown if no Universal Background Model has been set yet."
+  "An exception is thrown if no Universal Background Model has been set yet."
   ""
 );
 PyObject* PyBobLearnEMJFABase_getSupervectorLength(PyBobLearnEMJFABaseObject* self, void*) {
diff --git a/bob/learn/em/jfa_trainer.cpp b/bob/learn/em/jfa_trainer.cpp
index d8ed5d78d9151b8f0eaf6c9d382d781a2bb7f111..5d9abadc96821d28cec456dc20b1ef49c5364376 100644
--- a/bob/learn/em/jfa_trainer.cpp
+++ b/bob/learn/em/jfa_trainer.cpp
@@ -87,8 +87,8 @@ int list_as_vector(PyObject* list, std::vector<blitz::Array<double,N> >& vec)
 
 static auto JFATrainer_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".JFATrainer",
-  "JFATrainer"
-  "References: [Vogt2008,McCool2013]",
+  "Trains a Joint Factor Analysis (:ref:`JFA <jfa>`) on top of GMMs"
+  "",
   ""
 ).add_constructor(
   bob::extension::FunctionDoc(
diff --git a/bob/learn/em/kmeans_machine.cpp b/bob/learn/em/kmeans_machine.cpp
index 1abad646ceb9e2ef7a1ffcd2bbb44f7b022c1718..416a84b6268d368757877b9c90e5a9f5fd6877e8 100644
--- a/bob/learn/em/kmeans_machine.cpp
+++ b/bob/learn/em/kmeans_machine.cpp
@@ -15,7 +15,7 @@
 
 static auto KMeansMachine_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".KMeansMachine",
-  "This class implements a k-means classifier.\n"
+  "Statistical model for the :ref:`k-means <kmeans>` .\n"
   "See Section 9.1 of Bishop, \"Pattern recognition and machine learning\", 2006"
 ).add_constructor(
   bob::extension::FunctionDoc(
diff --git a/bob/learn/em/kmeans_trainer.cpp b/bob/learn/em/kmeans_trainer.cpp
index 6d4ef677c0a97aaf81ca9e87d5bfc2ffbaa2ac31..f06dc4279769b5a0d40c0cd798c83c3ee09b86be 100644
--- a/bob/learn/em/kmeans_trainer.cpp
+++ b/bob/learn/em/kmeans_trainer.cpp
@@ -42,8 +42,8 @@ static inline const std::string& IM2string(bob::learn::em::KMeansTrainer::Initia
 
 static auto KMeansTrainer_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".KMeansTrainer",
-  "Trains a KMeans machine."
-  "This class implements the expectation-maximization algorithm for a k-means machine."
+  "Trains a KMeans clustering :ref:`k-means <kmeans>.`"
+  "This class implements the expectation-maximization algorithm for a k-means."
   "See Section 9.1 of Bishop, \"Pattern recognition and machine learning\", 2006"
   "It uses a random initialization of the means followed by the expectation-maximization algorithm"
 ).add_constructor(
diff --git a/bob/learn/em/linear_scoring.cpp b/bob/learn/em/linear_scoring.cpp
index c475ff0cd9e0128450bf859ff461cb88b679e24e..f29b59370b12ef35635c78ec6a267dec756f8ac9 100644
--- a/bob/learn/em/linear_scoring.cpp
+++ b/bob/learn/em/linear_scoring.cpp
@@ -71,6 +71,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;}
 /*** linear_scoring ***/
 bob::extension::FunctionDoc linear_scoring1 = bob::extension::FunctionDoc(
   "linear_scoring",
+  "The :ref:`Linear scoring <linearscoring>` is an approximation to the log-likelihood ratio that was shown to be as accurate and up to two orders of magnitude more efficient to compute [Glembek2009]_."
   "",
   0,
   true
diff --git a/bob/learn/em/map_gmm_trainer.cpp b/bob/learn/em/map_gmm_trainer.cpp
index 767e283f96b65cf06e1ba515dbe18c49f7b388db..ed4b6ed759e866a940f2064cabcfc47e636ddc32 100644
--- a/bob/learn/em/map_gmm_trainer.cpp
+++ b/bob/learn/em/map_gmm_trainer.cpp
@@ -17,7 +17,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;}  /*
 
 static auto MAP_GMMTrainer_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".MAP_GMMTrainer",
-  "This class implements the maximum a posteriori M-step of the expectation-maximization algorithm for a GMM Machine. The prior parameters are encoded in the form of a GMM (e.g. a universal background model). The EM algorithm thus performs GMM adaptation."
+  "This class implements the maximum a posteriori (:ref:`MAP <map>`) M-step of the expectation-maximization algorithm for a GMM Machine. The prior parameters are encoded in the form of a GMM (e.g. a universal background model). The EM algorithm thus performs GMM adaptation."
 ).add_constructor(
   bob::extension::FunctionDoc(
     "__init__",
diff --git a/bob/learn/em/ml_gmm_trainer.cpp b/bob/learn/em/ml_gmm_trainer.cpp
index 708f9345bf960cfd5ad818c375497aadf75e5bbe..734fe2b9e910702519ef6129830e1a38df1e309a 100644
--- a/bob/learn/em/ml_gmm_trainer.cpp
+++ b/bob/learn/em/ml_gmm_trainer.cpp
@@ -17,7 +17,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;}  /*
 
 static auto ML_GMMTrainer_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".ML_GMMTrainer",
-  "This class implements the maximum likelihood M-step of the expectation-maximisation algorithm for a GMM Machine."
+  "This class implements the maximum likelihood M-step (:ref:`MLE <mle>`) of the expectation-maximisation algorithm for a GMM Machine."
 ).add_constructor(
   bob::extension::FunctionDoc(
     "__init__",
diff --git a/bob/learn/em/ztnorm.cpp b/bob/learn/em/ztnorm.cpp
index adc8a6f68daaa7e81d85b928b4be0e9fa1054b35..6b1c9c03cc5a80ba6cf497a3a617ea56a9d42d04 100644
--- a/bob/learn/em/ztnorm.cpp
+++ b/bob/learn/em/ztnorm.cpp
@@ -12,7 +12,7 @@
 /*** zt_norm ***/
 bob::extension::FunctionDoc zt_norm = bob::extension::FunctionDoc(
   "ztnorm",
-  "Normalise raw scores with ZT-Norm."
+  "Normalise raw scores with :ref:`ZT-Norm <ztnorm>`."
   "Assume that znorm and tnorm have no common subject id.",
   0,
   true
@@ -72,7 +72,7 @@ PyObject* PyBobLearnEM_ztNorm(PyObject*, PyObject* args, PyObject* kwargs) {
 /*** t_norm ***/
 bob::extension::FunctionDoc t_norm = bob::extension::FunctionDoc(
   "tnorm",
-  "Normalise raw scores with T-Norm",
+  "Normalise raw scores with :ref:`T-Norm <tnorm>`",
   0,
   true
 )
@@ -109,7 +109,7 @@ PyObject* PyBobLearnEM_tNorm(PyObject*, PyObject* args, PyObject* kwargs) {
 /*** z_norm ***/
 bob::extension::FunctionDoc z_norm = bob::extension::FunctionDoc(
   "znorm",
-  "Normalise raw scores with Z-Norm",
+  "Normalise raw scores with :ref:`Z-Norm <znorm>`",
   0,
   true
 )
diff --git a/doc/conf.py b/doc/conf.py
old mode 100644
new mode 100755
index 9fbe77d7e30b920a2902997ec9af077a22168a9a..c297958fd2d1b29b95fdb9d52efaa7a78877fda6
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -25,6 +25,7 @@ extensions = [
     'sphinx.ext.intersphinx',
     'sphinx.ext.napoleon',
     'sphinx.ext.viewcode',
+    'matplotlib.sphinxext.plot_directive',
     ]
 
 import sphinx
diff --git a/doc/guide.rst b/doc/guide.rst
index 95fc5160cada4701feb9f635a53d335569bfa031..9ca31ec0065a8027c9812736eba0089f80d84a74 100644
--- a/doc/guide.rst
+++ b/doc/guide.rst
@@ -1,6 +1,6 @@
 .. vim: set fileencoding=utf-8 :
-.. Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
-.. Wed Mar 14 12:31:35 2012 +0100
+.. Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+.. Sat May 13 11:40:35 2012 PST
 ..
 .. Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
 
@@ -24,668 +24,476 @@
 This section includes the machine/trainer guides for learning techniques
 available in this package.
 
-Machines
---------
 
-Machines are one of the core components of |project|. They represent
-statistical models or other functions defined by parameters that can be learnt
-or set by using Trainers.
+K-Means
+-------
+.. _kmeans:
 
-K-means machines
-================
+**k-means** [7]_ is a clustering method which aims to partition a set of :math:`N` observations into
+:math:`C` clusters of equal variance by minimizing the cost function
+:math:`J = \sum_{i=1}^{N} \min_{\mu_j \in C} ||x_i - \mu_j||^2`, where :math:`\mu_j` is a cluster mean (also called a centroid) and
+:math:`x_i` is an observation.
 
-`k-means <http://en.wikipedia.org/wiki/K-means_clustering>`_ is a clustering
-method which aims to partition a set of observations into :math:`k` clusters.
-The `training` procedure is described further below. Here, we explain only how
-to use the resulting machine. For the sake of example, we create a new
-:py:class:`bob.learn.em.KMeansMachine` as follows:
+This implementation has two stopping criteria.
+The first one is reaching the maximum number of iterations; the second one is when the difference between the
+values of :math:`J` in successive iterations falls below a convergence threshold.
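+
+For intuition, the sketch below (plain NumPy, independent of the Bob API, using an arbitrary toy
+dataset and two fixed centroids) computes the cost :math:`J` defined above:
+
+.. code-block:: python
+
+   import numpy
+
+   # Toy observations (N=5, dimensionality 3) and two candidate centroids
+   data = numpy.array([[3, -3, 100], [4, -4, 98], [3.5, -3.5, 99],
+                       [-7, 7, -100], [-5, 5, -101]], dtype='float64')
+   means = numpy.array([[3.5, -3.5, 99.], [-6., 6., -100.5]])
+
+   # Squared Euclidean distance of every observation to every centroid
+   distances = ((data[:, None, :] - means[None, :, :]) ** 2).sum(axis=2)
+   # Keep the distance to the closest centroid and sum over all observations
+   J = distances.min(axis=1).sum()
+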
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
 
-   >>> machine = bob.learn.em.KMeansMachine(2,3) # Two clusters with a feature dimensionality of 3
-   >>> machine.means = numpy.array([[1,0,0],[0,0,1]], 'float64') # Defines the two clusters
+In this implementation, training consists in defining the statistical model, called a machine
+(:py:class:`bob.learn.em.KMeansMachine`), and learning it through a trainer (:py:class:`bob.learn.em.KMeansTrainer`).
 
-Then, given some input data, it is possible to determine to which cluster the
-data is the closest as well as the min distance.
+The snippet below shows how to train a k-means machine using Bob.
 
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
-   >>> sample = numpy.array([2,1,-2], 'float64')
-   >>> print(machine.get_closest_mean(sample)) # Returns the index of the closest mean and the distance to it at the power of 2
-   (0, 6.0)
-
-
-Gaussian machines
-=================
-
-The :py:class:`bob.learn.em.Gaussian` represents a `multivariate diagonal
-Gaussian (or normal) distribution
-<http://en.wikipedia.org/wiki/Multivariate_normal_distribution>`_. In this
-context, a *diagonal* Gaussian refers to the covariance matrix of the
-distribution being diagonal. When the covariance matrix is diagonal, each
-variable in the distribution is independent of the others.
-
-Objects of this class are normally used as building blocks for more complex
-:py:class:`bob.learn.em.GMMMachine` or GMM objects, but can also be used
-individually. Here is how to create one multivariate diagonal Gaussian
-distribution:
-
-.. doctest::
+   >>> import bob.learn.em
+   >>> import numpy
+   >>> data = numpy.array([[3,-3,100], [4,-4,98], [3.5,-3.5,99], [-7,7,-100], [-5,5,-101]], dtype='float64')
+   >>> kmeans_machine = bob.learn.em.KMeansMachine(2, 3) # Create a k-means machine with k=2 clusters and a feature dimensionality of 3
+   >>> kmeans_trainer = bob.learn.em.KMeansTrainer()
+   >>> max_iterations = 200
+   >>> convergence_threshold = 1e-5
+   >>> bob.learn.em.train(kmeans_trainer, kmeans_machine, data, max_iterations = max_iterations, convergence_threshold = convergence_threshold) # Train the KMeansMachine
+   >>> print(kmeans_machine.means)
+   [[ -6.   6.  -100.5]
+    [  3.5 -3.5   99. ]]
 
-  >>> g = bob.learn.em.Gaussian(2) #bi-variate diagonal normal distribution
-  >>> g.mean = numpy.array([0.3, 0.7], 'float64')
-  >>> g.mean
-  array([ 0.3,  0.7])
-  >>> g.variance = numpy.array([0.2, 0.1], 'float64')
-  >>> g.variance
-  array([ 0.2,  0.1])
 
-Once the :py:class:`bob.learn.em.Gaussian` has been set, you can use it to
-estimate the log-likelihood of an input feature vector with a matching number
-of dimensions:
+Below is an intuition of k-means training on the Iris flower `dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
 
-.. doctest::
+.. plot:: plot/plot_kmeans.py
+   :include-source: False
 
-  >>> log_likelihood = g(numpy.array([0.4, 0.4], 'float64'))
 
-As with other machines you can save and re-load machines of this type using
-:py:meth:`bob.learn.em.Gaussian.save` and the class constructor
-respectively.
 
 Gaussian mixture models
-=======================
+-----------------------
 
-The :py:class:`bob.learn.em.GMMMachine` represents a Gaussian `mixture model
-<http://en.wikipedia.org/wiki/Mixture_model>`_ (GMM), which consists of a
-mixture of weighted :py:class:`bob.learn.em.Gaussian`\s.
 
-.. doctest::
+A Gaussian mixture model (`GMM <http://en.wikipedia.org/wiki/Mixture_model>`_) is a probabilistic model for density estimation.
+It assumes that all the data points are generated from a mixture of a finite number of Gaussian distributions.
+More formally, a GMM can be defined as :math:`P(x|\Theta) = \sum_{c=1}^{C} \omega_c \mathcal{N}(x | \mu_c, \sigma_c)`,
+where :math:`\Theta = \{ \omega_c, \mu_c, \sigma_c \}`.
 
-  >>> gmm = bob.learn.em.GMMMachine(2,3) # Mixture of two diagonal Gaussian of dimension 3
-
-By default, the diagonal Gaussian distributions of the GMM are initialized with
-zero mean and unit variance, and the weights are identical. This can be updated
-using the :py:attr:`bob.learn.em.GMMMachine.means`,
-:py:attr:`bob.learn.em.GMMMachine.variances` or
-:py:attr:`bob.learn.em.GMMMachine.weights`.
+Bob represents this statistical model with the class :py:class:`bob.learn.em.GMMMachine`, as below.
 
 .. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> gmm.weights = numpy.array([0.4, 0.6], 'float64')
-  >>> gmm.means = numpy.array([[1, 6, 2], [4, 3, 2]], 'float64')
-  >>> gmm.variances = numpy.array([[1, 2, 1], [2, 1, 2]], 'float64')
-  >>> gmm.means
-  array([[ 1.,  6.,  2.],
-       [ 4.,  3.,  2.]])
+   :options: +NORMALIZE_WHITESPACE
 
-Once the :py:class:`bob.learn.em.GMMMachine` has been set, you can use it to
-estimate the log-likelihood of an input feature vector with a matching number
-of dimensions:
+   >>> import bob.learn.em
+   >>> # Create a GMM with k=2 clusters with a dimensionality equal to 3
+   >>> gmm_machine = bob.learn.em.GMMMachine(2, 3)
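+
+Once the parameters :math:`\Theta` are set, calling the machine returns the log-likelihood of a
+sample under the mixture. A minimal sketch (the parameter values below are arbitrary, chosen only
+for illustration):
+
+.. code-block:: python
+
+   import numpy
+   import bob.learn.em
+
+   gmm_machine = bob.learn.em.GMMMachine(2, 3)
+   gmm_machine.weights = numpy.array([0.4, 0.6])
+   gmm_machine.means = numpy.array([[1., 6., 2.], [4., 3., 2.]])
+   gmm_machine.variances = numpy.array([[1., 2., 1.], [2., 1., 2.]])
+
+   # log P(x|Theta) of a single observation
+   log_likelihood = gmm_machine(numpy.array([5.1, 4.7, -4.9]))
+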
 
-.. doctest::
 
-  >>> log_likelihood = gmm(numpy.array([5.1, 4.7, -4.9], 'float64'))
+There are plenty of ways to estimate :math:`\Theta`; the next subsections explain some of those implemented in Bob.
 
-As with other machines you can save and re-load machines of this type using
-:py:meth:`bob.learn.em.GMMMachine.save` and the class constructor respectively.
 
-Gaussian mixture models Statistics
+Maximum likelihood Estimator (MLE)
 ==================================
+.. _mle:
 
-The :py:class:`bob.learn.em.GMMStats` is a container for the sufficient
-statistics of a GMM distribution.
-
-Given a GMM, the sufficient statistics of a sample can be computed as
-follows:
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> gs = bob.learn.em.GMMStats(2,3)
-  >>> sample = numpy.array([0.5, 4.5, 1.5])
-  >>> gmm.acc_statistics(sample, gs)
-  >>> print(gs) # doctest: +SKIP
-
-Then, the sufficient statistics can be accessed (or set as below), by
-considering the following attributes.
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> gs = bob.learn.em.GMMStats(2,3)
-  >>> log_likelihood = -3. # log-likelihood of the accumulated samples
-  >>> T = 1 # Number of samples used to accumulate statistics
-  >>> n = numpy.array([0.4, 0.6], 'float64') # zeroth order stats
-  >>> sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64') # first order stats
-  >>> sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64') # second order stats
-  >>> gs.log_likelihood = log_likelihood
-  >>> gs.t = T
-  >>> gs.n = n
-  >>> gs.sum_px = sumpx
-  >>> gs.sum_pxx = sumpxx
-
-Joint Factor Analysis
-=====================
-
-Joint Factor Analysis (JFA) [1]_ [2]_ is a session variability modelling
-technique built on top of the Gaussian mixture modelling approach. It utilises
-a within-class subspace :math:`U`, a between-class subspace :math:`V`, and a
-subspace for the residuals :math:`D` to capture and suppress a significant
-portion of between-class variation.
-
-An instance of :py:class:`bob.learn.em.JFABase` carries information about
-the matrices :math:`U`, :math:`V` and :math:`D`, which can be shared between
-several classes.  In contrast, after the enrollment phase, an instance of
-:py:class:`bob.learn.em.JFAMachine` carries class-specific information about
-the latent variables :math:`y` and :math:`z`.
-
-An instance of :py:class:`bob.learn.em.JFABase` can be initialized as
-follows, given an existing GMM:
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> jfa_base = bob.learn.em.JFABase(gmm,2,2) # dimensions of U and V are both equal to 2
-  >>> U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'float64')
-  >>> V = numpy.array([[6, 5], [4, 3], [2, 1], [1, 2], [3, 4], [5, 6]], 'float64')
-  >>> d = numpy.array([0, 1, 0, 1, 0, 1], 'float64')
-  >>> jfa_base.u = U
-  >>> jfa_base.v = V
-  >>> jfa_base.d = d
-
-Next, this :py:class:`bob.learn.em.JFABase` can be shared by several
-instances of :py:class:`bob.learn.em.JFAMachine`, the initialization being
-as follows:
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> m = bob.learn.em.JFAMachine(jfa_base)
-  >>> m.y = numpy.array([1,2], 'float64')
-  >>> m.z = numpy.array([3,4,1,2,0,1], 'float64')
-
-
-Once the :py:class:`bob.learn.em.JFAMachine` has been configured for a
-specific class, the log-likelihood (score) that an input sample belongs to the
-enrolled class, can be estimated, by first computing the GMM sufficient
-statistics of this input sample, and then calling the
-:py:meth:`bob.learn.em.JFAMachine.log_likelihood` on the sufficient statistics.
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> gs = bob.learn.em.GMMStats(2,3)
-  >>> gmm.acc_statistics(sample, gs)
-  >>> score = m(gs)
-
-As with other machines you can save and re-load machines of this type using
-:py:meth:`bob.learn.em.JFAMachine.save` and the class constructor
-respectively.
-
-
-Inter-Session Variability
-=========================
-
-Similarly to Joint Factor Analysis, Inter-Session Variability (ISV) modelling
-[3]_ [2]_ is another session variability modelling technique built on top of
-the Gaussian mixture modelling approach. It utilises a within-class subspace
-:math:`U` and a subspace for the residuals :math:`D` to capture and suppress a
-significant portion of between-class variation. The main difference compared to
-JFA is the absence of the between-class subspace :math:`V`.
-
-Similarly to JFA, an instance of :py:class:`bob.learn.em.JFABase` carries
-information about the matrices :math:`U` and :math:`D`, which can be shared
-between several classes, whereas an instance of
-:py:class:`bob.learn.em.JFAMachine` carries class-specific information about
-the latent variable :math:`z`.
-
-An instance of :py:class:`bob.learn.em.ISVBase` can be initialized as
-follows, given an existing GMM:
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> isv_base = bob.learn.em.ISVBase(gmm,2) # dimension of U is equal to 2
-  >>> isv_base.u = U
-  >>> isv_base.d = d
-
-Next, this :py:class:`bob.learn.em.ISVBase` can be shared by several
-instances of :py:class:`bob.learn.em.ISVMachine`, the initialization being
-as follows:
+In statistics, maximum likelihood estimation (MLE) is a method of estimating the parameters of a statistical model given observations,
+by finding the :math:`\Theta` that maximizes the likelihood :math:`P(x|\Theta)` of the observed data [10]_.
+This optimization is carried out by the **Expectation-Maximization** (EM) algorithm [8]_ and
+is implemented by :py:class:`bob.learn.em.ML_GMMTrainer`.
 
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> m = bob.learn.em.ISVMachine(isv_base)
-  >>> m.z = numpy.array([3,4,1,2,0,1], 'float64')
-
-Once the :py:class:`bob.learn.em.ISVMachine` has been configured for a
-specific class, the log-likelihood (score) that an input sample belongs to the
-enrolled class, can be estimated, by first computing the GMM sufficient
-statistics of this input sample, and then calling the
-``__call__`` on the sufficient statistics.
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> gs = bob.learn.em.GMMStats(2,3)
-  >>> gmm.acc_statistics(sample, gs)
-  >>> score = m(gs)
-
-As with other machines you can save and re-load machines of this type using
-:py:meth:`bob.learn.em.ISVMachine.save` and the class constructor
-respectively.
+A very nice explanation of the EM algorithm for maximum likelihood estimation can be found on the `Mathematical Monk <https://www.youtube.com/watch?v=AnbiNaVp3eQ>`_
+YouTube channel.
 
+The snippet below shows how to train a GMM using the maximum likelihood estimator.
 
-Total Variability (i-vectors)
-=============================
-
-Total Variability (TV) modelling [4]_ is a front-end initially introduced for
-speaker recognition, which aims at describing samples by vectors of low
-dimensionality called ``i-vectors``. The model consists of a subspace :math:`T`
-and a residual diagonal covariance matrix :math:`\Sigma`, that are then used to
-extract i-vectors, and is built upon the GMM approach.
-
-An instance of the class :py:class:`bob.learn.em.IVectorMachine` carries
-information about these two matrices. This can be initialized as follows:
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> m = bob.learn.em.IVectorMachine(gmm, 2)
-  >>> m.t = numpy.array([[1.,2],[4,1],[0,3],[5,8],[7,10],[11,1]])
-  >>> m.sigma = numpy.array([1.,2.,1.,3.,2.,4.])
-
-
-Once the :py:class:`bob.learn.em.IVectorMachine` has been set, the
-extraction of an i-vector :math:`w_{ij}` can be done in two steps, by first
-extracting the GMM sufficient statistics, and then estimating the i-vector:
-
-.. doctest::
-  :options: +NORMALIZE_WHITESPACE
-
-  >>> gs = bob.learn.em.GMMStats(2,3)
-  >>> gmm.acc_statistics(sample, gs)
-  >>> w_ij = m(gs)
-
-As with other machines you can save and re-load machines of this type using
-:py:meth:`bob.learn.em.IVectorMachine.save` and the class constructor
-respectively.
-
-
-Probabilistic Linear Discriminant Analysis (PLDA)
-=================================================
-
-Probabilistic Linear Discriminant Analysis [5]_ [6]_ is a probabilistic model
-that incorporates components describing both between-class and within-class
-variations. Given a mean :math:`\mu`, between-class and within-class subspaces
-:math:`F` and :math:`G` and residual noise :math:`\epsilon` with zero mean and
-diagonal covariance matrix :math:`\Sigma`, the model assumes that a sample
-:math:`x_{i,j}` is generated by the following process:
-
-.. math::
-
-   x_{i,j} = \mu + F h_{i} + G w_{i,j} + \epsilon_{i,j}
-
-Information about a PLDA model (:math:`\mu`, :math:`F`, :math:`G` and
-:math:`\Sigma`) are carried out by an instance of the class
-:py:class:`bob.learn.em.PLDABase`.
-
-.. doctest::
-
-   >>> ### This creates a PLDABase container for input feature of dimensionality 3,
-   >>> ### and with subspaces F and G of rank 1 and 2 respectively.
-   >>> pldabase = bob.learn.em.PLDABase(3,1,2)
-
-Class-specific information (usually from enrollment samples) are contained in
-an instance of :py:class:`bob.learn.em.PLDAMachine`, that must be attached
-to a given :py:class:`bob.learn.em.PLDABase`. Once done, log-likelihood
-computations can be performed.
-
-.. doctest::
-
-   >>> plda = bob.learn.em.PLDAMachine(pldabase)
-   >>> samples = numpy.array([[3.5,-3.4,102], [4.5,-4.3,56]], dtype=numpy.float64)
-   >>> loglike = plda.compute_log_likelihood(samples)
-
-
-Trainers
---------
-
-In the previous section, the concept of a `machine` was introduced. A `machine`
-is fed by some input data, processes it and returns an output. Machines can be
-learnt using trainers in |project|.
-
-Expectation Maximization
-========================
-
-Each one of the following trainers has their own `initialize`, `eStep` and `mStep` methods in order to train the respective machines.
-For example, to train a K-Means with 10 iterations you can use the following steps.
 
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
-   >>> data           = numpy.array([[3,-3,100], [4,-4,98], [3.5,-3.5,99], [-7,7,-100], [-5,5,-101]], dtype='float64') #Data
-   >>> kmeans_machine = bob.learn.em.KMeansMachine(2, 3) # Create a machine with k=2 clusters with a dimensionality equal to 3
-   >>> kmeans_trainer = bob.learn.em.KMeansTrainer() #Creating the k-means machine
-   >>> max_iterations = 10
-   >>> kmeans_trainer.initialize(kmeans_machine, data) #Initilizing the means with random values
-   >>> for i in range(max_iterations):
-   ...   kmeans_trainer.e_step(kmeans_machine, data)
-   ...   kmeans_trainer.m_step(kmeans_machine, data)
-   >>> print(kmeans_machine.means)
-   [[  -6.     6.  -100.5]
-   [   3.5   -3.5   99. ]]
+   >>> import bob.learn.em
+   >>> import numpy
+   >>>
+   >>> data = numpy.array([[3,-3,100], [4,-4,98], [3.5,-3.5,99], [-7,7,-100], [-5,5,-101]], dtype='float64')
+   >>> # Create a GMM machine with 2 Gaussians of dimensionality 3
+   >>> gmm_machine = bob.learn.em.GMMMachine(2, 3)
+   >>> # Using the MLE trainer to train the GMM
+   >>> gmm_trainer = bob.learn.em.ML_GMMTrainer(True, True, True) # update means/variances/weights at each iteration
+   >>> # Setting some means to start the training.
+   >>> # In practice, the output of kmeans is a good start for the MLE training
+   >>> gmm_machine.means = numpy.array([[ -4.,   2.3,  -10.5], [  2.5, -4.5,   59. ]])
+   >>> max_iterations = 200
+   >>> convergence_threshold = 1e-5
+   >>> # Training
+   >>> bob.learn.em.train(gmm_trainer, gmm_machine, data, max_iterations = max_iterations, convergence_threshold = convergence_threshold) # Train the GMMMachine
+   >>> print(gmm_machine.means)
+   [[ -6.   6.  -100.5]
+    [  3.5 -3.5   99. ]]
 
+Below is an intuition of a GMM trained with the maximum likelihood estimator on the Iris flower `dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
 
-With that granularity you can train your K-Means (or any trainer procedure) with your own convergence criteria.
-Furthermore, to make the things even simpler, it is possible to train the K-Means (and have the same example as above) using the wrapper :py:class:`bob.learn.em.train` as in the example below:
+.. plot:: plot/plot_ML.py
+   :include-source: False
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
 
-   >>> data           = numpy.array([[3,-3,100], [4,-4,98], [3.5,-3.5,99], [-7,7,-100], [-5,5,-101]], dtype='float64') #Data
-   >>> kmeans_machine = bob.learn.em.KMeansMachine(2, 3) # Create a machine with k=2 clusters with a dimensionality equal to 3
-   >>> kmeans_trainer = bob.learn.em.KMeansTrainer() #Creating the k-means machine
-   >>> max_iterations = 10
-   >>> bob.learn.em.train(kmeans_trainer, kmeans_machine, data, max_iterations = 10) #wrapper for the em trainer
-   >>> print(kmeans_machine.means)
-   [[  -6.     6.  -100.5]
-   [   3.5   -3.5   99. ]]
+Maximum a posteriori Estimator (MAP)
+====================================
+.. _map:
 
+Closely related to MLE, maximum a posteriori (MAP) estimation picks the mode of the posterior distribution,
+incorporating a prior distribution over the parameters in its objective [11]_.
+Commonly this prior distribution (the values of :math:`\Theta`) is itself estimated with MLE.
+The optimization is carried out by the **Expectation-Maximization** (EM) algorithm [8]_ and
+is implemented by :py:class:`bob.learn.em.MAP_GMMTrainer`.
 
+A compact way to write relevance MAP adaptation is the GMM supervector notation (this will be useful in the next subsections).
+The GMM supervector notation consists of taking the parameters of :math:`\Theta` (weights, means and covariance matrices)
+of a GMM and concatenating each of them into a single vector or matrix.
+With this notation, the MAP adaptation of the means for a class :math:`i` can be written as
+:math:`\mu_i = m + d_i`, where :math:`m` is the prior mean supervector and :math:`d_i` is the class offset.
 
-K-means
-=======
+The snippet below shows how to train a GMM using the MAP estimator.
 
-**k-means** [7]_ is a clustering method, which aims to partition a set of
-observations into :math:`k` clusters. This is an `unsupervised` technique. As
-for **PCA** [1]_, which is implemented in the :py:class:`bob.learn.linear.PCATrainer`
-class, the training data is passed in a 2D :py:class:`numpy.ndarray` container.
 
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
+   >>> import bob.learn.em
+   >>> import numpy
+   >>>
    >>> data = numpy.array([[3,-3,100], [4,-4,98], [3.5,-3.5,99], [-7,7,-100], [-5,5,-101]], dtype='float64')
+   >>> # Creating a fake prior
+   >>> prior_gmm = bob.learn.em.GMMMachine(2, 3)
+   >>> prior_gmm.means = numpy.array([[ -4.,   2.3,  -10.5], [  2.5, -4.5,   59. ]]) # Setting some random means for the example
+   >>> # Creating the model for the adapted GMM
+   >>> adapted_gmm = bob.learn.em.GMMMachine(2, 3)
+   >>> # Creating the MAP trainer
+   >>> gmm_trainer = bob.learn.em.MAP_GMMTrainer(prior_gmm, relevance_factor=4)
+   >>>
+   >>> max_iterations = 200
+   >>> convergence_threshold = 1e-5
+   >>> # Training
+   >>> bob.learn.em.train(gmm_trainer, adapted_gmm, data, max_iterations = max_iterations, convergence_threshold = convergence_threshold) # Train the adapted GMM with MAP
+   >>> print(adapted_gmm.means)
+   [[ -4.66666667   3.53333333 -40.5       ]
+    [  2.92857143  -4.07142857  76.14285714]]
 
-The training procedure will learn the `means` for the
-:py:class:`bob.learn.em.KMeansMachine`. The number :math:`k` of `means` is given
-when creating the `machine`, as well as the dimensionality of the features.
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
+Below is an intuition of a GMM trained with the MAP estimator on the Iris flower `dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
 
-   >>> kmeans = bob.learn.em.KMeansMachine(2, 3) # Create a machine with k=2 clusters with a dimensionality equal to 3
+.. plot:: plot/plot_MAP.py
+   :include-source: False
 
-Then training procedure for `k-means` is an **Expectation-Maximization**-based
-[8]_ algorithm. There are several options that can be set such as the maximum
-number of iterations and the criterion used to determine if the convergence has
-occurred. After setting all of these options, the training procedure can then
-be called.
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
+Session Variability Modeling with Gaussian Mixture Models
+=========================================================
 
-   >>> kmeansTrainer = bob.learn.em.KMeansTrainer()
+In the aforementioned GMM-based algorithms there is no explicit modelling of session variability.
+This section introduces some session variability algorithms built on top of GMMs.
 
-   >>> bob.learn.em.train(kmeansTrainer, kmeans, data, max_iterations = 200, convergence_threshold = 1e-5) # Train the KMeansMachine
-   >>> print(kmeans.means)
-   [[ -6.   6.  -100.5]
-    [  3.5 -3.5   99. ]]
 
+GMM statistics
+**************
 
-Maximum likelihood for Gaussian mixture model
-=============================================
+Before introducing session variability for GMM-based algorithms, we must introduce a component called
+:py:class:`bob.learn.em.GMMStats`.
+This component is used in the computations of the following sections.
+:py:class:`bob.learn.em.GMMStats` is a container for the quantities defined in Equations 8, 9 and 10 of [Reynolds2000]_ (also called the
+zeroth, first and second order GMM statistics).
 
-A Gaussian **mixture model** (GMM) [9]_ is a common probabilistic model. In
-order to train the parameters of such a model it is common to use a
-**maximum-likelihood** (ML) approach [10]_. To do this we use an
-**Expectation-Maximization** (EM) algorithm [8]_. Let's first start by creating
-a :py:class:`bob.learn.em.GMMMachine`. By default, all of the Gaussian's have
-zero-mean and unit variance, and all the weights are equal. As a starting
-point, we could set the mean to the one obtained with **k-means** [7]_.
+Given a GMM (:math:`\Theta`) and a set of samples :math:`x_t`, this component accumulates statistics for each
+Gaussian component :math:`c`.
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
-
-   >>> gmm = bob.learn.em.GMMMachine(2,3) # Create a machine with 2 Gaussian and feature dimensionality 3
-   >>> gmm.means = kmeans.means # Set the means to the one obtained with k-means
-
-The |project| class to learn the parameters of a GMM [9]_ using ML [10]_ is
-:py:class:`bob.learn.em.ML_GMMTrainer`. It uses an **EM**-based [8]_ algorithm
-and requires the user to specify which parameters of the GMM are updated at
-each iteration (means, variances and/or weights). In addition, and as for
-**k-means** [7]_, it has parameters such as the maximum number of iterations
-and the criterion used to determine if the parameters have converged.
-
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
-
-   >>> trainer = bob.learn.em.ML_GMMTrainer(True, True, True) # update means/variances/weights at each iteration
-   >>> bob.learn.em.train(trainer, gmm, data, max_iterations = 200, convergence_threshold = 1e-5)
-   >>> print(gmm) # doctest: +SKIP
+The list below maps the statistics of [Reynolds2000]_ to the attributes of :py:class:`bob.learn.em.GMMStats` (a hand-computed check is sketched after the list):
+   - Eq (8) is :py:attr:`bob.learn.em.GMMStats.n`: :math:`n_c=\sum\limits_{t=1}^T Pr(c | x_t)` (the sum of the responsibilities :math:`Pr(c | x_t)`)
+   - Eq (9) is :py:attr:`bob.learn.em.GMMStats.sum_px`: :math:`E_c(x)=\frac{1}{n_c}\sum\limits_{t=1}^T Pr(c | x_t)x_t`
+   - Eq (10) is :py:attr:`bob.learn.em.GMMStats.sum_pxx`: :math:`E_c(x^2)=\frac{1}{n_c}\sum\limits_{t=1}^T Pr(c | x_t)x_t^2`
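+
+As a sanity check, these accumulators can be reproduced with plain NumPy for a diagonal GMM. The
+sketch below (arbitrary toy parameters) computes the responsibilities :math:`Pr(c | x_t)`, the
+zeroth order statistic :math:`n_c` and the first order sum :math:`\sum_t Pr(c | x_t) x_t`:
+
+.. code-block:: python
+
+   import numpy
+
+   # Toy diagonal GMM with 2 components of dimensionality 3 (arbitrary parameters)
+   weights = numpy.array([0.3, 0.7])
+   means = numpy.array([[0., 0., 0.], [1., 1., 1.]])
+   variances = numpy.ones((2, 3)) * 0.5
+   data = numpy.array([[0., 0.3, -0.2], [0.4, 0.1, 0.15], [-0.3, -0.1, 0.]])
+
+   # Diagonal Gaussian log-densities log N(x_t | mu_c, sigma_c), shape (T, C)
+   diff = data[:, None, :] - means[None, :, :]
+   log_n = -0.5 * (numpy.log(2 * numpy.pi * variances) + diff ** 2 / variances).sum(axis=2)
+
+   # Responsibilities Pr(c | x_t), then the accumulated statistics
+   post = weights * numpy.exp(log_n)
+   post /= post.sum(axis=1, keepdims=True)
+   n = post.sum(axis=0)            # zeroth order statistic (GMMStats.n)
+   sum_px = post.T.dot(data)       # first order sum (GMMStats.sum_px)
+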
 
 
-MAP-adaptation for Gaussian mixture model
-=========================================
-
-|project| also supports the training of GMMs [9]_ using a **maximum a
-posteriori** (MAP) approach [11]_. MAP is closely related to the ML [10]_
-technique but it incorporates a prior on the quantity that we want to estimate.
-In our case, this prior is a GMM [9]_. Based on this prior model and some
-training data, a new model, the MAP estimate, will be `adapted`.
-
-Let's consider that the previously trained GMM [9]_ is our prior model.
-
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
-
-   >>> print(gmm) # doctest: +SKIP
+The snippet below shows how to accumulate these statistics given a prior GMM.
 
-The training data used to compute the MAP estimate [11]_ is again stored in a
-2D :py:class:`numpy.ndarray` container.
 
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
-   >>> dataMAP = numpy.array([[7,-7,102], [6,-6,103], [-3.5,3.5,-97]], dtype='float64')
+    >>> import bob.learn.em
+    >>> import numpy
+    >>> numpy.random.seed(10)
+    >>>
+    >>> data = numpy.array([[0, 0.3, -0.2], [0.4, 0.1, 0.15], [-0.3, -0.1, 0], [1.2, 1.4, 1], [0.8, 1., 1]], dtype='float64')
+    >>> # Creating a fake prior with 2 gaussians of dimension 3
+    >>> prior_gmm = bob.learn.em.GMMMachine(2, 3)
+    >>> prior_gmm.means = numpy.vstack((numpy.random.normal(0, 0.5, (1, 3)), numpy.random.normal(1, 0.5, (1, 3))))
+    >>> # All nice and round diagonal covariance
+    >>> prior_gmm.variances = numpy.ones((2, 3)) * 0.5
+    >>> prior_gmm.weights = numpy.array([0.3, 0.7])
+
+    >>> # Creating the container
+    >>> gmm_stats_container = bob.learn.em.GMMStats(2, 3)
+    >>> for d in data:
+    ...   prior_gmm.acc_statistics(d, gmm_stats_container)
+    >>>
+    >>> # Printing the responsibilities
+    >>> print(gmm_stats_container.n / gmm_stats_container.t)
+    [ 0.42861627  0.57138373]
 
-The |project| class used to perform MAP adaptation training [11]_ is
-:py:class:`bob.learn.em.MAP_GMMTrainer`. As with the ML estimate [10]_, it uses
-an **EM**-based [8]_ algorithm and requires the user to specify which parts of
-the GMM are adapted at each iteration (means, variances and/or weights). In
-addition, it also has parameters such as the maximum number of iterations and
-the criterion used to determine if the parameters have converged, in addition
-to this there is also a relevance factor which indicates the importance we give
-to the prior.  Once the trainer has been created, a prior GMM [9]_ needs to be
-set.
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
-
-   >>> relevance_factor = 4.
-   >>> trainer = bob.learn.em.MAP_GMMTrainer(gmm, relevance_factor=relevance_factor, update_means=True, update_variances=False, update_weights=False) # mean adaptation only
-   >>> gmmAdapted = bob.learn.em.GMMMachine(2,3) # Create a new machine for the MAP estimate
-   >>> bob.learn.em.train(trainer, gmmAdapted, dataMAP, max_iterations = 200, convergence_threshold = 1e-5)
-   >>> print(gmmAdapted) # doctest: +SKIP
-
-
-Joint Factor Analysis
-=====================
-
-The training of the subspace :math:`U`, :math:`V` and :math:`D` of a Joint
-Factor Analysis model, is performed in two steps. First, GMM sufficient
-statistics of the training samples should be computed against the UBM GMM. Once
-done, we get a training set of GMM statistics:
-
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
-
-   >>> F1 = numpy.array( [0.3833, 0.4516, 0.6173, 0.2277, 0.5755, 0.8044, 0.5301, 0.9861, 0.2751, 0.0300, 0.2486, 0.5357]).reshape((6,2))
-   >>> F2 = numpy.array( [0.0871, 0.6838, 0.8021, 0.7837, 0.9891, 0.5341, 0.0669, 0.8854, 0.9394, 0.8990, 0.0182, 0.6259]).reshape((6,2))
-   >>> F=[F1, F2]
-
-   >>> N1 = numpy.array([0.1379, 0.1821, 0.2178, 0.0418]).reshape((2,2))
-   >>> N2 = numpy.array([0.1069, 0.9397, 0.6164, 0.3545]).reshape((2,2))
-   >>> N=[N1, N2]
+Inter-Session Variability
+*************************
+.. _isv:
 
-   >>> gs11 = bob.learn.em.GMMStats(2,3)
-   >>> gs11.n = N1[:,0]
-   >>> gs11.sum_px = F1[:,0].reshape(2,3)
-   >>> gs12 = bob.learn.em.GMMStats(2,3)
-   >>> gs12.n = N1[:,1]
-   >>> gs12.sum_px = F1[:,1].reshape(2,3)
+Inter-Session Variability (ISV) modelling [3]_ [2]_ is a session variability modelling technique built on top of
+the Gaussian mixture modelling approach.
+It hypothesizes that within-class variations are embedded in a linear subspace of the GMM mean supervector space and that these
+variations can be suppressed by an offset w.r.t. each mean during MAP adaptation.
 
-   >>> gs21 = bob.learn.em.GMMStats(2,3)
-   >>> gs21.n = N2[:,0]
-   >>> gs21.sum_px = F2[:,0].reshape(2,3)
-   >>> gs22 = bob.learn.em.GMMStats(2,3)
-   >>> gs22.n = N2[:,1]
-   >>> gs22.sum_px = F2[:,1].reshape(2,3)
+In this generative model, each sample is assumed to have been generated by a GMM whose mean supervector has the following shape:
+:math:`\mu_{i, j} = m + Ux_{i, j} + Dz_{i}`, where :math:`m` is our prior, :math:`Ux_{i, j}` is the session offset that we
+want to suppress and :math:`Dz_{i}` is the class offset (with all session effects suppressed).
 
-   >>> TRAINING_STATS = [[gs11, gs12], [gs21, gs22]]
+All possible sources of session variation are embedded in the matrix :math:`U`.
+Below is an intuition of what is modelled with :math:`U` on the Iris flower `dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
+The arrows :math:`U_{1}`, :math:`U_{2}` and :math:`U_{3}` are the directions of the within-class variations, with respect to
+each Gaussian component, that will be suppressed a posteriori.
 
-In the following, we will allocate a :py:class:`bob.learn.em.JFABase` machine,
-that will then be trained.
+.. plot:: plot/plot_ISV.py
+   :include-source: False
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
 
-    >>> jfa_base = bob.learn.em.JFABase(gmm, 2, 2) # the dimensions of U and V are both equal to 2
+The ISV statistical model is stored in the container :py:class:`bob.learn.em.ISVBase` and the training is performed by
+:py:class:`bob.learn.em.ISVTrainer`.
+The snippet below shows how to train an Inter-Session Variability model.
 
-Next, we initialize a trainer, which is an instance of
-:py:class:`bob.learn.em.JFATrainer`, as follows:
 
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
-   >>> jfa_trainer = bob.learn.em.JFATrainer()
+    >>> import bob.learn.em
+    >>> import numpy
+    >>> numpy.random.seed(10)
+    >>>
+    >>> # Generating some fake data
+    >>> data_class1 = numpy.random.normal(0, 0.5, (10, 3))
+    >>> data_class2 = numpy.random.normal(-0.2, 0.2, (10, 3))
+    >>> data = [data_class1, data_class2]
+
+    >>> # Creating a fake prior with 2 gaussians of dimension 3
+    >>> prior_gmm = bob.learn.em.GMMMachine(2, 3)
+    >>> prior_gmm.means = numpy.vstack((numpy.random.normal(0, 0.5, (1, 3)), numpy.random.normal(1, 0.5, (1, 3))))
+    >>> # All nice and round diagonal covariance
+    >>> prior_gmm.variances = numpy.ones((2, 3)) * 0.5
+    >>> prior_gmm.weights = numpy.array([0.3, 0.7])
+    >>> # The input to the ISV training is the statistics of the GMM
+    >>> gmm_stats_per_class = []
+    >>> for d in data:
+    ...   stats = []
+    ...   for i in d:
+    ...     gmm_stats_container = bob.learn.em.GMMStats(2, 3)
+    ...     prior_gmm.acc_statistics(i, gmm_stats_container)
+    ...     stats.append(gmm_stats_container)
+    ...   gmm_stats_per_class.append(stats)
+
+    >>> # Finally doing the ISV training
+    >>> subspace_dimension_of_u = 2
+    >>> relevance_factor = 4
+    >>> isvbase = bob.learn.em.ISVBase(prior_gmm, subspace_dimension_of_u)
+    >>> trainer = bob.learn.em.ISVTrainer(relevance_factor)
+    >>> bob.learn.em.train(trainer, isvbase, gmm_stats_per_class, max_iterations=50)
+    >>> # Printing the session offset directions w.r.t. each Gaussian component
+    >>> print(isvbase.u)
+    [[-0.01018674 -0.0266506 ]
+     [-0.00160621 -0.00420217]
+     [ 0.02811708  0.07356007]
+     [ 0.01162401  0.0304108 ]
+     [ 0.03261834  0.08533628]
+     [ 0.04602195  0.1204029 ]]
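+
+After :math:`U` is estimated, class-specific enrollment (estimating the latent variable :math:`z`)
+and scoring can be done with :py:class:`bob.learn.em.ISVMachine`. A minimal sketch, continuing from
+the variables of the snippet above (the 5 enrollment iterations are an arbitrary choice):
+
+.. code-block:: python
+
+   # Enroll a class-specific ISV machine from the statistics of the first class
+   isv_machine = bob.learn.em.ISVMachine(isvbase)
+   trainer.enroll(isv_machine, gmm_stats_per_class[0], 5)
+
+   # Score a probe: accumulate its GMM statistics and call the machine
+   probe_stats = bob.learn.em.GMMStats(2, 3)
+   prior_gmm.acc_statistics(data[0][0], probe_stats)
+   score = isv_machine(probe_stats)
+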
 
-The training process is started by calling the
-:py:meth:`bob.learn.em.train`.
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
+Joint Factor Analysis
+*********************
+.. _jfa:
 
-   >>> bob.learn.em.train_jfa(jfa_trainer, jfa_base, TRAINING_STATS, max_iterations=10)
+Joint Factor Analysis (JFA) [1]_ [2]_ is an extension of ISV.
+Besides the within-class assumption (modelled with :math:`U`), it also hypothesizes that between-class variations
+are embedded in a low-rank rectangular matrix :math:`V`.
+In the supervector notation, this modelling has the following shape: :math:`\mu_{i, j} = m + Ux_{i, j} + Vy_{i} + Dz_{i}`.
 
-Once the training is finished (i.e. the subspaces :math:`U`, :math:`V` and
-:math:`D` are estimated), the JFA model can be shared and used by several
-class-specific models.  As for the training samples, we first need to extract
-GMM statistics from the samples.  These GMM statistics are manually defined in
-the following.
+Below is an intuition of what is modelled with :math:`U` and :math:`V` on the Iris flower `dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
+The arrows :math:`V_{1}`, :math:`V_{2}` and :math:`V_{3}` are the directions of the between-class variations, with respect to
+each Gaussian component, that will be added a posteriori.
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
 
-   >>> Ne = numpy.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2,2))
-   >>> Fe = numpy.array([0.1579, 0.1925, 0.3242, 0.1234, 0.2354, 0.2734, 0.2514, 0.5874, 0.3345, 0.2463, 0.4789, 0.5236]).reshape((6,2))
-   >>> gse1 = bob.learn.em.GMMStats(2,3)
-   >>> gse1.n = Ne[:,0]
-   >>> gse1.sum_px = Fe[:,0].reshape(2,3)
-   >>> gse2 = bob.learn.em.GMMStats(2,3)
-   >>> gse2.n = Ne[:,1]
-   >>> gse2.sum_px = Fe[:,1].reshape(2,3)
-   >>> gse = [gse1, gse2]
+.. plot:: plot/plot_JFA.py
+   :include-source: False
 
-Class-specific enrollment can then be perfomed as follows. This will estimate
-the class-specific latent variables :math:`y` and :math:`z`:
+The JFA statistical model is stored in the container :py:class:`bob.learn.em.JFABase` and the training is performed by
+:py:class:`bob.learn.em.JFATrainer`.
+The snippet below shows how to train a Joint Factor Analysis model.
 
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
-   >>> m = bob.learn.em.JFAMachine(jfa_base)
-   >>> jfa_trainer.enroll(m, gse, 5) # where 5 is the number of enrollment iterations
+    >>> import bob.learn.em
+    >>> import numpy
+    >>> numpy.random.seed(10)
+    >>>
+    >>> # Generating some fake data
+    >>> data_class1 = numpy.random.normal(0, 0.5, (10, 3))
+    >>> data_class2 = numpy.random.normal(-0.2, 0.2, (10, 3))
+    >>> data = [data_class1, data_class2]
+
+    >>> # Creating a fake prior with 2 gaussians of dimension 3
+    >>> prior_gmm = bob.learn.em.GMMMachine(2, 3)
+    >>> prior_gmm.means = numpy.vstack((numpy.random.normal(0, 0.5, (1, 3)), numpy.random.normal(1, 0.5, (1, 3))))
+    >>> # All nice and round diagonal covariance
+    >>> prior_gmm.variances = numpy.ones((2, 3)) * 0.5
+    >>> prior_gmm.weights = numpy.array([0.3, 0.7])
+    >>>
+    >>> # The input to the JFA training is the statistics of the GMM
+    >>> gmm_stats_per_class = []
+    >>> for d in data:
+    ...   stats = []
+    ...   for i in d:
+    ...     gmm_stats_container = bob.learn.em.GMMStats(2, 3)
+    ...     prior_gmm.acc_statistics(i, gmm_stats_container)
+    ...     stats.append(gmm_stats_container)
+    ...   gmm_stats_per_class.append(stats)
+    >>>
+    >>> # Finally doing the JFA training
+    >>> subspace_dimension_of_u = 2
+    >>> subspace_dimension_of_v = 2
+    >>> relevance_factor = 4
+    >>> jfabase = bob.learn.em.JFABase(prior_gmm, subspace_dimension_of_u, subspace_dimension_of_v)
+    >>> trainer = bob.learn.em.JFATrainer()
+    >>> bob.learn.em.train_jfa(trainer, jfabase, gmm_stats_per_class, max_iterations=50)
+
+    >>> # Printing the between-class variability subspace w.r.t. each Gaussian component
+    >>> print(jfabase.v)
+    [[ 0.002881   -0.00584226]
+     [ 0.04143534 -0.084025  ]
+     [-0.26149889  0.53028268]
+     [-0.25156799  0.51014422]
+     [-0.38687714  0.78453199]
+     [-0.36015773  0.73034882]]
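+
+As with ISV, once :math:`U`, :math:`V` and :math:`D` are estimated, class-specific enrollment
+(estimating :math:`y` and :math:`z`) and scoring use :py:class:`bob.learn.em.JFAMachine`. A minimal
+sketch, continuing from the variables of the snippet above:
+
+.. code-block:: python
+
+   # Enroll a class-specific JFA machine from the statistics of the first class
+   jfa_machine = bob.learn.em.JFAMachine(jfabase)
+   trainer.enroll(jfa_machine, gmm_stats_per_class[0], 5)  # 5 enrollment iterations
+
+   # Score a probe against the enrolled class
+   probe_stats = bob.learn.em.GMMStats(2, 3)
+   prior_gmm.acc_statistics(data[0][0], probe_stats)
+   score = jfa_machine(probe_stats)
+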
+
+Total variability Modelling
+***************************
+.. _ivector:
 
-More information about the training process can be found in [12]_ and [13]_.
-
-
-Inter-Session Variability
-=========================
-
-The training of the subspace :math:`U` and :math:`D` of an Inter-Session
-Variability model, is performed in two steps. As for JFA, GMM sufficient
-statistics of the training samples should be computed against the UBM GMM. Once
-done, we get a training set of GMM statistics.  Next, we will allocate an
-:py:class:`bob.learn.em.ISVBase` machine, that will then be trained.
-
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
-
-    >>> isv_base = bob.learn.em.ISVBase(gmm, 2) # the dimensions of U is equal to 2
+Total Variability (TV) modelling [4]_ is a front-end initially introduced for
+speaker recognition, which aims at describing samples by vectors of low
+dimensionality called ``i-vectors``.
+The model consists of a subspace :math:`T` and a residual diagonal covariance matrix :math:`\Sigma`, that are then used to
+extract i-vectors, and is built upon the GMM approach.
+In the supervector notation this modelling has the following shape: :math:`\mu = m + Tv`.
 
-Next, we initialize a trainer, which is an instance of
-:py:class:`bob.learn.em.ISVTrainer`, as follows:
+Below is an intuition of the data from the Iris flower `dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_,
+embedded in the iVector space.
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
+.. plot:: plot/plot_iVector.py
+   :include-source: False
 
-   >>> isv_trainer = bob.learn.em.ISVTrainer(relevance_factor=4.) # 4 is the relevance factor
 
-The training process is started by calling the
-:py:meth:`bob.learn.em.train`.
+The iVector statistical model is stored in the container :py:class:`bob.learn.em.IVectorMachine` and the training is performed by
+:py:class:`bob.learn.em.IVectorTrainer`.
+The snippet below shows how to train a Total Variability model.
 
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
-   >>> bob.learn.em.train(isv_trainer, isv_base, TRAINING_STATS, max_iterations=10)
+    >>> import bob.learn.em
+    >>> import numpy
+    >>> numpy.random.seed(10)
+    >>>
+    >>> # Generating some fake data
+    >>> data_class1 = numpy.random.normal(0, 0.5, (10, 3))
+    >>> data_class2 = numpy.random.normal(-0.2, 0.2, (10, 3))
+    >>> data = [data_class1, data_class2]
+    >>>
+    >>> # Creating a fake prior with 2 gaussians of dimension 3
+    >>> prior_gmm = bob.learn.em.GMMMachine(2, 3)
+    >>> prior_gmm.means = numpy.vstack((numpy.random.normal(0, 0.5, (1, 3)), numpy.random.normal(1, 0.5, (1, 3))))
+    >>> # All nice and round diagonal covariance
+    >>> prior_gmm.variances = numpy.ones((2, 3)) * 0.5
+    >>> prior_gmm.weights = numpy.array([0.3, 0.7])
+    >>>
+    >>> # The input to the TV training is the statistics of the GMM
+    >>> gmm_stats_per_class = []
+    >>> for d in data:
+    ...     for i in d:
+    ...       gmm_stats_container = bob.learn.em.GMMStats(2, 3)
+    ...       prior_gmm.acc_statistics(i, gmm_stats_container)
+    ...       gmm_stats_per_class.append(gmm_stats_container)
+    >>>
+    >>> # Finally doing the TV training
+    >>> subspace_dimension_of_t = 2
+    >>>
+    >>> ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
+    >>> ivector_machine = bob.learn.em.IVectorMachine(prior_gmm, subspace_dimension_of_t, 10e-5)
+    >>> # train IVector model
+    >>> bob.learn.em.train(ivector_trainer, ivector_machine, gmm_stats_per_class, 500)
+    >>>
+    >>> # Printing the total variability subspace w.r.t. each Gaussian component
+    >>> print(ivector_machine.t)
+    [[ 0.1101072  -0.20271139]
+     [-0.12426696  0.01402857]
+     [ 0.29584642  0.67414389]
+     [ 0.44728435  0.1744876 ]
+     [ 0.42547226  0.58287138]
+     [ 0.39369553  0.79358693]]
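+
+Once :math:`T` and :math:`\Sigma` are estimated, the i-vector of a sample is obtained by
+accumulating its GMM statistics against the prior and projecting them with the machine. A minimal
+sketch, continuing from the variables of the snippet above:
+
+.. code-block:: python
+
+   # Accumulate the GMM statistics of one sample against the prior GMM
+   probe_stats = bob.learn.em.GMMStats(2, 3)
+   prior_gmm.acc_statistics(data[0][0], probe_stats)
+
+   # Calling the machine returns the i-vector (here of dimensionality 2)
+   ivector = ivector_machine(probe_stats)
+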
+
+
+Linear Scoring
+**************
+.. _linearscoring:
+
+In :ref:`MAP <map>` adaptation, :ref:`ISV <isv>` and :ref:`JFA <jfa>`, the traditional way to compute a score is the
+log-likelihood ratio between the adapted model and the prior, as follows:
 
-Once the training is finished (i.e. the subspaces :math:`V` and :math:`D` are
-estimated), the ISV model can be shared and used by several class-specific
-models.  As for the training samples, we first need to extract GMM statistics
-from the samples.  Class-specific enrollment can then be perfomed, which will
-estimate the class-specific latent variable :math:`z`:
+.. math::
+   score = \ln(P(x | \Theta)) - \ln(P(x | \Theta_{prior})),
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
 
-   >>> m = bob.learn.em.ISVMachine(isv_base)
-   >>> isv_trainer.enroll(m, gse, 5) # where 5 is the number of iterations
+(with :math:`\Theta` varying for each approach).
 
-More information about the training process can be found in [14]_ and [13]_.
+A simplification proposed by [Glembek2009]_, called linear scoring,
+approximates this ratio using a first-order Taylor series, as follows:
 
+.. math::
+   score = \frac{\mu - \mu_{prior}}{\sigma_{prior}} \cdot \left(f - N (\mu_{prior} + U_x)\right),
 
-Total Variability (i-vectors)
-=============================
+where :math:`\mu` is the GMM mean supervector of the adapted model, :math:`\mu_{prior}` and :math:`\sigma_{prior}` are the mean and
+variance supervectors of the prior, :math:`f` is the first order GMM statistics (:py:attr:`bob.learn.em.GMMStats.sum_px`),
+:math:`N` is the zeroth order GMM statistics (:py:attr:`bob.learn.em.GMMStats.n`) and :math:`U_x` is a possible channel offset (:ref:`ISV <isv>`).
 
-The training of the subspace :math:`T` and :math:`\Sigma` of a Total
-Variability model, is performed in two steps. As for JFA and ISV, GMM
-sufficient statistics of the training samples should be computed against the
-UBM GMM. Once done, we get a training set of GMM statistics.  Next, we will
-allocate an instance of :py:class:`bob.learn.em.IVectorMachine`, that will
-then be trained.
+This scoring technique is implemented in :py:func:`bob.learn.em.linear_scoring`.
+The snippet below shows how to compute scores using this approximation.
 
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
-    >>> m = bob.learn.em.IVectorMachine(gmm, 2)
-    >>> m.variance_threshold = 1e-5
-
-
-Next, we initialize a trainer, which is an instance of
-:py:class:`bob.learn.em.IVectorTrainer`, as follows:
+   >>> import bob.learn.em
+   >>> import numpy
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
+   >>> # Defining a fake prior
+   >>> prior_gmm = bob.learn.em.GMMMachine(3, 2)
+   >>> prior_gmm.means = numpy.array([[1, 1], [2, 2.1], [3, 3]])
 
-   >>> ivec_trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
-   >>> TRAINING_STATS_flatten = [gs11, gs12, gs21, gs22]
+   >>> # Defining a fake adapted model
+   >>> adapted_gmm = bob.learn.em.GMMMachine(3,2)
+   >>> adapted_gmm.means = numpy.array([[1.5, 1.5], [2.5, 2.5], [2, 2]])
 
-The training process is started by calling the
-:py:meth:`bob.learn.em.train`.
+   >>> # Defining an input
+   >>> input = numpy.array([[1.5, 1.5], [1.6, 1.6]])
 
-.. doctest::
-   :options: +NORMALIZE_WHITESPACE
+   >>> # Accumulating the GMM statistics of the input w.r.t. the prior
+   >>> stats = bob.learn.em.GMMStats(3, 2)
+   >>> prior_gmm.acc_statistics(input, stats)
 
-   >>> bob.learn.em.train(ivec_trainer, m, TRAINING_STATS_flatten, max_iterations=10)
+   >>> score = bob.learn.em.linear_scoring([adapted_gmm], prior_gmm, [stats], [],
+   ...                                     frame_length_normalisation=True)
+   >>> print(score)
+   [[ 0.25354909]]
 
-More information about the training process can be found in [15]_.
 
 Probabilistic Linear Discriminant Analysis (PLDA)
-=================================================
+-------------------------------------------------
 
 Probabilistic Linear Discriminant Analysis [16]_ is a probabilistic model that
 incorporates components describing both between-class and within-class
@@ -785,6 +593,92 @@ log-likelihood ratio will be computed, which is defined in more formal way by:
   os.chdir(current_directory)
   shutil.rmtree(temp_dir)
 
+
+Score Normalization
+-------------------
+
+Score normalisation aims to compensate for statistical variations in output scores due to changes in the conditions across
+different enrolment and probe samples.
+This is achieved by scaling the distributions of system output scores to better facilitate the application of a single,
+global threshold for authentication.
+
+Bob implements three strategies to normalise scores; they are presented in the next subsections.
+
+Z-Norm
+======
+.. _znorm:
+
+Given a score :math:`s_i`, Z-Norm (zero normalisation) scales this value by the mean (:math:`\mu`) and
+standard deviation (:math:`\sigma`) of an impostor score distribution.
+This score distribution can be computed beforehand, and the normalisation is defined as follows.
+
+.. math::
+
+   zs_i = \frac{s_i - \mu}{\sigma}
+
+
+This normalisation technique is implemented in :py:func:`bob.learn.em.znorm`.
+Below is an example of score normalisation using :py:func:`bob.learn.em.znorm`.
+
+.. plot:: plot/plot_Znorm.py
+   :include-source: True
+   :scale: 125
+
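+For intuition, a minimal numpy sketch of the operation performed by :py:func:`bob.learn.em.znorm` is given below,
+assuming that scores are arranged as a hypothetical ``(probes, models)`` matrix and that :math:`\mu` and :math:`\sigma`
+are estimated per model from the impostor scores.
+
+.. code-block:: python
+
+   import numpy
+
+   def znorm_sketch(raw_scores, impostor_scores):
+       # sketch only: mean and standard deviation of the impostor scores, estimated per model (column)
+       mu = numpy.mean(impostor_scores, axis=0)
+       sigma = numpy.std(impostor_scores, axis=0)
+       return (raw_scores - mu) / sigma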
+
+
+.. note::
+   Observe how the scores were scaled in the plot above.
+
+
+T-Norm
+======
+.. _tnorm:
+
+T-Norm (test normalisation) operates in a probe-centric manner.
+While in Z-Norm :math:`\mu` and :math:`\sigma` are estimated using an impostor set of models and their scores,
+T-Norm computes these statistics using the scores of the current probe sample against a set of models in a cohort :math:`\Theta_{c}`.
+A cohort can be any semantic organization that is sensible to your recognition task, such as sex, ethnicity or age,
+and the normalisation is defined as follows.
+
+.. math::
+
+   ts_i = \frac{s_i - \mu}{\sigma}
+
+where :math:`s_i` is :math:`P(x_i | \Theta)` (the score given the claimed model),
+:math:`\mu = \frac{ \sum\limits_{i=0}^{N} P(x_i | \Theta_{c}) }{N}` (:math:`\Theta_{c}` are the models of one cohort) and
+:math:`\sigma` is the standard deviation computed using the same criterion used to compute :math:`\mu`.
+
+
+This normalisation technique is implemented in :py:func:`bob.learn.em.tnorm`.
+Below is an example of score normalisation using :py:func:`bob.learn.em.tnorm`.
+
+.. plot:: plot/plot_Tnorm.py
+   :include-source: True
+   :scale: 125
+
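+For intuition, a minimal probe-centric numpy sketch of this normalisation is given below, under the same hypothetical
+``(probes, models)`` score layout as the Z-Norm sketch above.
+
+.. code-block:: python
+
+   import numpy
+
+   def tnorm_sketch(raw_scores, cohort_scores):
+       # sketch only: mean and standard deviation of each probe's scores against the cohort models (per row)
+       mu = numpy.mean(cohort_scores, axis=1, keepdims=True)
+       sigma = numpy.std(cohort_scores, axis=1, keepdims=True)
+       return (raw_scores - mu) / sigma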
+
+.. note::
+   T-Norm introduces extra computation during scoring, as each probe sample needs to be compared to every cohort model in order
+   to compute :math:`\mu` and :math:`\sigma`.
+
+
+ZT-Norm
+=======
+.. _ztnorm:
+
+ZT-Norm consists of applying :ref:`Z-Norm <znorm>` followed by :ref:`T-Norm <tnorm>`, and
+it is implemented in :py:func:`bob.learn.em.ztnorm`.
+
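+The helper takes the raw scores plus three auxiliary score matrices from which the normalisation statistics are estimated.
+Below is a minimal sketch of the call, mirroring the plot script further down and using hypothetical score arrays laid out as
+``(probes, models)`` matrices.
+
+.. code-block:: python
+
+   import numpy
+   import bob.learn.em
+
+   raw_scores = numpy.random.normal(0.5, 5., (200, 10))  # probes vs. enrolled models
+   z_scores = numpy.random.normal(-5., 5., (200, 10))    # impostor (Z) probes vs. enrolled models
+   t_scores = numpy.random.normal(-6., 5., (200, 10))    # probes vs. cohort (T) models
+   zt_scores = bob.learn.em.tnorm(z_scores, t_scores)    # T-normalised Z scores, as in the plot script
+
+   normalised = bob.learn.em.ztnorm(raw_scores, z_scores, t_scores, zt_scores)
+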
+Below is a complete example of score normalisation using :py:func:`bob.learn.em.ztnorm`.
+
+.. plot:: plot/plot_ZTnorm.py
+   :include-source: True
+   :scale: 125
+
+.. note::
+   Observe how the scores were scaled in the plot above.
+
+
 .. Place here your external references
 .. include:: links.rst
 .. [1] http://dx.doi.org/10.1109/TASL.2006.881693
diff --git a/doc/index.rst b/doc/index.rst
index d4b7062f6ad9808e8da09d45f68bc73f10122753..59312743d2362802e3fe9acc5fb38073ae17173f 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -10,12 +10,16 @@
  Expectation Maximization Machine Learning Tools
 ================================================
 
-The EM algorithm is an iterative method that estimates parameters for statistical models, where the model depends on unobserved latent variables. The EM iteration alternates between performing an expectation (E) step, which creates a function for the expectation of the log-likelihood evaluated using the current estimate for the parameters, and a maximization (M) step, which computes parameters maximizing the expected log-likelihood found on the E step. These parameter-estimates are then used to determine the distribution of the latent variables in the next E step [WikiEM]_. 
+The EM algorithm is an iterative method that estimates parameters for statistical models, where the model depends on unobserved latent variables.
+The EM iteration alternates between performing an expectation (E) step, which creates a function for the expectation of the log-likelihood
+evaluated using the current estimate for the parameters, and a maximization (M) step,
+which computes parameters maximizing the expected log-likelihood found on the E step.
+These parameter-estimates are then used to determine the distribution of the latent variables in the next E step [WikiEM]_.
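+
+In this package that iteration is wrapped by the :py:func:`bob.learn.em.train` helper; a rough sketch of the loop it performs,
+assuming a trainer that exposes ``initialize``, ``e_step``, ``m_step`` and ``compute_likelihood``, is shown below.
+
+.. code-block:: python
+
+   # rough sketch of the EM loop driven by bob.learn.em.train (not the actual implementation)
+   def em_loop(trainer, machine, data, max_iterations=50, convergence_threshold=None):
+       trainer.initialize(machine, data)
+       trainer.e_step(machine, data)
+       likelihood = trainer.compute_likelihood(machine)
+       for i in range(max_iterations):
+           trainer.m_step(machine, data)  # update the model parameters
+           trainer.e_step(machine, data)  # re-estimate the expectations of the latent variables
+           new_likelihood = trainer.compute_likelihood(machine)
+           if convergence_threshold is not None and \
+                   abs(new_likelihood - likelihood) < convergence_threshold:
+               break
+           likelihood = new_likelihood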
 
 The package includes the machine definition per se and a selection of different trainers for specialized purposes:
+ - K-Means
  - Maximum Likelihood (ML)
  - Maximum a Posteriori (MAP)
- - K-Means
  - Inter Session Variability Modelling (ISV)
  - Joint Factor Analysis (JFA)
  - Total Variability Modeling (iVectors)
@@ -47,8 +51,7 @@ References
 ..   [Roweis1998] Roweis, Sam. "EM algorithms for PCA and SPCA." Advances in neural information processing systems (1998): 626-632.
 
 ..   [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
-
-
+..   [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
 
 Indices and tables
 ------------------
diff --git a/doc/plot/plot_ISV.py b/doc/plot/plot_ISV.py
new file mode 100755
index 0000000000000000000000000000000000000000..3cde2b1647c85cf2f912f85c75051fc316869e2e
--- /dev/null
+++ b/doc/plot/plot_ISV.py
@@ -0,0 +1,100 @@
+import bob.db.iris
+import numpy
+
+numpy.random.seed(2)  # FIXING A SEED
+import bob.learn.linear
+import bob.learn.em
+
+import matplotlib.pyplot as plt
+
+
+def train_ubm(features, n_gaussians):
+    input_size = features.shape[1]
+
+    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
+    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
+
+    # K-means clustering is first used to estimate the initial means, the final variances and the final weights of each Gaussian component
+    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
+    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
+
+    # Getting the means, weights and variances of each cluster. This is a good initialization for the ML training
+    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
+    means = kmeans_machine.means
+
+    # initialize the UBM with the output of kmeans
+    ubm.means = means
+    ubm.variances = variances
+    ubm.weights = weights
+
+    # Creating the ML Trainer. We will adapt only the means
+    trainer = bob.learn.em.ML_GMMTrainer(update_means=True, update_variances=False, update_weights=False)
+    bob.learn.em.train(trainer, ubm, features)
+
+    return ubm
+
+
+def isv_train(features, ubm):
+    """
+    `features` is a list of lists, one list of data points per user:
+    [ [data_point_1_user_1, data_point_2_user_1], [data_point_1_user_2, data_point_2_user_2] ]
+    """
+
+    stats = []
+    for user in features:
+        user_stats = []
+        for f in user:
+            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
+            ubm.acc_statistics(f, s)
+            user_stats.append(s)
+        stats.append(user_stats)
+
+    relevance_factor = 4
+    subspace_dimension_of_u = 1
+
+    isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
+    trainer = bob.learn.em.ISVTrainer(relevance_factor)
+    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
+    bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)
+
+    return isvbase
+
+
+### GENERATING DATA
+data_per_class = bob.db.iris.data()
+setosa = numpy.column_stack((data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
+versicolor = numpy.column_stack((data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
+virginica = numpy.column_stack((data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
+data = numpy.vstack((setosa, versicolor, virginica))
+
+# TRAINING THE PRIOR
+ubm = train_ubm(data, 3)
+isvbase = isv_train([setosa, versicolor, virginica], ubm)
+
+# Variability direction
+u0 = isvbase.u[0:2, 0] / numpy.linalg.norm(isvbase.u[0:2, 0])
+u1 = isvbase.u[2:4, 0] / numpy.linalg.norm(isvbase.u[2:4, 0])
+u2 = isvbase.u[4:6, 0] / numpy.linalg.norm(isvbase.u[4:6, 0])
+
+figure, ax = plt.subplots()
+plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
+plt.scatter(versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor")
+plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
+
+plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue", marker="x", label="centroids - mle")
+#plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue", marker=".", label="within class varibility", s=0.01)
+
+ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] + u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
+plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] + u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
+plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] + u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
+
+ax.set_xticklabels("" for item in ax.get_xticklabels())
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+# plt.grid(True)
+plt.xlabel('Sepal length')
+plt.ylabel('Petal width')
+plt.legend()
+plt.show()
diff --git a/doc/plot/plot_JFA.py b/doc/plot/plot_JFA.py
new file mode 100755
index 0000000000000000000000000000000000000000..d5fe9f33c2754c390361df90a0ef71bd2486ae4d
--- /dev/null
+++ b/doc/plot/plot_JFA.py
@@ -0,0 +1,119 @@
+import bob.db.iris
+import numpy
+
+numpy.random.seed(2)  # FIXING A SEED
+import bob.learn.linear
+import bob.learn.em
+
+import matplotlib.pyplot as plt
+
+
+def train_ubm(features, n_gaussians):
+    input_size = features.shape[1]
+
+    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
+    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
+
+    # K-means clustering is first used to estimate the initial means, the final variances and the final weights of each Gaussian component
+    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
+    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
+
+    # Getting the means, weights and variances of each cluster. This is a good initialization for the ML training
+    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
+    means = kmeans_machine.means
+
+    # initialize the UBM with the output of kmeans
+    ubm.means = means
+    ubm.variances = variances
+    ubm.weights = weights
+
+    # Creating the ML Trainer. We will adapt only the means
+    trainer = bob.learn.em.ML_GMMTrainer(update_means=True, update_variances=False, update_weights=False)
+    bob.learn.em.train(trainer, ubm, features)
+
+    return ubm
+
+
+def jfa_train(features, ubm):
+    """
+    `features` is a list of lists, one list of data points per user:
+    [ [data_point_1_user_1, data_point_2_user_1], [data_point_1_user_2, data_point_2_user_2] ]
+    """
+
+    stats = []
+    for user in features:
+        user_stats = []
+        for f in user:
+            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
+            ubm.acc_statistics(f, s)
+            user_stats.append(s)
+        stats.append(user_stats)
+
+    subspace_dimension_of_u = 1
+    subspace_dimension_of_v = 1
+
+    jfa_base = bob.learn.em.JFABase(ubm, subspace_dimension_of_u, subspace_dimension_of_v)
+    trainer = bob.learn.em.JFATrainer()
+    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
+    bob.learn.em.train_jfa(trainer, jfa_base, stats, max_iterations=50)
+
+    return jfa_base
+
+
+### GENERATING DATA
+data_per_class = bob.db.iris.data()
+setosa = numpy.column_stack((data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
+versicolor = numpy.column_stack((data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
+virginica = numpy.column_stack((data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
+data = numpy.vstack((setosa, versicolor, virginica))
+
+# TRAINING THE PRIOR
+ubm = train_ubm(data, 3)
+jfa_base = jfa_train([setosa, versicolor, virginica], ubm)
+
+# Variability direction U
+u0 = jfa_base.u[0:2, 0] / numpy.linalg.norm(jfa_base.u[0:2, 0])
+u1 = jfa_base.u[2:4, 0] / numpy.linalg.norm(jfa_base.u[2:4, 0])
+u2 = jfa_base.u[4:6, 0] / numpy.linalg.norm(jfa_base.u[4:6, 0])
+
+
+# Variability direction V
+v0 = jfa_base.v[0:2, 0] / numpy.linalg.norm(jfa_base.v[0:2, 0])
+v1 = jfa_base.v[2:4, 0] / numpy.linalg.norm(jfa_base.v[2:4, 0])
+v2 = jfa_base.v[4:6, 0] / numpy.linalg.norm(jfa_base.v[4:6, 0])
+
+
+
+figure, ax = plt.subplots()
+plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
+plt.scatter(versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor")
+plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
+
+plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue", marker="x", label="centroids - mle")
+#plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue", marker=".", label="within class varibility", s=0.01)
+
+# U
+ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] + u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
+plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] + u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
+plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] + u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
+
+# V
+ax.arrow(ubm.means[0, 0], ubm.means[0, 1], v0[0], v0[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+ax.arrow(ubm.means[1, 0], ubm.means[1, 1], v1[0], v1[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+ax.arrow(ubm.means[2, 0], ubm.means[2, 1], v2[0], v2[1], fc="k", ec="k", head_width=0.05, head_length=0.1)
+plt.text(ubm.means[0, 0] + v0[0], ubm.means[0, 1] + v0[1] - 0.1, r'$\mathbf{V}_1$', fontsize=15)
+plt.text(ubm.means[1, 0] + v1[0], ubm.means[1, 1] + v1[1] - 0.1, r'$\mathbf{V}_2$', fontsize=15)
+plt.text(ubm.means[2, 0] + v2[0], ubm.means[2, 1] + v2[1] - 0.1, r'$\mathbf{V}_3$', fontsize=15)
+
+ax.set_xticklabels("" for item in ax.get_xticklabels())
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+# plt.grid(True)
+plt.xlabel('Sepal length')
+plt.ylabel('Petal width')
+plt.legend(loc=2)
+plt.ylim([-1, 3.5])
+
+#plt.show()
diff --git a/doc/plot/plot_MAP.py b/doc/plot/plot_MAP.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7b9a7ffdcae21d76ab3a1fa833773bc1893b34b
--- /dev/null
+++ b/doc/plot/plot_MAP.py
@@ -0,0 +1,39 @@
+import bob.learn.em
+import bob.db.iris
+import numpy
+numpy.random.seed(10)
+from matplotlib import pyplot
+
+data_per_class = bob.db.iris.data()
+setosa = numpy.column_stack((data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
+versicolor = numpy.column_stack((data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
+virginica = numpy.column_stack((data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
+
+data = numpy.vstack((setosa, versicolor, virginica))
+
+mle_machine = bob.learn.em.GMMMachine(3, 2)  # Three Gaussian components with a feature dimensionality of 2
+mle_trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
+mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
+bob.learn.em.train(mle_trainer, mle_machine, data, max_iterations=200, convergence_threshold=1e-5) # Train the prior GMM with ML estimation
+
+# Creating some random data centered at (2, 2)
+new_data = numpy.random.normal(2, 0.8, (50, 2))
+map_machine = bob.learn.em.GMMMachine(3, 2) # Three Gaussian components with a feature dimensionality of 2
+map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
+bob.learn.em.train(map_trainer, map_machine, new_data, max_iterations=200, convergence_threshold=1e-5) # Adapt the GMM with MAP estimation
+
+
+figure, ax = pyplot.subplots()
+pyplot.scatter(new_data[:, 0], new_data[:, 1], c="olivedrab", label="new data")
+pyplot.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
+pyplot.scatter(versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor")
+pyplot.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
+pyplot.scatter(mle_machine.means[:, 0], mle_machine.means[:, 1], c="blue", marker="x", label="prior centroids - mle", s=60)
+pyplot.scatter(map_machine.means[:, 0], map_machine.means[:, 1], c="red", marker="^", label="adapted centroids - map", s=60)
+pyplot.legend()
+ax.set_xticklabels("" for item in ax.get_xticklabels())
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+ax.set_xlabel("Sepal length")
+ax.set_ylabel("Petal width")
+pyplot.show()
+
diff --git a/doc/plot/plot_ML.py b/doc/plot/plot_ML.py
new file mode 100644
index 0000000000000000000000000000000000000000..4a30e71e776c823757670afd3874d2eb222cef09
--- /dev/null
+++ b/doc/plot/plot_ML.py
@@ -0,0 +1,29 @@
+import bob.learn.em
+import bob.db.iris
+import numpy
+from matplotlib import pyplot
+
+data_per_class = bob.db.iris.data()
+setosa = numpy.column_stack((data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
+versicolor = numpy.column_stack((data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
+virginica = numpy.column_stack((data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
+
+data = numpy.vstack((setosa, versicolor, virginica))
+
+machine = bob.learn.em.GMMMachine(3, 2) # Three Gaussian components with a feature dimensionality of 2
+trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
+machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
+bob.learn.em.train(trainer, machine, data, max_iterations = 200, convergence_threshold = 1e-5) # Train the GMM with ML estimation
+
+figure, ax = pyplot.subplots()
+pyplot.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
+pyplot.scatter(versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor")
+pyplot.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
+pyplot.scatter(machine.means[:, 0], machine.means[:, 1], c="blue", marker="x", label="centroids", s=60)
+pyplot.legend()
+ax.set_xticklabels("" for item in ax.get_xticklabels())
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+ax.set_xlabel("Sepal length")
+ax.set_ylabel("Petal width")
+pyplot.show()
+
diff --git a/doc/plot/plot_Tnorm.py b/doc/plot/plot_Tnorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..24137b08d2f8381f497cab3de9f721e3cb6b1c4c
--- /dev/null
+++ b/doc/plot/plot_Tnorm.py
@@ -0,0 +1,43 @@
+import bob.learn.em
+import numpy
+numpy.random.seed(10)
+
+from matplotlib import pyplot
+
+n_clients = 10
+n_scores_per_client = 200
+
+# Defining some fake scores for genuines and impostors
+impostor_scores = numpy.random.normal(-15.5, 5, (n_scores_per_client, n_clients))
+genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
+
+# Defining the scores for the statistics computation
+t_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
+
+# T - Normalizing
+t_norm_impostors = bob.learn.em.tnorm(impostor_scores, t_scores)
+t_norm_genuine = bob.learn.em.tnorm(genuine_scores, t_scores)
+
+# PLOTTING
+figure = pyplot.subplot(2, 1, 1)
+ax = figure.axes
+pyplot.title("Raw scores", fontsize=8)
+pyplot.hist(impostor_scores.reshape(n_scores_per_client*n_clients), label='Impostors', normed=True,
+            color='red', alpha=0.5, bins=50)
+pyplot.hist(genuine_scores.reshape(n_scores_per_client*n_clients), label='Genuine', normed=True,
+            color='blue', alpha=0.5, bins=50)
+pyplot.legend(fontsize=8)
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+
+figure = pyplot.subplot(2, 1, 2)
+ax = figure.axes
+pyplot.title("T-norm scores", fontsize=8)
+pyplot.hist(t_norm_impostors.reshape(n_scores_per_client*n_clients), label='T-Norm Impostors', normed=True,
+            color='red', alpha=0.5, bins=50)
+pyplot.hist(t_norm_genuine.reshape(n_scores_per_client*n_clients), label='T-Norm Genuine', normed=True,
+            color='blue', alpha=0.5, bins=50)
+pyplot.legend(fontsize=8)
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+pyplot.show()
diff --git a/doc/plot/plot_ZTnorm.py b/doc/plot/plot_ZTnorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1b151bc8a7d6abb5c2416ae05a97cee58771e8c
--- /dev/null
+++ b/doc/plot/plot_ZTnorm.py
@@ -0,0 +1,47 @@
+import bob.learn.em
+import numpy
+numpy.random.seed(10)
+
+from matplotlib import pyplot
+
+n_clients = 10
+n_scores_per_client = 200
+
+# Defining some fake scores for genuines and impostors
+impostor_scores = numpy.random.normal(-15.5, 5, (n_scores_per_client, n_clients))
+genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
+
+# Defining the scores for the statistics computation
+z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
+t_scores = numpy.random.normal(-6., 5, (n_scores_per_client, n_clients))
+
+# T-normalizing the Z-scores
+zt_scores = bob.learn.em.tnorm(z_scores, t_scores)
+
+# ZT - Normalizing
+zt_norm_impostors = bob.learn.em.ztnorm(impostor_scores, z_scores, t_scores, zt_scores)
+zt_norm_genuine = bob.learn.em.ztnorm(genuine_scores, z_scores, t_scores, zt_scores)
+
+# PLOTTING
+figure = pyplot.subplot(2, 1, 1)
+ax = figure.axes
+pyplot.title("Raw scores", fontsize=8)
+pyplot.hist(impostor_scores.reshape(n_scores_per_client*n_clients), label='Impostors', normed=True,
+            color='red', alpha=0.5, bins=50)
+pyplot.hist(genuine_scores.reshape(n_scores_per_client*n_clients), label='Genuine', normed=True,
+            color='blue', alpha=0.5, bins=50)
+pyplot.legend(fontsize=8)
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+
+figure = pyplot.subplot(2, 1, 2)
+ax = figure.axes
+pyplot.title("T-norm scores", fontsize=8)
+pyplot.hist(zt_norm_impostors.reshape(n_scores_per_client*n_clients), label='T-Norm Impostors', normed=True,
+            color='red', alpha=0.5, bins=50)
+pyplot.hist(zt_norm_genuine.reshape(n_scores_per_client*n_clients), label='T-Norm Genuine', normed=True,
+            color='blue', alpha=0.5, bins=50)
+pyplot.legend(fontsize=8)
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+pyplot.show()
diff --git a/doc/plot/plot_Znorm.py b/doc/plot/plot_Znorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..62e31e67f3d6da68cc5c3788bb7d3be37dfc308e
--- /dev/null
+++ b/doc/plot/plot_Znorm.py
@@ -0,0 +1,43 @@
+import bob.learn.em
+import numpy
+numpy.random.seed(10)
+
+from matplotlib import pyplot
+
+n_clients = 10
+n_scores_per_client = 200
+
+# Defining some fake scores for genuines and impostors
+impostor_scores = numpy.random.normal(-15.5, 5, (n_scores_per_client, n_clients))
+genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
+
+# Defining the scores for the statistics computation
+z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
+
+# Z - Normalizing
+z_norm_impostors = bob.learn.em.znorm(impostor_scores, z_scores)
+z_norm_genuine = bob.learn.em.znorm(genuine_scores, z_scores)
+
+# PLOTTING
+figure = pyplot.subplot(2, 1, 1)
+ax = figure.axes
+pyplot.title("Raw scores", fontsize=8)
+pyplot.hist(impostor_scores.reshape(n_scores_per_client*n_clients), label='Impostors', normed=True,
+            color='red', alpha=0.5, bins=50)
+pyplot.hist(genuine_scores.reshape(n_scores_per_client*n_clients), label='Genuine', normed=True,
+            color='blue', alpha=0.5, bins=50)
+pyplot.legend(fontsize=8)
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+
+figure = pyplot.subplot(2, 1, 2)
+ax = figure.axes
+pyplot.title("Z-norm scores", fontsize=8)
+pyplot.hist(z_norm_impostors.reshape(n_scores_per_client*n_clients), label='Z-Norm Impostors', normed=True,
+            color='red', alpha=0.5, bins=50)
+pyplot.hist(z_norm_genuine.reshape(n_scores_per_client*n_clients), label='Z-Norm Genuine', normed=True,
+            color='blue', alpha=0.5, bins=50)
+pyplot.legend(fontsize=8)
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+pyplot.show()
diff --git a/doc/plot/plot_iVector.py b/doc/plot/plot_iVector.py
new file mode 100755
index 0000000000000000000000000000000000000000..bb8ed57caf60320bed4e585dd1a3e56773fa6b6a
--- /dev/null
+++ b/doc/plot/plot_iVector.py
@@ -0,0 +1,160 @@
+import bob.db.iris
+import numpy
+
+numpy.random.seed(2)  # FIXING A SEED
+import bob.learn.linear
+import bob.learn.em
+
+import matplotlib.pyplot as plt
+
+
+def train_ubm(features, n_gaussians):
+    input_size = features.shape[1]
+
+    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
+    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
+
+    # K-means clustering is first used to estimate the initial means, the final variances and the final weights of each Gaussian component
+    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
+    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
+
+    # Getting the means, weights and variances of each cluster. This is a good initialization for the ML training
+    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
+    means = kmeans_machine.means
+
+    # initialize the UBM with the output of kmeans
+    ubm.means = means
+    ubm.variances = variances
+    ubm.weights = weights
+
+    # Creating the ML Trainer. We will adapt only the means
+    trainer = bob.learn.em.ML_GMMTrainer(update_means=True, update_variances=False, update_weights=False)
+    bob.learn.em.train(trainer, ubm, features)
+
+    return ubm
+
+
+def ivector_train(features, ubm):
+    """
+    `features` is a list of lists, one list of data points per class:
+    [ [data_point_1_user_1, data_point_2_user_1], [data_point_1_user_2, data_point_2_user_2] ]
+    """
+
+    stats = []
+    for user in features:
+        s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
+        for f in user:
+            ubm.acc_statistics(f, s)
+        stats.append(s)
+
+    subspace_dimension_of_t = 2
+
+    ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
+    ivector_machine = bob.learn.em.IVectorMachine(ubm, subspace_dimension_of_t, 10e-5)
+
+    # train IVector model
+    bob.learn.em.train(ivector_trainer, ivector_machine, stats, 500)
+
+    return ivector_machine
+
+
+def acc_stats(data, gmm):
+    gmm_stats = []
+    for d in data:
+        s = bob.learn.em.GMMStats(gmm.shape[0], gmm.shape[1])
+        gmm.acc_statistics(d, s)
+        gmm_stats.append(s)
+
+    return gmm_stats
+
+
+def compute_ivectors(gmm_stats, ivector_machine):
+
+    ivectors = []
+    for g in gmm_stats:
+        ivectors.append(ivector_machine(g))
+
+    return numpy.array(ivectors)
+
+### GENERATING DATA
+data_per_class = bob.db.iris.data()
+setosa = numpy.column_stack((data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
+versicolor = numpy.column_stack((data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
+virginica = numpy.column_stack((data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
+data = numpy.vstack((setosa, versicolor, virginica))
+
+# TRAINING THE PRIOR
+ubm = train_ubm(data, 3)
+ivector_machine = ivector_train([setosa, versicolor, virginica], ubm)
+
+##  Variability direction U
+#t0 = T[0:2, 0] / numpy.linalg.norm(T[0:2, 0])
+#t1 = T[2:4, 0] / numpy.linalg.norm(T[2:4, 0])
+#t2 = T[4:6, 0] / numpy.linalg.norm(T[4:6, 0])
+
+
+#figure, ax = plt.subplots()
+figure = plt.subplot(2, 1, 1)
+ax = figure.axes
+plt.title("Raw fetures")
+plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
+plt.scatter(versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor")
+plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
+# plt.grid(True)
+#plt.xlabel('Sepal length')
+plt.ylabel('Petal width')
+plt.legend(loc=2)
+plt.ylim([-1, 3.5])
+ax.set_xticklabels("" for item in ax.get_xticklabels())
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+
+
+figure = plt.subplot(2, 1, 2)
+ax = figure.axes
+ivector_setosa = compute_ivectors(acc_stats(setosa, ubm), ivector_machine)
+ivector_versicolor = compute_ivectors(acc_stats(versicolor, ubm), ivector_machine)
+ivector_virginica = compute_ivectors(acc_stats(virginica, ubm), ivector_machine)
+
+
+# Whitening iVectors
+whitening_trainer = bob.learn.linear.WhiteningTrainer()
+whitener_machine = bob.learn.linear.Machine(ivector_setosa.shape[1], ivector_setosa.shape[1])
+whitening_trainer.train(numpy.vstack((ivector_setosa, ivector_versicolor, ivector_virginica)), whitener_machine)
+ivector_setosa = whitener_machine(ivector_setosa)
+ivector_versicolor = whitener_machine(ivector_versicolor)
+ivector_virginica = whitener_machine(ivector_virginica)
+
+
+#LDA ivectors
+lda_trainer = bob.learn.linear.FisherLDATrainer()
+lda_machine = bob.learn.linear.Machine(ivector_setosa.shape[1], ivector_setosa.shape[1])
+lda_trainer.train([ivector_setosa, ivector_versicolor, ivector_virginica], lda_machine)
+ivector_setosa = lda_machine(ivector_setosa)
+ivector_versicolor = lda_machine(ivector_versicolor)
+ivector_virginica = lda_machine(ivector_virginica)
+
+
+#WCCN ivectors
+#wccn_trainer = bob.learn.linear.WCCNTrainer()
+#wccn_machine = bob.learn.linear.Machine(ivector_setosa.shape[1], ivector_setosa.shape[1])
+#wccn_trainer.train([ivector_setosa, ivector_versicolor, ivector_virginica], wccn_machine)
+#ivector_setosa = wccn_machine(ivector_setosa)
+#ivector_versicolor = wccn_machine(ivector_versicolor)
+#ivector_virginica = wccn_machine(ivector_virginica)
+
+
+plt.title("First two ivectors")
+plt.scatter(ivector_setosa[:, 0], ivector_setosa[:, 1], c="darkcyan", label="setosa", marker="x")
+plt.scatter(ivector_versicolor[:, 0], ivector_versicolor[:, 1], c="goldenrod", label="versicolor", marker="x")
+plt.scatter(ivector_virginica[:, 0], ivector_virginica[:, 1], c="dimgrey", label="virginica", marker="x")
+
+ax.set_xticklabels("" for item in ax.get_xticklabels())
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+
+# plt.grid(True)
+#plt.xlabel('Sepal length')
+#plt.ylabel('Petal width')
+plt.legend(loc=2)
+plt.ylim([-1, 3.5])
+
+plt.show()
diff --git a/doc/plot/plot_kmeans.py b/doc/plot/plot_kmeans.py
new file mode 100644
index 0000000000000000000000000000000000000000..e8f0bccbd2f91ff7f87f88509f4dd603307c6657
--- /dev/null
+++ b/doc/plot/plot_kmeans.py
@@ -0,0 +1,28 @@
+import bob.learn.em
+import bob.db.iris
+import numpy
+from matplotlib import pyplot
+
+data_per_class = bob.db.iris.data()
+setosa = numpy.column_stack((data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
+versicolor = numpy.column_stack((data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
+virginica = numpy.column_stack((data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
+
+data = numpy.vstack((setosa, versicolor, virginica))
+
+# Training KMeans
+machine = bob.learn.em.KMeansMachine(3, 2) # Three clusters with a feature dimensionality of 2
+trainer = bob.learn.em.KMeansTrainer()
+bob.learn.em.train(trainer, machine, data, max_iterations = 200, convergence_threshold = 1e-5) # Train the KMeansMachine
+
+# Plotting
+figure, ax = pyplot.subplots()
+pyplot.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
+pyplot.scatter(versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor")
+pyplot.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
+pyplot.scatter(machine.means[:, 0], machine.means[:, 1], c="blue", marker="x", label="centroids", s=60)
+pyplot.legend()
+ax.set_xticklabels("" for item in ax.get_xticklabels())
+ax.set_yticklabels("" for item in ax.get_yticklabels())
+ax.set_xlabel("Sepal length")
+ax.set_ylabel("Petal width")
diff --git a/test-requirements.txt b/test-requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a72389c3271b9d0c859caa1118af99c3dd21cd3c
--- /dev/null
+++ b/test-requirements.txt
@@ -0,0 +1 @@
+bob.db.iris
\ No newline at end of file