diff --git a/doc/c_cpp_api.rst b/doc/c_cpp_api.rst deleted file mode 100644 index 2f4526afd78cba709d1560f251b9a73ad915b7db..0000000000000000000000000000000000000000 --- a/doc/c_cpp_api.rst +++ /dev/null @@ -1,121 +0,0 @@ -.. vim: set fileencoding=utf-8 : -.. Andre Anjos <andre.dos.anjos@gmail.com> -.. Tue 15 Oct 14:59:05 2013 - -========= - C++ API -========= - -The C++ API of ``xbob.learn.activation`` allows users to leverage from -automatic converters for classes in :py:class:`xbob.learn.activation`. To use -the C API, clients should first, include the header file -``<xbob.learn.activation/api.h>`` on their compilation units and then, make -sure to call once ``import_xbob_learn_activation()`` at their module -instantiation, as explained at the `Python manual -<http://docs.python.org/2/extending/extending.html#using-capsules>`_. - -Here is a dummy C example showing how to include the header and where to call -the import function: - -.. code-block:: c++ - - #include <xbob.learn.activation/api.h> - - PyMODINIT_FUNC initclient(void) { - - PyObject* m Py_InitModule("client", ClientMethods); - - if (!m) return; - - // imports dependencies - if (import_xbob_blitz() < 0) { - PyErr_Print(); - PyErr_SetString(PyExc_ImportError, "cannot import module"); - return 0; - } - - if (import_xbob_io_base() < 0) { - PyErr_Print(); - PyErr_SetString(PyExc_ImportError, "cannot import module"); - return 0; - } - - if (import_xbob_learn_activation() < 0) { - PyErr_Print(); - PyErr_SetString(PyExc_ImportError, "cannot import module"); - return 0; - } - - // imports xbob.learn.activation C-API - import_xbob_learn_activation(); - - } - -.. note:: - - The include directory can be discovered using - :py:func:`xbob.learn.activation.get_include`. - -Activation Functors -------------------- - -.. cpp:type:: PyBobLearnActivationObject - - The pythonic object representation for a ``bob::machine::Activation`` - object. It is the base class of all activation functors available in - |project|. In C/C++ code, we recommend you only manipulate objects like this - to keep your code agnostic to the activation type being used. - - .. code-block:: cpp - - typedef struct { - PyObject_HEAD - bob::machine::Activation* base; - } PyBobLearnActivationObject; - - .. cpp:member:: bob::machine::Activation* base - - A pointer to the activation functor virtual implementation. - - -.. cpp:function:: int PyBobLearnActivation_Check(PyObject* o) - - Checks if the input object ``o`` is a ``PyBobLearnActivationObject``. - Returns ``1`` if it is, and ``0`` otherwise. - - -.. cpp:function:: PyObject* PyBobLearnActivation_NewFromActivation(boost::shared_ptr<bob::machine::Activation> a) - - Constructs a new :c:type:`PyBobLearnActivationObject` starting from a shared - pointer to a pre-allocated `bob::machine::Activation` instance. This API is - available so that return values from actuall C++ machines can be mapped into - Python. It is the sole way to build an object of type :py:class:`Activation` - without recurring to the derived classes. - -.. note:: - - Other object definitions exist for each of the specializations for - activation functors found in |project|. They are exported through the module - C-API, but we don't recommend using them since you'd loose generality. In - case you do absolutely need to use any of these derivations, they have all - the same object configuration: - - .. 
code-block:: c++ - - typedef struct { - PyBobLearnActivationObject parent; - bob::machine::<Subtype>Activation* base; - } PyBobLearn<Subtype>ActivationObject; - - Presently, ``<Subtype>`` can be one of: - - * Identity - * Linear - * Logistic - * HyperbolicTangent - * MultipliedHyperbolicTangent - - Type objects are also named consistently like - ``PyBobLearn<Subtype>Activation_Type``. - -.. include:: links.rst diff --git a/doc/conf.py b/doc/conf.py index 9ff3a0fcfc3ea255d6d78df61d3cd6b55fd143b0..8ebae82c42647139db4c7a7b6df6a309704536a5 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -58,12 +58,12 @@ source_suffix = '.rst' master_doc = 'index' # General information about the project. -project = u'xbob.learn.activation' +project = u'xbob.learn.misc' import time copyright = u'%s, Idiap Research Institute' % time.strftime('%Y') # Grab the setup entry -distribution = pkg_resources.require('xbob.learn.activation')[0] +distribution = pkg_resources.require('xbob.learn.misc')[0] # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -129,7 +129,7 @@ if sphinx.__version__ >= "1.0": #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = 'xbob_learn_activation' +#html_short_title = 'xbob_learn_misc' # The name of an image file (relative to this directory) to place at the top # of the sidebar. @@ -187,7 +187,7 @@ html_favicon = 'img/favicon.ico' #html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'xbob_learn_activation_doc' +htmlhelp_basename = 'xbob_learn_misc_doc' # -- Options for LaTeX output -------------------------------------------------- @@ -201,7 +201,7 @@ latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'xbob_learn_activation.tex', u'Bob Activation Functions', + ('index', 'xbob_learn_misc.tex', u'Bob Miscellaneous Machine Learning Tools', u'Biometrics Group, Idiap Research Institute', 'manual'), ] @@ -241,7 +241,7 @@ rst_epilog = """ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ - ('index', 'xbob_learn_activation', u'Bob Activation Function Documentation', [u'Idiap Research Institute'], 1) + ('index', 'xbob_learn_misc', u'Bob Miscellaneous Machine Learning Tools', [u'Idiap Research Institute'], 1) ] # Default processing flags for sphinx diff --git a/doc/guide.rst b/doc/guide.rst new file mode 100644 index 0000000000000000000000000000000000000000..1c67b691b1bbe61fe2090c6034aadf804341a343 --- /dev/null +++ b/doc/guide.rst @@ -0,0 +1,777 @@ +.. vim: set fileencoding=utf-8 : +.. Laurent El Shafey <Laurent.El-Shafey@idiap.ch> +.. Wed Mar 14 12:31:35 2012 +0100 +.. +.. Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland + +.. testsetup:: * + + import numpy + numpy.set_printoptions(precision=3, suppress=True) + + import xbob.learn.misc + + import os + import tempfile + current_directory = os.path.realpath(os.curdir) + temp_dir = tempfile.mkdtemp(prefix='xbob_doctest_') + os.chdir(temp_dir) + +============ + User guide +============ + +This section includes the machine/trainer guides for learning techniques +available in this package. + +Machines +-------- + +Machines are one of the core components of |project|. 
+They represent
+statistical models or other functions defined by parameters that can be learnt
+or set by using Trainers.
+
+K-means machines
+================
+
+`k-means <http://en.wikipedia.org/wiki/K-means_clustering>`_ is a clustering
+method which aims to partition a set of observations into :math:`k` clusters.
+The `training` procedure is described further below. Here, we explain only how
+to use the resulting machine. For the sake of example, we create a new
+:py:class:`xbob.learn.misc.KMeansMachine` as follows:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> machine = xbob.learn.misc.KMeansMachine(2,3) # Two clusters with a feature dimensionality of 3
+   >>> machine.means = numpy.array([[1,0,0],[0,0,1]], 'float64') # Defines the two clusters
+
+Then, given some input data, it is possible to determine to which cluster the
+data is closest, as well as the minimum distance.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> sample = numpy.array([2,1,-2], 'float64')
+   >>> print(machine.get_closest_mean(sample)) # Returns the index of the closest mean and the squared distance to it
+   (0, 6.0)
+
+
+Gaussian machines
+=================
+
+The :py:class:`xbob.learn.misc.Gaussian` represents a `multivariate diagonal
+Gaussian (or normal) distribution
+<http://en.wikipedia.org/wiki/Multivariate_normal_distribution>`_. In this
+context, a *diagonal* Gaussian refers to the covariance matrix of the
+distribution being diagonal. When the covariance matrix is diagonal, each
+variable in the distribution is independent of the others.
+
+Objects of this class are normally used as building blocks for more complex
+:py:class:`xbob.learn.misc.GMMMachine` or GMM objects, but can also be used
+individually. Here is how to create one multivariate diagonal Gaussian
+distribution:
+
+.. doctest::
+
+   >>> g = xbob.learn.misc.Gaussian(2) # bi-variate diagonal normal distribution
+   >>> g.mean = numpy.array([0.3, 0.7], 'float64')
+   >>> g.mean
+   array([ 0.3, 0.7])
+   >>> g.variance = numpy.array([0.2, 0.1], 'float64')
+   >>> g.variance
+   array([ 0.2, 0.1])
+
+Once the :py:class:`xbob.learn.misc.Gaussian` has been set, you can use it to
+estimate the log-likelihood of an input feature vector with a matching number
+of dimensions:
+
+.. doctest::
+
+   >>> log_likelihood = g(numpy.array([0.4, 0.4], 'float64'))
+
+As with other machines, you can save and re-load machines of this type using
+:py:meth:`xbob.learn.misc.Gaussian.save` and the class constructor
+respectively.
+
+Gaussian mixture models
+=======================
+
+The :py:class:`xbob.learn.misc.GMMMachine` represents a Gaussian `mixture model
+<http://en.wikipedia.org/wiki/Mixture_model>`_ (GMM), which consists of a
+mixture of weighted :py:class:`xbob.learn.misc.Gaussian`\s.
+
+.. doctest::
+
+   >>> gmm = xbob.learn.misc.GMMMachine(2,3) # Mixture of two diagonal Gaussians of dimension 3
+
+By default, the diagonal Gaussian distributions of the GMM are initialized with
+zero mean and unit variance, and the weights are identical. This can be updated
+using the :py:attr:`xbob.learn.misc.GMMMachine.means`,
+:py:attr:`xbob.learn.misc.GMMMachine.variances` or
+:py:attr:`xbob.learn.misc.GMMMachine.weights` attributes.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> gmm.weights = numpy.array([0.4, 0.6], 'float64')
+   >>> gmm.means = numpy.array([[1, 6, 2], [4, 3, 2]], 'float64')
+   >>> gmm.variances = numpy.array([[1, 2, 1], [2, 1, 2]], 'float64')
+   >>> gmm.means
+   array([[ 1., 6., 2.],
+          [ 4., 3., 2.]])
+
+Once the :py:class:`xbob.learn.misc.GMMMachine` has been set, you can use it to
+estimate the log-likelihood of an input feature vector with a matching number
+of dimensions:
+
+.. doctest::
+
+   >>> log_likelihood = gmm(numpy.array([5.1, 4.7, -4.9], 'float64'))
+
+As with other machines, you can save and re-load machines of this type using
+:py:meth:`xbob.learn.misc.GMMMachine.save` and the class constructor
+respectively.
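+
+A minimal sketch of this save/re-load cycle is shown below. It assumes that
+the ``xbob.io.base`` package is available and that its ``HDF5File`` class is
+used as the storage backend; the file name is purely illustrative:
+
+.. code-block:: python
+
+   import xbob.io.base
+
+   # write the machine parameters to an HDF5 file
+   gmm.save(xbob.io.base.HDF5File('gmm.hdf5', 'w'))
+
+   # re-create an equivalent machine from the same file, via the constructor
+   gmm_loaded = xbob.learn.misc.GMMMachine(xbob.io.base.HDF5File('gmm.hdf5'))
+
+The same pattern applies to the other machine types described in this guide.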
+
+Gaussian mixture model statistics
+==================================
+
+The :py:class:`xbob.learn.misc.GMMStats` is a container for the sufficient
+statistics of a GMM distribution.
+
+Given a GMM, the sufficient statistics of a sample can be computed as
+follows:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> gs = xbob.learn.misc.GMMStats(2,3)
+   >>> sample = numpy.array([0.5, 4.5, 1.5])
+   >>> gmm.acc_statistics(sample, gs)
+   >>> print(gs) # doctest: +SKIP
+
+The sufficient statistics can then be accessed (or set, as below) through the
+following attributes.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> gs = xbob.learn.misc.GMMStats(2,3)
+   >>> log_likelihood = -3. # log-likelihood of the accumulated samples
+   >>> T = 1 # Number of samples used to accumulate statistics
+   >>> n = numpy.array([0.4, 0.6], 'float64') # zeroth order stats
+   >>> sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64') # first order stats
+   >>> sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64') # second order stats
+   >>> gs.log_likelihood = log_likelihood
+   >>> gs.t = T
+   >>> gs.n = n
+   >>> gs.sum_px = sumpx
+   >>> gs.sum_pxx = sumpxx
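+
+For reference, these quantities follow the usual EM sufficient statistics of a
+GMM (a summary of the standard definitions rather than of the exact internal
+conventions of the bindings). Writing :math:`P(c|x_t)` for the responsibility
+of component :math:`c` on sample :math:`x_t`, the attributes ``n``, ``sum_px``
+and ``sum_pxx`` hold, per component,
+
+.. math::
+
+   n_c = \sum_t P(c|x_t), \qquad
+   s_c = \sum_t P(c|x_t)\, x_t, \qquad
+   S_c = \sum_t P(c|x_t)\, x_t^2,
+
+where the square of :math:`x_t` is taken element-wise and :math:`t` runs over
+the accumulated samples.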
+
+Joint Factor Analysis
+=====================
+
+Joint Factor Analysis (JFA) [1]_ [2]_ is a session variability modelling
+technique built on top of the Gaussian mixture modelling approach. It utilises
+a within-class subspace :math:`U`, a between-class subspace :math:`V`, and a
+subspace for the residuals :math:`D` to capture and suppress a significant
+portion of the within-class (session) variation.
+
+An instance of :py:class:`xbob.learn.misc.JFABase` carries information about
+the matrices :math:`U`, :math:`V` and :math:`D`, which can be shared between
+several classes. In contrast, after the enrolment phase, an instance of
+:py:class:`xbob.learn.misc.JFAMachine` carries class-specific information about
+the latent variables :math:`y` and :math:`z`.
+
+An instance of :py:class:`xbob.learn.misc.JFABase` can be initialized as
+follows, given an existing GMM:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> jfa_base = xbob.learn.misc.JFABase(gmm,2,2) # dimensions of U and V are both equal to 2
+   >>> U = numpy.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]], 'float64')
+   >>> V = numpy.array([[6, 5], [4, 3], [2, 1], [1, 2], [3, 4], [5, 6]], 'float64')
+   >>> d = numpy.array([0, 1, 0, 1, 0, 1], 'float64')
+   >>> jfa_base.u = U
+   >>> jfa_base.v = V
+   >>> jfa_base.d = d
+
+Next, this :py:class:`xbob.learn.misc.JFABase` can be shared by several
+instances of :py:class:`xbob.learn.misc.JFAMachine`, the initialization being
+as follows:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> m = xbob.learn.misc.JFAMachine(jfa_base)
+   >>> m.y = numpy.array([1,2], 'float64')
+   >>> m.z = numpy.array([3,4,1,2,0,1], 'float64')
+
+
+Once the :py:class:`xbob.learn.misc.JFAMachine` has been configured for a
+specific class, the log-likelihood (score) that an input sample belongs to the
+enrolled class can be estimated by first computing the GMM sufficient
+statistics of this input sample, and then calling
+:py:meth:`xbob.learn.misc.JFAMachine.forward` on the sufficient statistics.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> gs = xbob.learn.misc.GMMStats(2,3)
+   >>> gmm.acc_statistics(sample, gs)
+   >>> score = m.forward(gs)
+
+As with other machines, you can save and re-load machines of this type using
+:py:meth:`xbob.learn.misc.JFAMachine.save` and the class constructor
+respectively.
+
+
+Inter-Session Variability
+=========================
+
+Similarly to Joint Factor Analysis, Inter-Session Variability (ISV) modelling
+[3]_ [2]_ is another session variability modelling technique built on top of
+the Gaussian mixture modelling approach. It utilises a within-class subspace
+:math:`U` and a subspace for the residuals :math:`D` to capture and suppress a
+significant portion of the within-class (session) variation. The main
+difference compared to JFA is the absence of the between-class subspace
+:math:`V`.
+
+Similarly to JFA, an instance of :py:class:`xbob.learn.misc.ISVBase` carries
+information about the matrices :math:`U` and :math:`D`, which can be shared
+between several classes, whereas an instance of
+:py:class:`xbob.learn.misc.ISVMachine` carries class-specific information about
+the latent variable :math:`z`.
+
+An instance of :py:class:`xbob.learn.misc.ISVBase` can be initialized as
+follows, given an existing GMM:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> isv_base = xbob.learn.misc.ISVBase(gmm,2) # dimension of U is equal to 2
+   >>> isv_base.u = U
+   >>> isv_base.d = d
+
+Next, this :py:class:`xbob.learn.misc.ISVBase` can be shared by several
+instances of :py:class:`xbob.learn.misc.ISVMachine`, the initialization being
+as follows:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> m = xbob.learn.misc.ISVMachine(isv_base)
+   >>> m.z = numpy.array([3,4,1,2,0,1], 'float64')
+
+Once the :py:class:`xbob.learn.misc.ISVMachine` has been configured for a
+specific class, the log-likelihood (score) that an input sample belongs to the
+enrolled class can be estimated by first computing the GMM sufficient
+statistics of this input sample, and then calling
+:py:meth:`xbob.learn.misc.ISVMachine.forward` on the sufficient statistics.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> gs = xbob.learn.misc.GMMStats(2,3)
+   >>> gmm.acc_statistics(sample, gs)
+   >>> score = m.forward(gs)
+
+As with other machines, you can save and re-load machines of this type using
+:py:meth:`xbob.learn.misc.ISVMachine.save` and the class constructor
+respectively.
+
+
+Total Variability (i-vectors)
+=============================
+
+Total Variability (TV) modelling [4]_ is a front-end initially introduced for
+speaker recognition, which aims at describing samples by vectors of low
+dimensionality called ``i-vectors``. The model, which is built upon the GMM
+approach, consists of a subspace :math:`T` and a residual diagonal covariance
+matrix :math:`\Sigma` that are then used to extract i-vectors.
+
+An instance of the class :py:class:`xbob.learn.misc.IVectorMachine` carries
+information about these two matrices.
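+
+For reference, in the standard formulation of [4]_, the i-vector of a sample
+is the posterior mean of the latent variable given the GMM sufficient
+statistics of that sample. Writing :math:`N` for the (expanded) diagonal
+matrix of zeroth order statistics and :math:`\tilde{F}` for the mean-centred
+first order statistics, this reads (a sketch of the usual expression, not
+necessarily the exact internal computation of the bindings):
+
+.. math::
+
+   w = \left(I + T^{\mathsf{T}} \Sigma^{-1} N T\right)^{-1}
+       T^{\mathsf{T}} \Sigma^{-1} \tilde{F}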
+Such a machine can be initialized as follows:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> m = xbob.learn.misc.IVectorMachine(gmm, 2)
+   >>> m.t = numpy.array([[1.,2],[4,1],[0,3],[5,8],[7,10],[11,1]])
+   >>> m.sigma = numpy.array([1.,2.,1.,3.,2.,4.])
+
+
+Once the :py:class:`xbob.learn.misc.IVectorMachine` has been set, the
+extraction of an i-vector :math:`w_{ij}` can be done in two steps, by first
+extracting the GMM sufficient statistics, and then estimating the i-vector:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> gs = xbob.learn.misc.GMMStats(2,3)
+   >>> gmm.acc_statistics(sample, gs)
+   >>> w_ij = m.forward(gs)
+
+As with other machines, you can save and re-load machines of this type using
+:py:meth:`xbob.learn.misc.IVectorMachine.save` and the class constructor
+respectively.
+
+
+Probabilistic Linear Discriminant Analysis (PLDA)
+=================================================
+
+Probabilistic Linear Discriminant Analysis [5]_ [6]_ is a probabilistic model
+that incorporates components describing both between-class and within-class
+variations. Given a mean :math:`\mu`, between-class and within-class subspaces
+:math:`F` and :math:`G` and residual noise :math:`\epsilon` with zero mean and
+diagonal covariance matrix :math:`\Sigma`, the model assumes that a sample
+:math:`x_{i,j}` is generated by the following process:
+
+.. math::
+
+   x_{i,j} = \mu + F h_{i} + G w_{i,j} + \epsilon_{i,j}
+
+Information about a PLDA model (:math:`\mu`, :math:`F`, :math:`G` and
+:math:`\Sigma`) is carried by an instance of the class
+:py:class:`xbob.learn.misc.PLDABase`.
+
+.. doctest::
+
+   >>> ### This creates a PLDABase container for input feature of dimensionality 3,
+   >>> ### and with subspaces F and G of rank 1 and 2 respectively.
+   >>> pldabase = xbob.learn.misc.PLDABase(3,1,2)
+
+Class-specific information (usually from enrollment samples) is contained in
+an instance of :py:class:`xbob.learn.misc.PLDAMachine`, which must be attached
+to a given :py:class:`xbob.learn.misc.PLDABase`. Once done, log-likelihood
+computations can be performed.
+
+.. doctest::
+
+   >>> plda = xbob.learn.misc.PLDAMachine(pldabase)
+   >>> samples = numpy.array([[3.5,-3.4,102], [4.5,-4.3,56]], dtype=numpy.float64)
+   >>> loglike = plda.compute_log_likelihood(samples)
+
+
+Trainers
+--------
+
+In the previous section, the concept of a `machine` was introduced. A `machine`
+is fed by some input data, processes it and returns an output. Machines can be
+learnt using trainers in |project|.
+
+K-means
+=======
+
+**k-means** [7]_ is a clustering method, which aims to partition a set of
+observations into :math:`k` clusters. This is an `unsupervised` technique. The
+training data is passed in a 2D :py:class:`numpy.ndarray` container.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> data = numpy.array([[3,-3,100], [4,-4,98], [3.5,-3.5,99], [-7,7,-100], [-5,5,-101]], dtype='float64')
+
+The training procedure will learn the `means` for the
+:py:class:`xbob.learn.misc.KMeansMachine`. The number :math:`k` of `means` is
+given when creating the `machine`, as well as the dimensionality of the
+features.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> kmeans = xbob.learn.misc.KMeansMachine(2, 3) # Create a machine with k=2 clusters with a dimensionality equal to 3
+
+The training procedure for `k-means` is an **Expectation-Maximization**-based
+[8]_ algorithm.
+There are several options that can be set, such as the maximum
+number of iterations and the criterion used to determine whether convergence
+has occurred. After setting all of these options, the training procedure can
+then be called.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> kmeansTrainer = xbob.learn.misc.KMeansTrainer()
+   >>> kmeansTrainer.max_iterations = 200
+   >>> kmeansTrainer.convergence_threshold = 1e-5
+
+   >>> kmeansTrainer.train(kmeans, data) # Train the KMeansMachine
+   >>> print(kmeans.means)
+   [[ -6. 6. -100.5]
+    [ 3.5 -3.5 99. ]]
+
+
+Maximum likelihood for Gaussian mixture model
+=============================================
+
+A Gaussian **mixture model** (GMM) [9]_ is a common probabilistic model. In
+order to train the parameters of such a model it is common to use a
+**maximum-likelihood** (ML) approach [10]_. To do this we use an
+**Expectation-Maximization** (EM) algorithm [8]_. Let's start by creating
+a :py:class:`xbob.learn.misc.GMMMachine`. By default, all of the Gaussians have
+zero mean and unit variance, and all the weights are equal. As a starting
+point, we could set the means to the ones obtained with **k-means** [7]_.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> gmm = xbob.learn.misc.GMMMachine(2,3) # Create a machine with 2 Gaussians and feature dimensionality 3
+   >>> gmm.means = kmeans.means # Set the means to the ones obtained with k-means
+
+The |project| class to learn the parameters of a GMM [9]_ using ML [10]_ is
+:py:class:`xbob.learn.misc.ML_GMMTrainer`. It uses an **EM**-based [8]_ algorithm
+and requires the user to specify which parameters of the GMM are updated at
+each iteration (means, variances and/or weights). In addition, and as for
+**k-means** [7]_, it has parameters such as the maximum number of iterations
+and the criterion used to determine if the parameters have converged.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> trainer = xbob.learn.misc.ML_GMMTrainer(True, True, True) # update means/variances/weights at each iteration
+   >>> trainer.convergence_threshold = 1e-5
+   >>> trainer.max_iterations = 200
+   >>> trainer.train(gmm, data)
+   >>> print(gmm) # doctest: +SKIP
+
+
+MAP-adaptation for Gaussian mixture model
+=========================================
+
+|project| also supports the training of GMMs [9]_ using a **maximum a
+posteriori** (MAP) approach [11]_. MAP is closely related to the ML [10]_
+technique but it incorporates a prior on the quantity that we want to estimate.
+In our case, this prior is a GMM [9]_. Based on this prior model and some
+training data, a new model, the MAP estimate, will be `adapted`.
+
+Let's consider that the previously trained GMM [9]_ is our prior model.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> print(gmm) # doctest: +SKIP
+
+The training data used to compute the MAP estimate [11]_ is again stored in a
+2D :py:class:`numpy.ndarray` container.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> dataMAP = numpy.array([[7,-7,102], [6,-6,103], [-3.5,3.5,-97]], dtype='float64')
+
+The |project| class used to perform MAP adaptation training [11]_ is
+:py:class:`xbob.learn.misc.MAP_GMMTrainer`. As with the ML estimate [10]_, it uses
+an **EM**-based [8]_ algorithm and requires the user to specify which parts of
+the GMM are adapted at each iteration (means, variances and/or weights). In
+addition, it has parameters such as the maximum number of iterations and the
+criterion used to determine whether the parameters have converged, as well as a
+relevance factor which indicates the importance we give to the prior. Once the
+trainer has been created, a prior GMM [9]_ needs to be set.
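+
+To give an idea of how the relevance factor enters the computation, the
+commonly used mean update can be written as follows (a sketch of the classical
+relevance-factor MAP adaptation of the means, not necessarily the exact
+expression implemented by the trainer):
+
+.. math::
+
+   \hat{m}_c = \alpha_c \, E_c(x) + (1 - \alpha_c) \, m_c,
+   \qquad
+   \alpha_c = \frac{n_c}{n_c + r},
+
+where :math:`r` is the relevance factor, :math:`m_c` the prior mean,
+:math:`n_c` the occupancy of component :math:`c` on the adaptation data and
+:math:`E_c(x)` the corresponding data mean. A larger relevance factor therefore
+keeps the adapted means closer to the prior.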
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> relevance_factor = 4.
+   >>> trainer = xbob.learn.misc.MAP_GMMTrainer(relevance_factor, True, False, False) # mean adaptation only
+   >>> trainer.convergence_threshold = 1e-5
+   >>> trainer.max_iterations = 200
+   >>> trainer.set_prior_gmm(gmm)
+   True
+   >>> gmmAdapted = xbob.learn.misc.GMMMachine(2,3) # Create a new machine for the MAP estimate
+   >>> trainer.train(gmmAdapted, dataMAP)
+   >>> print(gmmAdapted) # doctest: +SKIP
+
+
+Joint Factor Analysis
+=====================
+
+The training of the subspaces :math:`U`, :math:`V` and :math:`D` of a Joint
+Factor Analysis model is performed in two steps. First, GMM sufficient
+statistics of the training samples should be computed against the UBM GMM. Once
+done, we get a training set of GMM statistics:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> F1 = numpy.array( [0.3833, 0.4516, 0.6173, 0.2277, 0.5755, 0.8044, 0.5301, 0.9861, 0.2751, 0.0300, 0.2486, 0.5357]).reshape((6,2))
+   >>> F2 = numpy.array( [0.0871, 0.6838, 0.8021, 0.7837, 0.9891, 0.5341, 0.0669, 0.8854, 0.9394, 0.8990, 0.0182, 0.6259]).reshape((6,2))
+   >>> F=[F1, F2]
+
+   >>> N1 = numpy.array([0.1379, 0.1821, 0.2178, 0.0418]).reshape((2,2))
+   >>> N2 = numpy.array([0.1069, 0.9397, 0.6164, 0.3545]).reshape((2,2))
+   >>> N=[N1, N2]
+
+   >>> gs11 = xbob.learn.misc.GMMStats(2,3)
+   >>> gs11.n = N1[:,0]
+   >>> gs11.sum_px = F1[:,0].reshape(2,3)
+   >>> gs12 = xbob.learn.misc.GMMStats(2,3)
+   >>> gs12.n = N1[:,1]
+   >>> gs12.sum_px = F1[:,1].reshape(2,3)
+
+   >>> gs21 = xbob.learn.misc.GMMStats(2,3)
+   >>> gs21.n = N2[:,0]
+   >>> gs21.sum_px = F2[:,0].reshape(2,3)
+   >>> gs22 = xbob.learn.misc.GMMStats(2,3)
+   >>> gs22.n = N2[:,1]
+   >>> gs22.sum_px = F2[:,1].reshape(2,3)
+
+   >>> TRAINING_STATS = [[gs11, gs12], [gs21, gs22]]
+
+In the following, we will allocate a :py:class:`xbob.learn.misc.JFABase`
+machine, which will then be trained.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> jfa_base = xbob.learn.misc.JFABase(gmm, 2, 2) # the dimensions of U and V are both equal to 2
+
+Next, we initialize a trainer, which is an instance of
+:py:class:`xbob.learn.misc.JFATrainer`, as follows:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> jfa_trainer = xbob.learn.misc.JFATrainer(10) # 10 is the number of iterations
+
+The training process is started by calling
+:py:meth:`xbob.learn.misc.JFATrainer.train`.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> jfa_trainer.train(jfa_base, TRAINING_STATS)
+
+Once the training is finished (i.e. the subspaces :math:`U`, :math:`V` and
+:math:`D` are estimated), the JFA model can be shared and used by several
+class-specific models. As for the training samples, we first need to extract
+GMM statistics from the samples. These GMM statistics are manually defined in
+the following.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> Ne = numpy.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2,2))
+   >>> Fe = numpy.array([0.1579, 0.1925, 0.3242, 0.1234, 0.2354, 0.2734, 0.2514, 0.5874, 0.3345, 0.2463, 0.4789, 0.5236]).reshape((6,2))
+   >>> gse1 = xbob.learn.misc.GMMStats(2,3)
+   >>> gse1.n = Ne[:,0]
+   >>> gse1.sum_px = Fe[:,0].reshape(2,3)
+   >>> gse2 = xbob.learn.misc.GMMStats(2,3)
+   >>> gse2.n = Ne[:,1]
+   >>> gse2.sum_px = Fe[:,1].reshape(2,3)
+   >>> gse = [gse1, gse2]
+
+Class-specific enrollment can then be performed as follows. This will estimate
+the class-specific latent variables :math:`y` and :math:`z`:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> m = xbob.learn.misc.JFAMachine(jfa_base)
+   >>> jfa_trainer.enrol(m, gse, 5) # where 5 is the number of enrollment iterations
+
+More information about the training process can be found in [12]_ and [13]_.
+
+
+Inter-Session Variability
+=========================
+
+The training of the subspaces :math:`U` and :math:`D` of an Inter-Session
+Variability model is performed in two steps. As for JFA, GMM sufficient
+statistics of the training samples should be computed against the UBM GMM. Once
+done, we get a training set of GMM statistics. Next, we will allocate an
+:py:class:`xbob.learn.misc.ISVBase` machine, which will then be trained.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> isv_base = xbob.learn.misc.ISVBase(gmm, 2) # the dimension of U is equal to 2
+
+Next, we initialize a trainer, which is an instance of
+:py:class:`xbob.learn.misc.ISVTrainer`, as follows:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> isv_trainer = xbob.learn.misc.ISVTrainer(10, 4.) # 10 is the number of iterations, and 4 is the relevance factor
+
+The training process is started by calling
+:py:meth:`xbob.learn.misc.ISVTrainer.train`.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> isv_trainer.train(isv_base, TRAINING_STATS)
+
+Once the training is finished (i.e. the subspaces :math:`U` and :math:`D` are
+estimated), the ISV model can be shared and used by several class-specific
+models. As for the training samples, we first need to extract GMM statistics
+from the samples. Class-specific enrollment can then be performed, which will
+estimate the class-specific latent variable :math:`z`:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> m = xbob.learn.misc.ISVMachine(isv_base)
+   >>> isv_trainer.enrol(m, gse, 5) # where 5 is the number of iterations
+
+More information about the training process can be found in [14]_ and [13]_.
+
+
+Total Variability (i-vectors)
+=============================
+
+The training of the subspace :math:`T` and of :math:`\Sigma` for a Total
+Variability model is performed in two steps. As for JFA and ISV, GMM
+sufficient statistics of the training samples should be computed against the
+UBM GMM. Once done, we get a training set of GMM statistics. Next, we will
+allocate an instance of :py:class:`xbob.learn.misc.IVectorMachine`, which will
+then be trained.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> m = xbob.learn.misc.IVectorMachine(gmm, 2)
+   >>> m.variance_threshold = 1e-5
+
+
+Next, we initialize a trainer, which is an instance of
+:py:class:`xbob.learn.misc.IVectorTrainer`, as follows:
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> ivec_trainer = xbob.learn.misc.IVectorTrainer(update_sigma=True, max_iterations=10)
+   >>> TRAINING_STATS_flatten = [gs11, gs12, gs21, gs22]
+
+The training process is started by calling
+:py:meth:`xbob.learn.misc.IVectorTrainer.train`.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> ivec_trainer.train(m, TRAINING_STATS_flatten)
+
+More information about the training process can be found in [15]_.
+
+Probabilistic Linear Discriminant Analysis (PLDA)
+=================================================
+
+Probabilistic Linear Discriminant Analysis [16]_ is a probabilistic model that
+incorporates components describing both between-class and within-class
+variations. Given a mean :math:`\mu`, between-class and within-class subspaces
+:math:`F` and :math:`G` and residual noise :math:`\epsilon` with zero mean and
+diagonal covariance matrix :math:`\Sigma`, the model assumes that a sample
+:math:`x_{i,j}` is generated by the following process:
+
+.. math::
+
+   x_{i,j} = \mu + F h_{i} + G w_{i,j} + \epsilon_{i,j}
+
+
+An Expectation-Maximization algorithm can be used to learn the parameters of
+this model, :math:`\mu`, :math:`F`, :math:`G` and :math:`\Sigma`. As these
+parameters can be shared between classes, there is a specific container class
+for this purpose, which is :py:class:`xbob.learn.misc.PLDABase`. The process is
+described in detail in [17]_.
+
+Let us consider a training set of two classes, each with 3 samples of
+dimensionality 3.
+
+.. doctest::
+   :options: +NORMALIZE_WHITESPACE
+
+   >>> data1 = numpy.array([[3,-3,100], [4,-4,50], [40,-40,150]], dtype=numpy.float64)
+   >>> data2 = numpy.array([[3,6,-50], [4,8,-100], [40,79,-800]], dtype=numpy.float64)
+   >>> data = [data1,data2]
+
+Learning a PLDA model can be performed by instantiating the class
+:py:class:`xbob.learn.misc.PLDATrainer`, and calling the
+:py:meth:`xbob.learn.misc.PLDATrainer.train()` method.
+
+.. doctest::
+
+   >>> ### This creates a PLDABase container for input feature of dimensionality 3,
+   >>> ### and with subspaces F and G of rank 1 and 2 respectively.
+   >>> pldabase = xbob.learn.misc.PLDABase(3,1,2)
+
+   >>> trainer = xbob.learn.misc.PLDATrainer()
+   >>> trainer.train(pldabase, data)
+
+Once trained, this PLDA model can be used to compute the log-likelihood of a
+set of samples given some hypothesis. For this purpose, a
+:py:class:`xbob.learn.misc.PLDAMachine` should be instantiated. Then, the
+log-likelihood that a set of samples share the same latent identity variable
+:math:`h_{i}` (i.e. the samples are coming from the same identity/class) is
+obtained by calling the
+:py:meth:`xbob.learn.misc.PLDAMachine.compute_log_likelihood()` method.
+
+.. doctest::
+
+   >>> plda = xbob.learn.misc.PLDAMachine(pldabase)
+   >>> samples = numpy.array([[3.5,-3.4,102], [4.5,-4.3,56]], dtype=numpy.float64)
+   >>> loglike = plda.compute_log_likelihood(samples)
+
+If separate models for different classes need to be enrolled, each of them with
+a set of enrolment samples, then several instances of
+:py:class:`xbob.learn.misc.PLDAMachine` need to be created and enrolled using
+the :py:meth:`xbob.learn.misc.PLDATrainer.enrol()` method as follows.
+
+.. doctest::
+
+   >>> plda1 = xbob.learn.misc.PLDAMachine(pldabase)
+   >>> samples1 = numpy.array([[3.5,-3.4,102], [4.5,-4.3,56]], dtype=numpy.float64)
+   >>> trainer.enrol(plda1, samples1)
+   >>> plda2 = xbob.learn.misc.PLDAMachine(pldabase)
+   >>> samples2 = numpy.array([[3.5,7,-49], [4.5,8.9,-99]], dtype=numpy.float64)
+   >>> trainer.enrol(plda2, samples2)
+
+Afterwards, the joint log-likelihood of the enrollment samples and of one or
+several test samples can be computed as previously described, separately for
+each model.
+
+.. doctest::
+
+   >>> sample = numpy.array([3.2,-3.3,58], dtype=numpy.float64)
+   >>> l1 = plda1.compute_log_likelihood(sample)
+   >>> l2 = plda2.compute_log_likelihood(sample)
+
+In a verification scenario, there are two possible hypotheses: 1.
+:math:`x_{test}` and :math:`x_{enrol}` share the same class. 2.
+:math:`x_{test}` and :math:`x_{enrol}` are from different classes. Using
+:py:meth:`xbob.learn.misc.PLDAMachine.__call__` or
+:py:meth:`xbob.learn.misc.PLDAMachine.forward`, the corresponding
+log-likelihood ratio will be computed, which is defined in a more formal way by:
+:math:`s = \ln(P(x_{test},x_{enrol})) - \ln(P(x_{test})P(x_{enrol}))`
+
+.. doctest::
+
+   >>> s1 = plda1(sample)
+   >>> s2 = plda2(sample)
+
+.. testcleanup:: *
+
+   import shutil
+   os.chdir(current_directory)
+   shutil.rmtree(temp_dir)
+
+.. Place here your external references
+.. include:: links.rst
+.. [1] http://dx.doi.org/10.1109/TASL.2006.881693
+.. [2] http://publications.idiap.ch/index.php/publications/show/2606
+.. [3] http://dx.doi.org/10.1016/j.csl.2007.05.003
+.. [4] http://dx.doi.org/10.1109/TASL.2010.2064307
+.. [5] http://dx.doi.org/10.1109/ICCV.2007.4409052
+.. [6] http://doi.ieeecomputersociety.org/10.1109/TPAMI.2013.38
+
+.. [7] http://en.wikipedia.org/wiki/K-means_clustering
+.. [8] http://en.wikipedia.org/wiki/Expectation-maximization_algorithm
+.. [9] http://en.wikipedia.org/wiki/Mixture_model
+.. [10] http://en.wikipedia.org/wiki/Maximum_likelihood
+.. [11] http://en.wikipedia.org/wiki/Maximum_a_posteriori_estimation
+.. [12] http://dx.doi.org/10.1109/TASL.2006.881693
+.. [13] http://publications.idiap.ch/index.php/publications/show/2606
+.. [14] http://dx.doi.org/10.1016/j.csl.2007.05.003
+.. [15] http://dx.doi.org/10.1109/TASL.2010.2064307
+.. [16] http://dx.doi.org/10.1109/ICCV.2007.4409052
+.. [17] http://doi.ieeecomputersociety.org/10.1109/TPAMI.2013.38
+
diff --git a/doc/index.rst b/doc/index.rst
index afd7480446c498f0e513e0459d3350d66366b205..9be410d9dfd84639b616024bcfecd1e0932331f8 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -2,17 +2,16 @@
 .. Andre Anjos <andre.anjos@idiap.ch>
 .. Fri 13 Dec 2013 12:50:06 CET
 ..
-.. Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
+.. Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
 
-=========================
- Bob Activation Functors
-=========================
+======================================
+ Miscellaneous Machine Learning Tools
+======================================
 
 .. todolist::
 
-This module contains some functionality from Bob bound to Python, available in
-the C++ counter-part ``bob::machine``. It includes Activation functors from the
-Machine Learning core.
+This package includes various machine learning utilities which have not yet
+been ported into the new framework.
 
 Documentation
 -------------
 
 ..
toctree:: :maxdepth: 2 + guide py_api - c_cpp_api Indices and tables ------------------ diff --git a/doc/py_api.rst b/doc/py_api.rst index 40eff591511e3f7a2ab78ef1e4ecc252683e622c..8cd4ba06be8bf070fb61f10bc06bbaed1f2a7a0a 100644 --- a/doc/py_api.rst +++ b/doc/py_api.rst @@ -7,8 +7,8 @@ ============ This section includes information for using the pure Python API of -``xbob.learn.activation``. +``xbob.learn.misc``. -.. automodule:: xbob.learn.activation +.. automodule:: xbob.learn.misc