Commit 61db3868 authored by Amir MOHAMMADI

Merge branch '22-documentation-of-this-package-sucks' into 'master'

Re-write the user guide

See merge request !24
parents 0c54d5dd 942aca46
Pipeline #9754 passed with stages
in 15 minutes and 48 seconds
......@@ -15,5 +15,6 @@ dist
.nfs*
.gdb_history
build
.DS_Store
*.egg
src/
......@@ -15,7 +15,8 @@
static auto GMMMachine_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".GMMMachine",
"This class implements a multivariate diagonal Gaussian distribution.",
"This class implements the statistical model for multivariate diagonal mixture Gaussian distribution (GMM). "
"A GMM is defined as :math:`\\sum_{c=0}^{C} \\omega_c \\mathcal{N}(x | \\mu_c, \\sigma_c)`, where :math:`C` is the number of Gaussian components :math:`\\mu_c`, :math:`\\sigma_c` and :math:`\\omega_c` are respectively the the mean, variance and the weight of each gaussian component :math:`c`.",
"See Section 2.3.9 of Bishop, \"Pattern recognition and machine learning\", 2006"
).add_constructor(
bob::extension::FunctionDoc(
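As a quick illustration of this model (a sketch, not part of the merge request; the component count, dimensionality and values are arbitrary), the weighted sum above is what the machine evaluates when asked for a log-likelihood:

    import numpy
    import bob.learn.em

    gmm = bob.learn.em.GMMMachine(2, 3)       # C = 2 Gaussians over 3D features
    gmm.weights = numpy.array([0.4, 0.6])     # the omega_c
    gmm.means = numpy.array([[0., 0., 0.],    # the mu_c
                             [1., 1., 1.]])
    gmm.variances = numpy.ones((2, 3))        # the (diagonal) sigma_c
    ll = gmm.log_likelihood(numpy.array([.5, .5, .5]))  # log of the mixture density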
......@@ -744,7 +745,7 @@ static PyObject* PyBobLearnEMGMMMachine_loglikelihood_(PyBobLearnEMGMMMachineObj
/*** acc_statistics ***/
static auto acc_statistics = bob::extension::FunctionDoc(
"acc_statistics",
"Accumulate the GMM statistics for this sample(s). Inputs are checked.",
"Accumulate the GMM statistics (:py:class:`bob.learn.em.GMMStats`) for this sample(s). Inputs are checked.",
"",
true
)
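A short usage sketch (mirroring the accumulation loop that appears later in this merge request; ``gmm`` is a trained :py:class:`bob.learn.em.GMMMachine` and ``samples`` a 2D float64 array):

    stats = bob.learn.em.GMMStats(gmm.shape[0], gmm.shape[1])
    gmm.acc_statistics(samples, stats)  # checked variant; acc_statistics_ skips the checks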
......@@ -780,7 +781,7 @@ static PyObject* PyBobLearnEMGMMMachine_accStatistics(PyBobLearnEMGMMMachineObje
/*** acc_statistics_ ***/
static auto acc_statistics_ = bob::extension::FunctionDoc(
"acc_statistics_",
"Accumulate the GMM statistics for this sample(s). Inputs are NOT checked.",
"Accumulate the GMM statistics (:py:class:`bob.learn.em.GMMStats`) for this sample(s). Inputs are NOT checked.",
"",
true
)
......@@ -853,7 +854,7 @@ static PyObject* PyBobLearnEMGMMMachine_setVarianceThresholds_method(PyBobLearnE
/*** get_gaussian ***/
static auto get_gaussian = bob::extension::FunctionDoc(
"get_gaussian",
"Get the specified Gaussian component.",
"Get the specified Gaussian (:py:class:`bob.learn.em.Gaussian`) component.",
".. note:: An exception is thrown if i is out of range.",
true
)
......
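For example (a sketch; the ``mean``/``variance`` attribute names are assumed from the :py:class:`bob.learn.em.Gaussian` API):

    g = gmm.get_gaussian(0)       # raises if the index is out of range
    print(g.mean, g.variance)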
......@@ -40,7 +40,7 @@ class KMeansTrainer
/**
* @brief Constructor
*/
KMeansTrainer(InitializationMethod=RANDOM);
KMeansTrainer(InitializationMethod=RANDOM_NO_DUPLICATE);
/**
* @brief Virtualize destructor
......
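From Python, the initialization method is selected by name at construction time; ``RANDOM_NO_DUPLICATE`` (the new default above) draws the initial means from the data without, as the name suggests, picking the same example twice. A sketch:

    trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')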
......@@ -184,7 +184,7 @@ static auto supervector_length = bob::extension::VariableDoc(
"int",
"Returns the supervector length.",
"NGaussians x NInputs: Number of Gaussian components by the feature dimensionality"
"WARNING An exception is thrown if no Universal Background Model has been set yet."
"An exception is thrown if no Universal Background Model has been set yet."
""
);
PyObject* PyBobLearnEMISVBase_getSupervectorLength(PyBobLearnEMISVBaseObject* self, void*) {
......
......@@ -88,7 +88,7 @@ int list_as_vector(PyObject* list, std::vector<blitz::Array<double,N> >& vec)
static auto ISVTrainer_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".ISVTrainer",
"ISVTrainer"
"References: [Vogt2008,McCool2013]",
"Train Intersession varibility modeling :ref:`ISV <isv>`.",
""
).add_constructor(
bob::extension::FunctionDoc(
......
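A rough training sketch (following the ``isv_train`` helper shown later in this merge request; ``ubm`` is a trained :py:class:`bob.learn.em.GMMMachine`, ``stats`` a per-client list of :py:class:`bob.learn.em.GMMStats` lists, and the rank of :math:`U` is illustrative):

    isv_base = bob.learn.em.ISVBase(ubm, 1)   # rank of U
    trainer = bob.learn.em.ISVTrainer(4)      # relevance factor
    bob.learn.em.train(trainer, isv_base, stats, max_iterations=50)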
......@@ -15,8 +15,8 @@
static auto IVectorMachine_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".IVectorMachine",
"An IVectorMachine consists of a Total Variability subspace :math:`T` and allows the extraction of IVector"
"References: [Dehak2010]_",
"Statistical model for the Total Variability training for more information and explanation see the user guide in documentation (:ref:`iVectors <ivector>`)" // this documentation text is intentionally written to be long!
"",
""
).add_constructor(
bob::extension::FunctionDoc(
......@@ -189,7 +189,7 @@ static auto supervector_length = bob::extension::VariableDoc(
"Returns the supervector length.",
"NGaussians x NInputs: Number of Gaussian components by the feature dimensionality"
"@warning An exception is thrown if no Universal Background Model has been set yet."
"An exception is thrown if no Universal Background Model has been set yet."
""
);
PyObject* PyBobLearnEMIVectorMachine_getSupervectorLength(PyBobLearnEMIVectorMachineObject* self, void*) {
......
......@@ -36,9 +36,9 @@ static int extract_GMMStats_1d(PyObject *list,
static auto IVectorTrainer_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".IVectorTrainer",
"IVectorTrainer"
"An IVectorTrainer to learn a Total Variability subspace :math:`$T$`"
" (and eventually a covariance matrix :math:`$\\Sigma$`).",
" References: [Dehak2010]"
"Trains the Total Variability subspace :math:`$T$` to generate :ref:`iVectors <ivector>`."
"",
""
).add_constructor(
bob::extension::FunctionDoc(
"__init__",
......
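A rough sketch of how :math:`T` is trained and iVectors are extracted (the ``update_sigma`` keyword and the ``project`` method are assumptions about this API, and the rank 100 is illustrative):

    ivector_machine = bob.learn.em.IVectorMachine(ubm, 100)  # rank of T
    trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
    bob.learn.em.train(trainer, ivector_machine, gmm_stats, max_iterations=10)
    w = ivector_machine.project(stats_of_one_sample)         # the iVector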
......@@ -15,8 +15,8 @@
static auto JFABase_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".JFABase",
"A JFABase instance can be seen as a container for :math:`U`, :math:`V` and :math:`D` when performing Joint Factor Analysis (JFA).\n\n"
"References: [Vogt2008]_ [McCool2013]_",
"Container for :math:`U`, :math:`V` and :math:`D` when performing Joint Factor Analysis (:ref:`JFA <jfa>`).\n\n"
"",
""
).add_constructor(
bob::extension::FunctionDoc(
......@@ -192,7 +192,7 @@ static auto supervector_length = bob::extension::VariableDoc(
"Returns the supervector length.",
"NGaussians x NInputs: Number of Gaussian components by the feature dimensionality"
"@warning An exception is thrown if no Universal Background Model has been set yet."
"An exception is thrown if no Universal Background Model has been set yet."
""
);
PyObject* PyBobLearnEMJFABase_getSupervectorLength(PyBobLearnEMJFABaseObject* self, void*) {
......
......@@ -173,8 +173,8 @@ static auto supervector_length = bob::extension::VariableDoc(
"int",
"Returns the supervector length.",
"NGaussians x NInputs: Number of Gaussian components by the feature dimensionality"
"@warning An exception is thrown if no Universal Background Model has been set yet."
"NGaussians x NInputs: Number of Gaussian components by the feature dimensionality. "
"An exception is thrown if no Universal Background Model has been set yet."
""
);
PyObject* PyBobLearnEMJFAMachine_getSupervectorLength(PyBobLearnEMJFAMachineObject* self, void*) {
......
......@@ -87,8 +87,8 @@ int list_as_vector(PyObject* list, std::vector<blitz::Array<double,N> >& vec)
static auto JFATrainer_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".JFATrainer",
"JFATrainer"
"References: [Vogt2008,McCool2013]",
"Trains a Joint Factor Analysis (:ref:`JFA <jfa>`) on top of GMMs"
"",
""
).add_constructor(
bob::extension::FunctionDoc(
......
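A rough construction sketch (``ubm`` is a trained :py:class:`bob.learn.em.GMMMachine`; the subspace ranks are illustrative); the trainer is typically driven through the package's ``train_jfa`` helper:

    jfa_base = bob.learn.em.JFABase(ubm, 2, 2)   # container for U, V and D
    trainer = bob.learn.em.JFATrainer()
    bob.learn.em.train_jfa(trainer, jfa_base, training_stats, max_iterations=10)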
......@@ -15,7 +15,7 @@
static auto KMeansMachine_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".KMeansMachine",
"This class implements a k-means classifier.\n"
"Statistical model for the :ref:`k-means <kmeans>` .\n"
"See Section 9.1 of Bishop, \"Pattern recognition and machine learning\", 2006"
).add_constructor(
bob::extension::FunctionDoc(
......
......@@ -42,8 +42,8 @@ static inline const std::string& IM2string(bob::learn::em::KMeansTrainer::Initia
static auto KMeansTrainer_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".KMeansTrainer",
"Trains a KMeans machine."
"This class implements the expectation-maximization algorithm for a k-means machine."
"Trains a KMeans clustering :ref:`k-means <kmeans>`."
"This class implements the expectation-maximization algorithm for a k-means."
"See Section 9.1 of Bishop, \"Pattern recognition and machine learning\", 2006"
"It uses a random initialization of the means followed by the expectation-maximization algorithm"
).add_constructor(
......
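A minimal end-to-end sketch (arbitrary sizes; ``data`` is a 2D float64 array):

    kmeans = bob.learn.em.KMeansMachine(3, 2)   # 3 means in a 2D feature space
    trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
    bob.learn.em.train(trainer, kmeans, data,
                       max_iterations=200, convergence_threshold=1e-5)
    print(kmeans.means)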
......@@ -71,6 +71,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;}
/*** linear_scoring ***/
bob::extension::FunctionDoc linear_scoring1 = bob::extension::FunctionDoc(
"linear_scoring",
"The :ref:`Linear scoring <linearscoring>` is an approximation to the log-likelihood ratio that was shown to be as accurate and up to two orders of magnitude more efficient to compute [Glembek2009]_."
"",
0,
true
......
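A hypothetical call sketch (the argument order below, client models first, then the UBM, then the probe statistics, is an assumption about this function's signature):

    scores = bob.learn.em.linear_scoring(
        [model_gmm_a, model_gmm_b],       # client models (GMMMachine)
        ubm,                              # the prior
        [probe_stats_1, probe_stats_2])   # GMMStats of the probes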
......@@ -17,7 +17,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;} /*
static auto MAP_GMMTrainer_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".MAP_GMMTrainer",
"This class implements the maximum a posteriori M-step of the expectation-maximization algorithm for a GMM Machine. The prior parameters are encoded in the form of a GMM (e.g. a universal background model). The EM algorithm thus performs GMM adaptation."
"This class implements the maximum a posteriori (:ref:`MAP <map>`) M-step of the expectation-maximization algorithm for a GMM Machine. The prior parameters are encoded in the form of a GMM (e.g. a universal background model). The EM algorithm thus performs GMM adaptation."
).add_constructor(
bob::extension::FunctionDoc(
"__init__",
......
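A rough adaptation sketch (the keyword names are assumptions about this constructor; ``ubm`` is the prior :py:class:`bob.learn.em.GMMMachine`):

    gmm = bob.learn.em.GMMMachine(ubm.shape[0], ubm.shape[1])
    trainer = bob.learn.em.MAP_GMMTrainer(ubm, relevance_factor=4, update_means=True)
    bob.learn.em.train(trainer, gmm, client_data, max_iterations=1)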
......@@ -17,7 +17,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;} /*
static auto ML_GMMTrainer_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".ML_GMMTrainer",
"This class implements the maximum likelihood M-step of the expectation-maximisation algorithm for a GMM Machine."
"This class implements the maximum likelihood M-step (:ref:`MLE <mle>`) of the expectation-maximisation algorithm for a GMM Machine."
).add_constructor(
bob::extension::FunctionDoc(
"__init__",
......
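A usage sketch, mirroring the UBM training code that appears later in this merge request (only the means are updated there):

    trainer = bob.learn.em.ML_GMMTrainer(
        update_means=True, update_variances=False, update_weights=False)
    bob.learn.em.train(trainer, gmm, data)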
......@@ -7,112 +7,125 @@
import numpy
import bob.learn.em
import logging
logger = logging.getLogger('bob.learn.em')
def train(trainer, machine, data, max_iterations=50, convergence_threshold=None, initialize=True, rng=None,
          check_inputs=True):
    """
    Trains a machine given a trainer and the proper data

    **Parameters**:

      trainer : one of :py:class:`KMeansTrainer`, :py:class:`MAP_GMMTrainer`, :py:class:`ML_GMMTrainer`, :py:class:`ISVTrainer`, :py:class:`IVectorTrainer`, :py:class:`PLDATrainer`, :py:class:`EMPCATrainer`
        A trainer mechanism
      machine : one of :py:class:`KMeansMachine`, :py:class:`GMMMachine`, :py:class:`ISVBase`, :py:class:`IVectorMachine`, :py:class:`PLDAMachine`, :py:class:`bob.learn.linear.Machine`
        A container machine
      data : array_like <float, 2D>
        The data to train with
      max_iterations : int
        The maximum number of iterations to train a machine
      convergence_threshold : float
        The convergence threshold to train a machine. If None, the training procedure will stop with the iteration criterion only
      initialize : bool
        If True, runs the initialization procedure
      rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loop
      check_inputs : bool
        If True, performs shallow checks on the input data, raising if numpy.inf or numpy.nan values are found
    """
    if check_inputs and type(data) is numpy.ndarray:
        if numpy.isinf(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.inf detected in `data`")
        if numpy.isnan(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.nan detected in `data`")

    # Initialization
    if initialize:
        if rng is not None:
            trainer.initialize(machine, data, rng)
        else:
            trainer.initialize(machine, data)

    trainer.e_step(machine, data)
    average_output = 0
    average_output_previous = 0

    if hasattr(trainer, "compute_likelihood"):
        average_output = trainer.compute_likelihood(machine)

    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        average_output_previous = average_output
        trainer.m_step(machine, data)
        trainer.e_step(machine, data)

        if hasattr(trainer, "compute_likelihood"):
            average_output = trainer.compute_likelihood(machine)

            if type(machine) is bob.learn.em.KMeansMachine:
                logger.info("average euclidean distance = %f", average_output)
            else:
                logger.info("log likelihood = %f", average_output)

            convergence_value = abs((average_output_previous - average_output) / average_output_previous)
            logger.info("convergence value = %f", convergence_value)

            # Terminates if converged (and likelihood computation is set)
            if convergence_threshold is not None and convergence_value <= convergence_threshold:
                break

    if hasattr(trainer, "finalize"):
        trainer.finalize(machine, data)
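# A minimal usage sketch for ``train`` (illustrative sizes; ``data`` is a 2D
# float64 numpy array):
#
#   machine = bob.learn.em.KMeansMachine(3, 2)
#   trainer = bob.learn.em.KMeansTrainer()
#   bob.learn.em.train(trainer, machine, data, max_iterations=200,
#                      convergence_threshold=1e-5, check_inputs=True)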
def train_jfa(trainer, jfa_base, data, max_iterations=10, initialize=True, rng=None):
    """
    Trains a :py:class:`bob.learn.em.JFABase` given a :py:class:`bob.learn.em.JFATrainer` and the proper data

    **Parameters**:

      trainer : :py:class:`bob.learn.em.JFATrainer`
        A JFA trainer mechanism
      jfa_base : :py:class:`bob.learn.em.JFABase`
        A container machine
      data : [[:py:class:`bob.learn.em.GMMStats`]]
        The data to be trained
      max_iterations : int
        The maximum number of iterations to train a machine
      initialize : bool
        If True, runs the initialization procedure
      rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loops
    """
    if initialize:
        if rng is not None:
            trainer.initialize(jfa_base, data, rng)
        else:
            trainer.initialize(jfa_base, data)

    # V subspace
    logger.info("V subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_v(jfa_base, data)
        trainer.m_step_v(jfa_base, data)
    trainer.finalize_v(jfa_base, data)

    # U subspace
    logger.info("U subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_u(jfa_base, data)
        trainer.m_step_u(jfa_base, data)
    trainer.finalize_u(jfa_base, data)

    # D subspace
    logger.info("D subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_d(jfa_base, data)
        trainer.m_step_d(jfa_base, data)
    trainer.finalize_d(jfa_base, data)
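# A minimal usage sketch for ``train_jfa``; note the nested-list layout of
# ``data`` (one list of GMMStats per client), as described in the docstring.
# The subspace ranks are illustrative:
#
#   jfa_base = bob.learn.em.JFABase(ubm, 2, 2)
#   trainer = bob.learn.em.JFATrainer()
#   bob.learn.em.train_jfa(trainer, jfa_base, data, max_iterations=10)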
......@@ -12,7 +12,7 @@
/*** zt_norm ***/
bob::extension::FunctionDoc zt_norm = bob::extension::FunctionDoc(
"ztnorm",
"Normalise raw scores with ZT-Norm."
"Normalise raw scores with :ref:`ZT-Norm <ztnorm>`."
"Assume that znorm and tnorm have no common subject id.",
0,
true
......@@ -72,7 +72,7 @@ PyObject* PyBobLearnEM_ztNorm(PyObject*, PyObject* args, PyObject* kwargs) {
/*** t_norm ***/
bob::extension::FunctionDoc t_norm = bob::extension::FunctionDoc(
"tnorm",
"Normalise raw scores with T-Norm",
"Normalise raw scores with :ref:`T-Norm <tnorm>`",
0,
true
)
......@@ -109,7 +109,7 @@ PyObject* PyBobLearnEM_tNorm(PyObject*, PyObject* args, PyObject* kwargs) {
/*** z_norm ***/
bob::extension::FunctionDoc z_norm = bob::extension::FunctionDoc(
"znorm",
"Normalise raw scores with Z-Norm",
"Normalise raw scores with :ref:`Z-Norm <znorm>`",
0,
true
)
......
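Z-Norm standardizes each raw score with the mean and standard deviation of impostor scores against the same model, :math:`s' = (s - \mu_Z) / \sigma_Z`. A minimal numpy sketch of that computation for a single model (the package functions above operate on full score matrices; the variable names are illustrative):

    import numpy
    mu_z = zprobe_scores_vs_model.mean()   # impostor cohort statistics
    sigma_z = zprobe_scores_vs_model.std()
    normalized = (raw_scores_vs_model - mu_z) / sigma_z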
......@@ -25,6 +25,7 @@ extensions = [
'sphinx.ext.intersphinx',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'matplotlib.sphinxext.plot_directive'
]
import sphinx
......
.. vim: set fileencoding=utf-8 :
.. Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
.. Tue 17 Feb 2015 13:50:06 CET
..
.. Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
.. _bob.learn.em:
......@@ -10,12 +6,12 @@
Expectation Maximization Machine Learning Tools
================================================
The EM algorithm is an iterative method that estimates parameters for statistical models, where the model depends on unobserved latent variables. The EM iteration alternates between performing an expectation (E) step, which creates a function for the expectation of the log-likelihood evaluated using the current estimate for the parameters, and a maximization (M) step, which computes parameters maximizing the expected log-likelihood found on the E step. These parameter-estimates are then used to determine the distribution of the latent variables in the next E step [WikiEM]_.
This package is a part of Bob_. It implements a general EM algorithm and
includes implementations of the following algorithms:
The package includes the machine definitions and a selection of trainers for specialized purposes, each following the E-M pattern sketched after the list below:
- K-Means
- Maximum Likelihood (ML)
- Maximum a Posteriori (MAP)
- K-Means
- Inter Session Variability Modelling (ISV)
- Joint Factor Analysis (JFA)
- Total Variability Modeling (iVectors)
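All of the above share the E-M skeleton implemented by this package's ``train`` helper; a minimal sketch of one iteration, for any matching trainer/machine pair::

   trainer.initialize(machine, data)  # prepare subspaces/arrays
   trainer.e_step(machine, data)      # accumulate expected statistics
   trainer.m_step(machine, data)      # re-estimate the parameters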
......@@ -31,7 +27,7 @@ Documentation
guide
py_api
References
-----------
......@@ -47,7 +43,9 @@ References
.. [Roweis1998] Roweis, Sam. "EM algorithms for PCA and SPCA." Advances in neural information processing systems (1998): 626-632.
.. [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
.. [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
.. [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
.. [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE signal processing letters 12.7 (2005): 532-535.
Indices and tables
......
......@@ -12,6 +12,7 @@
.. _blitz++: http://www.oonumerics.org/blitz
.. _bob's idiap guide: https://gitlab.idiap.ch/bob/bob/wikis/Using-Bob-at-Idiap
.. _bob's website: https://www.idiap.ch/software/bob
.. _bob: https://www.idiap.ch/software/bob
.. _boost: http://www.boost.org
.. _buildbot: http://trac.buildbot.net
.. _buildout: http://pypi.python.org/pypi/zc.buildout/
......
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**

      features: 2D numpy array with the features
      n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]

    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)

    # The K-means clustering is first used to estimate the initial means,
    # the final variances and the final weights for each gaussian component
    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)

    # Getting the means, weights and the variances for each cluster. This is a
    # very good estimator for the ML
    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
    means = kmeans_machine.means

    # initialize the UBM with the output of kmeans
    ubm.means = means
    ubm.variances = variances
    ubm.weights = weights

    # Creating the ML Trainer. We will adapt only the means
    trainer = bob.learn.em.ML_GMMTrainer(
        update_means=True, update_variances=False, update_weights=False)
    bob.learn.em.train(trainer, ubm, features)

    return ubm
def isv_train(features, ubm):
    """
    Train the U matrix

    **Parameters**

      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
      ubm: A trained UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
    for user in features:
        user_stats = []
        for f in user:
            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
            ubm.acc_statistics(f, s)
            user_stats.append(s)
        stats.append(user_stats)

    relevance_factor = 4
    subspace_dimension_of_u = 1

    isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
    trainer = bob.learn.em.ISVTrainer(relevance_factor)
    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
    bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)

    return isvbase
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
    (data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
    (data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
    (data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
isvbase = isv_train([setosa, versicolor, virginica], ubm)
# Variability direction
u0 = isvbase.u[0:2, 0] / numpy.linalg.norm(isvbase.u[0:2, 0])
u1 = isvbase.u[2:4, 0] / numpy.linalg.norm(isvbase.u[2:4, 0])
u2 = isvbase.u[4:6, 0] / numpy.linalg.norm(isvbase.u[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
            c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
            marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
#             marker=".", label="within class variability", s=0.01)
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
         fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
         fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
         fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
         u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
         u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
         u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend()
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**

      features: 2D numpy array with the features
      n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]

    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)

    # The K-means clustering is first used to estimate the initial means,
    # the final variances and the final weights for each gaussian component
    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)

    # Getting the means, weights and the variances for each cluster. This is a
    # very good estimator for the ML
    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
    means = kmeans_machine.means

    # initialize the UBM with the output of kmeans
    ubm.means = means
    ubm.variances = variances
    ubm.weights = weights

    # Creating the ML Trainer. We will adapt only the means
    trainer = bob.learn.em.ML_GMMTrainer(
        update_means=True, update_variances=False, update_weights=False)
    bob.learn.em.train(trainer, ubm, features)

    return ubm
def jfa_train(features, ubm):
    """
    Trains the U and V matrices

    **Parameters**

      features: List of :py:class:`bob.learn.em.GMMStats` organized by class