Commit e307cd16 authored by Tiago Pereira, committed by Amir MOHAMMADI

Changed the MAP example

Improved the documentation

Fixed a bug introduced in the train function

[sphinx] Fixed parameters

Fixed another issue with the tests
parent 80ceaa89
Merge request !24: Re-write the user guide
@@ -184,10 +184,10 @@ def test_trainer_execption():
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, 1.], [2, 5.], [numpy.inf, 1.0]])
    trainer = KMeansTrainer()
-   assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 0.001)
+   assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
    # Testing Nan
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, numpy.nan], [2, 5.], [2.0, 1.0]])
    trainer = KMeansTrainer()
-   assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 0.001)
+   assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
@@ -31,10 +31,12 @@ def train(trainer, machine, data, max_iterations=50, convergence_threshold=None,
        If True, runs the initialization procedure
    rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loop
-   check_inputs: Shallow checks in the inputs. Check for inf and NaN
+   check_inputs:
+       Shallow checks in the inputs. Check for inf and NaN
    """
-   if check_inputs:
+   if check_inputs and type(data) is numpy.ndarray:
        if numpy.isinf(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.inf detected in `data` ")
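The new guard only runs the shallow checks when `data` is a plain :py:class:`numpy.ndarray`. A minimal sketch of the same idea outside of `train` (the helper name and the sample array below are made up for illustration):

import numpy

def shallow_check(data):
    # Illustrative only: mirror the inf/NaN screening done by `check_inputs`
    if isinstance(data, numpy.ndarray):
        total = numpy.sum(data)  # an inf or NaN anywhere propagates into the sum
        if numpy.isinf(total):
            raise ValueError("numpy.inf detected in `data`")
        if numpy.isnan(total):
            raise ValueError("numpy.nan detected in `data`")

shallow_check(numpy.array([[1.0, 2.0], [numpy.nan, 5.0]]))  # raises ValueError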
@@ -709,7 +709,7 @@ Z-Norm
======
.. _znorm:
-Given a score :math:`s_i`, Z-Norm (zero-normalisation) scales this value by the
+Given a score :math:`s_i`, Z-Norm (zero-normalisation) [Auckenthaler2000]_ [Mariethoz2005]_ scales this value by the
mean (:math:`\mu`) and standard deviation (:math:`\sigma`) of an impostor score
distribution. This score distribution can be computed beforehand and is
defined as follows.
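Stripped of the API, the normalisation is a shift and scale by the impostor statistics; a back-of-the-envelope NumPy sketch with invented score values:

import numpy

# Impostor score distribution, computed beforehand
impostor_scores = numpy.array([-1.2, -0.7, -0.9, -1.5, -0.8])
mu, sigma = impostor_scores.mean(), impostor_scores.std()

s_i = 0.3                    # raw verification score
z_i = (s_i - mu) / sigma     # Z-normalised score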
@@ -734,7 +734,7 @@ T-Norm
======
.. _tnorm:
-T-norm (Test-normalization) operates in a probe-centric manner. If in the
+T-norm (test-normalisation) [Auckenthaler2000]_ [Mariethoz2005]_ operates in a probe-centric manner. If in the
Z-Norm :math:`\mu` and :math:`\sigma` are estimated using an impostor set of
models and their scores, the t-norm computes these statistics using the current
probe sample against a set of models in a cohort :math:`\Theta_{c}`. A co-
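The same shift-and-scale applies, but the statistics now come from the current probe scored against the cohort models; a small sketch with invented numbers (not the package's t-norm call):

import numpy

# Scores of the current probe against the cohort models in Theta_c
cohort_scores = numpy.array([-0.4, -1.1, -0.6, -0.9])
mu_c, sigma_c = cohort_scores.mean(), cohort_scores.std()

s_i = 0.3                        # raw score of the probe against the claimed model
t_i = (s_i - mu_c) / sigma_c     # T-normalised score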
@@ -771,7 +771,7 @@ ZT-Norm
=======
.. _ztnorm:
-ZT-Norm consists in the application of :ref:`Z-Norm <znorm>` followed by a
+ZT-Norm [Auckenthaler2000]_ [Mariethoz2005]_ consists of the application of :ref:`Z-Norm <znorm>` followed by a
:ref:`T-Norm <tnorm>` and is implemented in :py:func:`bob.learn.em.ztnorm`.
Below follows an example of score normalization using
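Conceptually the two steps compose; a rough sketch reusing the invented scores from above (in a full ZT-Norm the cohort scores are themselves Z-normalised before the T step, which the library function takes care of):

import numpy

s_i = 0.3
impostor_scores = numpy.array([-1.2, -0.7, -0.9, -1.5, -0.8])   # Z-step statistics
cohort_scores = numpy.array([-0.4, -1.1, -0.6, -0.9])           # T-step statistics

z_i = (s_i - impostor_scores.mean()) / impostor_scores.std()    # Z-Norm first
zt_i = (z_i - cohort_scores.mean()) / cohort_scores.std()       # then T-Norm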
@@ -44,6 +44,9 @@ References
.. [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
.. [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
.. [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
+.. [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE Signal Processing Letters 12.7 (2005): 532-535.
Indices and tables
------------------
@@ -7,6 +7,15 @@ numpy.random.seed(2)  # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**

    features: 2D numpy array with the features
    n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,8 +47,13 @@ def train_ubm(features, n_gaussians):
def isv_train(features, ubm):
    """
-   Features as a list of lists [ [data_point_1_user_1, data_point_2_user_1],
-   [data_point_1_user_2, data_point_2_user_2] ]
+   Train the ISV U matrix
+
+   **Parameters**
+
+   features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+   ubm: The UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
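A rough usage sketch for `train_ubm` as documented above; the feature array is random filler, only meant to match the expected 2D shape:

import numpy
import bob.learn.em

features = numpy.random.normal(0, 1, (100, 2))   # filler: 100 two-dimensional vectors
ubm = train_ubm(features, n_gaussians=3)          # expected to return the trained GMMMachine

`isv_train` is then fed the per-class features together with this UBM.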
@@ -7,6 +7,16 @@ numpy.random.seed(2)  # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**

    features: 2D numpy array with the features
    n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,7 +48,13 @@ def train_ubm(features, n_gaussians):
def jfa_train(features, ubm):
    """
-   Features as a list of lists [ [data_point_1_user_1, data_point_2_user_1], [data_point_1_user_2, data_point_2_user_2] ]
+   Train the JFA U and V matrices
+
+   **Parameters**
+
+   features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+   ubm: The UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
@@ -16,22 +16,17 @@ data = numpy.vstack((setosa, versicolor, virginica))
# Three Gaussians with a feature dimensionality of 2
mle_machine = bob.learn.em.GMMMachine(3, 2)
mle_trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
bob.learn.em.train(mle_trainer, mle_machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine (ML)
# Creating some random data centered in
new_data = numpy.random.normal(2, 0.8, (50, 2))
# Three Gaussians with a feature dimensionality of 2
map_machine = bob.learn.em.GMMMachine(3, 2)
map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
-   bob.learn.em.train(map_trainer, map_machine, new_data, max_iterations=200,
+   bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine (MAP)
figure, ax = plt.subplots()
-   plt.scatter(new_data[:, 0], new_data[:, 1], c="olivedrab", label="new data")
+   #plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
            c="goldenrod", label="versicolor")
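For context on the `relevance_factor=4` used above: in the usual mean-only MAP adaptation each Gaussian's mean is interpolated between its ML (prior) estimate and the new data, weighted by how much data that Gaussian explains. A sketch with invented numbers (not code from this example):

import numpy

r = 4.0                                  # relevance factor, as passed to MAP_GMMTrainer
n_k = 10.0                               # responsibility mass of Gaussian k on the new data
prior_mean = numpy.array([5.0, 3.0])     # ML (prior) mean, invented values
data_mean = numpy.array([2.1, 1.9])      # responsibility-weighted mean of the new data

alpha = n_k / (n_k + r)                  # adaptation coefficient in [0, 1)
adapted_mean = alpha * data_mean + (1 - alpha) * prior_mean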
@@ -7,6 +7,16 @@ numpy.random.seed(2)  # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**

    features: 2D numpy array with the features
    n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,8 +48,13 @@ def train_ubm(features, n_gaussians):
def ivector_train(features, ubm):
    """
-   Features as a list of lists [ [data_point_1_user_1, data_point_2_user_1],
-   [data_point_1_user_2, data_point_2_user_2] ]
+   Train the iVector T matrix
+
+   **Parameters**
+
+   features: List of :py:class:`bob.learn.em.GMMStats`
+   ubm: The UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
@@ -72,6 +87,9 @@ def acc_stats(data, gmm):
def compute_ivectors(gmm_stats, ivector_machine):
    """
    Given :py:class:`bob.learn.em.GMMStats` and a T matrix, compute the iVectors.
    """
    ivectors = []
    for g in gmm_stats: