Commit e307cd16 authored by Tiago Pereira, committed by Amir MOHAMMADI

Changed the MAP example

Improved the documentation

Fixed a bug introduced in `train`

[sphinx] Fixed parameters

Fixed another issue with the tests
parent 80ceaa89
@@ -184,10 +184,10 @@ def test_trainer_execption():
     machine = KMeansMachine(2, 2)
     data = numpy.array([[1.0, 2.0], [2, 3.], [1, 1.], [2, 5.], [numpy.inf, 1.0]])
     trainer = KMeansTrainer()
-    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 0.001)
+    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)

     # Testing Nan
     machine = KMeansMachine(2, 2)
     data = numpy.array([[1.0, 2.0], [2, 3.], [1, numpy.nan], [2, 5.], [2.0, 1.0]])
     trainer = KMeansTrainer()
-    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 0.001)
+    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
@@ -31,10 +31,12 @@ def train(trainer, machine, data, max_iterations=50, convergence_threshold=None,
       If True, runs the initialization procedure
     rng : :py:class:`bob.core.random.mt19937`
       The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loop
-    check_inputs: Shallow checks in the inputs. Check for inf and NaN
+    check_inputs:
+      Shallow checks on the inputs; checks for inf and NaN
     """
-    if check_inputs:
+    if check_inputs and type(data) is numpy.ndarray:
         if numpy.isinf(numpy.sum(data)):
             raise ValueError("Please check your inputs; numpy.inf detected in `data`")
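For illustration, the shallow check reads as a standalone helper; a minimal sketch using plain NumPy (the name ``check_array`` is hypothetical, not part of bob.learn.em)::

    import numpy

    def check_array(data):
        # Hypothetical helper mirroring the guard above: only plain
        # numpy arrays are inspected; summing once makes a single pass
        # over the data, and inf/NaN propagate into the sum.
        if type(data) is numpy.ndarray:
            if numpy.isinf(numpy.sum(data)):
                raise ValueError("Please check your inputs; numpy.inf detected in `data`")
            if numpy.isnan(numpy.sum(data)):
                raise ValueError("Please check your inputs; numpy.nan detected in `data`")

    check_array(numpy.array([[1.0, 2.0], [numpy.inf, 1.0]]))  # raises ValueError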
@@ -709,7 +709,7 @@ Z-Norm
 ======

 .. _znorm:

-Given a score :math:`s_i`, Z-Norm (zero-normalisation) scales this value by the
+Given a score :math:`s_i`, Z-Norm [Auckenthaler2000]_ [Mariethoz2005]_ (zero-normalisation) scales this value by the
 mean (:math:`\mu`) and standard deviation (:math:`\sigma`) of an impostor score
 distribution. This score distribution can be computed beforehand and is
 defined as follows.
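A plain NumPy sketch of this definition (illustrative only, not the bob.learn.em API; the impostor scores are made up)::

    import numpy

    # Impostor scores for one model, computed beforehand.
    impostor_scores = numpy.array([0.10, -0.30, 0.20, 0.05, -0.10])
    mu, sigma = impostor_scores.mean(), impostor_scores.std()

    def znorm(s):
        # s' = (s - mu) / sigma
        return (s - mu) / sigma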
@@ -734,7 +734,7 @@ T-Norm
 ======

 .. _tnorm:

-T-norm (Test-normalization) operates in a probe-centric manner. If in the
+T-norm [Auckenthaler2000]_ [Mariethoz2005]_ (Test-normalization) operates in a probe-centric manner. If in the
 Z-Norm :math:`\mu` and :math:`\sigma` are estimated using an impostor set of
 models and their scores, the t-norm computes these statistics using the current
 probe sample against a set of models in a cohort :math:`\Theta_{c}`. A co-
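The probe-centric statistics can be sketched with NumPy as follows (illustrative; the array shapes are assumptions)::

    import numpy

    # cohort_scores[i, j]: score of probe i against cohort model j.
    cohort_scores = numpy.random.normal(0.0, 1.0, (10, 20))
    raw_scores = numpy.random.normal(0.5, 1.0, (10,))

    mu = cohort_scores.mean(axis=1)    # one mean per probe
    sigma = cohort_scores.std(axis=1)  # one standard deviation per probe
    tnormed = (raw_scores - mu) / sigma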
@@ -771,7 +771,7 @@ ZT-Norm
 =======

 .. _ztnorm:

-ZT-Norm consists in the application of :ref:`Z-Norm <znorm>` followed by a
+ZT-Norm [Auckenthaler2000]_ [Mariethoz2005]_ consists of the application of :ref:`Z-Norm <znorm>` followed by a
 :ref:`T-Norm <tnorm>` and it is implemented in :py:func:`bob.learn.em.ztnorm`.
 Below is an example of score normalization using
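Since ZT-Norm composes the two steps, the pipeline can be sketched as follows (a simplified illustration; :py:func:`bob.learn.em.ztnorm` itself operates on full score matrices)::

    import numpy

    def znorm(scores, impostor_scores):
        # Model-centric: statistics from a precomputed impostor score set.
        return (scores - impostor_scores.mean()) / impostor_scores.std()

    def tnorm(scores, cohort_scores):
        # Probe-centric: statistics over the cohort models (one row per probe).
        return (scores - cohort_scores.mean(axis=1)) / cohort_scores.std(axis=1)

    def ztnorm(scores, impostor_scores, cohort_scores):
        # Z-Norm first; the cohort scores are z-normalised the same way
        # before the T-Norm statistics are computed.
        return tnorm(znorm(scores, impostor_scores),
                     znorm(cohort_scores, impostor_scores))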
@@ -44,6 +44,9 @@ References
 .. [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
 .. [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
 .. [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
+.. [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE Signal Processing Letters 12.7 (2005): 532-535.
Indices and tables
------------------
@@ -7,6 +7,15 @@ numpy.random.seed(2)  # FIXING A SEED

 def train_ubm(features, n_gaussians):
     """
     Train UBM
+
+    **Parameters**
+
+      features: 2D numpy array with the features
+
+      n_gaussians: Number of Gaussians
+
     """
     input_size = features.shape[1]
     kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)

@@ -38,8 +47,13 @@ def train_ubm(features, n_gaussians):
 def isv_train(features, ubm):
     """
-    Features com lista de listas [ [data_point_1_user_1,data_point_2_user_1],
-    [data_point_1_user_2,data_point_2_user_2] ]
+    Trains the U matrix
+
+    **Parameters**
+
+      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+
+      ubm: UBM (:py:class:`bob.learn.em.GMMMachine`)
     """
     stats = []
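A hedged usage sketch for these two helpers (the synthetic per-class arrays are invented, and since the function bodies are truncated here it is an assumption that ``isv_train`` accepts raw per-class arrays)::

    import numpy

    # Synthetic data: two classes in a 2D feature space.
    class_1 = numpy.random.normal(0, 1, (100, 2))
    class_2 = numpy.random.normal(3, 1, (100, 2))

    ubm = train_ubm(numpy.vstack((class_1, class_2)), n_gaussians=2)
    isv_base = isv_train([class_1, class_2], ubm)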
@@ -7,6 +7,16 @@ numpy.random.seed(2)  # FIXING A SEED

 def train_ubm(features, n_gaussians):
     """
     Train UBM
+
+    **Parameters**
+
+      features: 2D numpy array with the features
+
+      n_gaussians: Number of Gaussians
+
     """
     input_size = features.shape[1]
     kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)

@@ -38,8 +48,14 @@ def train_ubm(features, n_gaussians):
 def jfa_train(features, ubm):
     """
-    Features com lista de listas [ [data_point_1_user_1,data_point_2_user_1], [data_point_1_user_2,data_point_2_user_2] ]
-    """
+    Trains the U and V matrices
+
+    **Parameters**
+
+      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+
+      ubm: UBM (:py:class:`bob.learn.em.GMMMachine`)
+    """
     stats = []
     for user in features:
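For context, the U and V matrices trained here parameterise the standard JFA decomposition of a class-dependent mean supervector:

.. math::

   s = m + Ux + Vy + Dz

where :math:`m` is the UBM mean supervector, :math:`U` spans the within-class (session) variability, :math:`V` the between-class variability, and :math:`Dz` the residual.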
@@ -16,22 +16,17 @@ data = numpy.vstack((setosa, versicolor, virginica))

 # Three clusters with a feature dimensionality of 2
 mle_machine = bob.learn.em.GMMMachine(3, 2)
 mle_trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
 mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
 bob.learn.em.train(mle_trainer, mle_machine, data, max_iterations=200,
                    convergence_threshold=1e-5)  # Train the GMMMachine

-# Creating some random data centered in
-new_data = numpy.random.normal(2, 0.8, (50, 2))

 # Three clusters with a feature dimensionality of 2
 map_machine = bob.learn.em.GMMMachine(3, 2)
 map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
-bob.learn.em.train(map_trainer, map_machine, new_data, max_iterations=200,
+bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
                    convergence_threshold=1e-5)  # Train the GMMMachine

 figure, ax = plt.subplots()
-plt.scatter(new_data[:, 0], new_data[:, 1], c="olivedrab", label="new data")
+# plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
 plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
 plt.scatter(versicolor[:, 0], versicolor[:, 1],
             c="goldenrod", label="versicolor")
@@ -7,6 +7,16 @@ numpy.random.seed(2)  # FIXING A SEED

 def train_ubm(features, n_gaussians):
     """
     Train UBM
+
+    **Parameters**
+
+      features: 2D numpy array with the features
+
+      n_gaussians: Number of Gaussians
+
     """
     input_size = features.shape[1]
     kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)

@@ -38,9 +48,14 @@ def train_ubm(features, n_gaussians):
 def ivector_train(features, ubm):
     """
-    Features com lista de listas [ [data_point_1_user_1,data_point_2_user_1],
-    [data_point_1_user_2,data_point_2_user_2] ]
-    """
+    Trains the T matrix
+
+    **Parameters**
+
+      features: List of :py:class:`bob.learn.em.GMMStats`
+
+      ubm: UBM (:py:class:`bob.learn.em.GMMMachine`)
+    """
     stats = []
     for user in features:
@@ -72,6 +87,9 @@ def acc_stats(data, gmm):

 def compute_ivectors(gmm_stats, ivector_machine):
     """
+    Given :py:class:`bob.learn.em.GMMStats` and a T matrix, extract the iVectors.
     """
     ivectors = []
     for g in gmm_stats:
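For reference, the total-variability model behind ``ivector_train`` and ``compute_ivectors`` is:

.. math::

   s = m + Tw

where :math:`m` is the UBM mean supervector, :math:`T` is the total-variability matrix trained above, and :math:`w` is the iVector extracted for each :py:class:`bob.learn.em.GMMStats`.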