Commit e307cd16 authored by Tiago Pereira, committed by Amir MOHAMMADI

Changed the MAP example

Improved the documentation

Fixed a bug introduced in the train function

[sphinx] Fixed parameters

Fixed another issue with the tests
parent 80ceaa89
Merge request !24: Re-write the user guide
@@ -184,10 +184,10 @@ def test_trainer_execption():
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, 1.], [2, 5.], [numpy.inf, 1.0]])
    trainer = KMeansTrainer()
-   assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 0.001)
+   assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
    # Testing Nan
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, numpy.nan], [2, 5.], [2.0, 1.0]])
    trainer = KMeansTrainer()
-   assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 0.001)
+   assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
@@ -31,10 +31,12 @@ def train(trainer, machine, data, max_iterations=50, convergence_threshold=None,
        If True, runs the initialization procedure
    rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loop
-   check_inputs: Shallow checks in the inputs. Check for inf and NaN
+   check_inputs:
+       Shallow checks in the inputs. Check for inf and NaN
    """
-   if check_inputs:
+   if check_inputs and type(data) is numpy.ndarray:
        if numpy.isinf(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.inf detected in `data` ")
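The new guard only runs the shallow checks when `data` is a plain :py:class:`numpy.ndarray`. A minimal sketch of the same idea outside of `train` (the helper name and the sample array below are made up for illustration):

import numpy

def shallow_check(data):
    # Illustrative only: mirror the inf/NaN screening done by `check_inputs`
    if isinstance(data, numpy.ndarray):
        total = numpy.sum(data)  # an inf or NaN anywhere propagates into the sum
        if numpy.isinf(total):
            raise ValueError("numpy.inf detected in `data`")
        if numpy.isnan(total):
            raise ValueError("numpy.nan detected in `data`")

shallow_check(numpy.array([[1.0, 2.0], [numpy.nan, 5.0]]))  # raises ValueError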
@@ -709,7 +709,7 @@ Z-Norm
======
.. _znorm:
-Given a score :math:`s_i`, Z-Norm (zero-normalisation) scales this value by the
+Given a score :math:`s_i`, Z-Norm (zero-normalisation) [Auckenthaler2000]_ [Mariethoz2005]_ scales this value by the
mean (:math:`\mu`) and standard deviation (:math:`\sigma`) of an impostor score
distribution. This score distribution can be computed beforehand and is
defined as follows.
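Stripped of the API, the normalisation is a shift and scale by the impostor statistics; a back-of-the-envelope NumPy sketch with invented score values:

import numpy

# Impostor score distribution, computed beforehand
impostor_scores = numpy.array([-1.2, -0.7, -0.9, -1.5, -0.8])
mu, sigma = impostor_scores.mean(), impostor_scores.std()

s_i = 0.3                    # raw verification score
z_i = (s_i - mu) / sigma     # Z-normalised score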
@@ -734,7 +734,7 @@ T-Norm
======
.. _tnorm:
-T-norm (Test-normalization) operates in a probe-centric manner. If in the
+T-norm (test-normalisation) [Auckenthaler2000]_ [Mariethoz2005]_ operates in a probe-centric manner. If in the
Z-Norm :math:`\mu` and :math:`\sigma` are estimated using an impostor set of
models and their scores, the t-norm computes these statistics using the current
probe sample against a set of models in a cohort :math:`\Theta_{c}`. A co-
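The same shift-and-scale applies, but the statistics now come from the current probe scored against the cohort models; a small sketch with invented numbers (not the package's t-norm call):

import numpy

# Scores of the current probe against the cohort models in Theta_c
cohort_scores = numpy.array([-0.4, -1.1, -0.6, -0.9])
mu_c, sigma_c = cohort_scores.mean(), cohort_scores.std()

s_i = 0.3                        # raw score of the probe against the claimed model
t_i = (s_i - mu_c) / sigma_c     # T-normalised score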
@@ -771,7 +771,7 @@ ZT-Norm
=======
.. _ztnorm:
-ZT-Norm consists in the application of :ref:`Z-Norm <znorm>` followed by a
+ZT-Norm [Auckenthaler2000]_ [Mariethoz2005]_ consists of the application of :ref:`Z-Norm <znorm>` followed by a
:ref:`T-Norm <tnorm>` and is implemented in :py:func:`bob.learn.em.ztnorm`.
Below follows an example of score normalization using
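Conceptually the two steps compose; a rough sketch reusing the invented scores from above (in a full ZT-Norm the cohort scores are themselves Z-normalised before the T step, which the library function takes care of):

import numpy

s_i = 0.3
impostor_scores = numpy.array([-1.2, -0.7, -0.9, -1.5, -0.8])   # Z-step statistics
cohort_scores = numpy.array([-0.4, -1.1, -0.6, -0.9])           # T-step statistics

z_i = (s_i - impostor_scores.mean()) / impostor_scores.std()    # Z-Norm first
zt_i = (z_i - cohort_scores.mean()) / cohort_scores.std()       # then T-Norm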
@@ -44,6 +44,9 @@ References
.. [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
.. [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
.. [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
+.. [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE Signal Processing Letters 12.7 (2005): 532-535.
Indices and tables
------------------
@@ -7,6 +7,15 @@ numpy.random.seed(2)  # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**

    features: 2D numpy array with the features
    n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,8 +47,13 @@ def train_ubm(features, n_gaussians):
def isv_train(features, ubm):
    """
-   Features as a list of lists [ [data_point_1_user_1, data_point_2_user_1],
-   [data_point_1_user_2, data_point_2_user_2] ]
+   Train the ISV U matrix
+
+   **Parameters**
+
+   features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+   ubm: The UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
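A rough usage sketch for `train_ubm` as documented above; the feature array is random filler, only meant to match the expected 2D shape:

import numpy
import bob.learn.em

features = numpy.random.normal(0, 1, (100, 2))   # filler: 100 two-dimensional vectors
ubm = train_ubm(features, n_gaussians=3)          # expected to return the trained GMMMachine

`isv_train` is then fed the per-class features together with this UBM.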
@@ -7,6 +7,16 @@ numpy.random.seed(2)  # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**

    features: 2D numpy array with the features
    n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,7 +48,13 @@ def train_ubm(features, n_gaussians):
def jfa_train(features, ubm):
    """
-   Features as a list of lists [ [data_point_1_user_1, data_point_2_user_1], [data_point_1_user_2, data_point_2_user_2] ]
+   Train the JFA U and V matrices
+
+   **Parameters**
+
+   features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+   ubm: The UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
@@ -16,22 +16,17 @@ data = numpy.vstack((setosa, versicolor, virginica))
# Three Gaussians with a feature dimensionality of 2
mle_machine = bob.learn.em.GMMMachine(3, 2)
mle_trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
bob.learn.em.train(mle_trainer, mle_machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine (ML)
# Creating some random data centered in
new_data = numpy.random.normal(2, 0.8, (50, 2))
# Three Gaussians with a feature dimensionality of 2
map_machine = bob.learn.em.GMMMachine(3, 2)
map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
-   bob.learn.em.train(map_trainer, map_machine, new_data, max_iterations=200,
+   bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine (MAP)
figure, ax = plt.subplots()
-   plt.scatter(new_data[:, 0], new_data[:, 1], c="olivedrab", label="new data")
+   #plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
            c="goldenrod", label="versicolor")
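For context on the `relevance_factor=4` used above: in the usual mean-only MAP adaptation each Gaussian's mean is interpolated between its ML (prior) estimate and the new data, weighted by how much data that Gaussian explains. A sketch with invented numbers (not code from this example):

import numpy

r = 4.0                                  # relevance factor, as passed to MAP_GMMTrainer
n_k = 10.0                               # responsibility mass of Gaussian k on the new data
prior_mean = numpy.array([5.0, 3.0])     # ML (prior) mean, invented values
data_mean = numpy.array([2.1, 1.9])      # responsibility-weighted mean of the new data

alpha = n_k / (n_k + r)                  # adaptation coefficient in [0, 1)
adapted_mean = alpha * data_mean + (1 - alpha) * prior_mean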
@@ -7,6 +7,16 @@ numpy.random.seed(2)  # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**

    features: 2D numpy array with the features
    n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,8 +48,13 @@ def train_ubm(features, n_gaussians):
def ivector_train(features, ubm):
    """
-   Features as a list of lists [ [data_point_1_user_1, data_point_2_user_1],
-   [data_point_1_user_2, data_point_2_user_2] ]
+   Train the iVector T matrix
+
+   **Parameters**
+
+   features: List of :py:class:`bob.learn.em.GMMStats`
+   ubm: The UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
@@ -72,6 +87,9 @@ def acc_stats(data, gmm):
def compute_ivectors(gmm_stats, ivector_machine):
    """
    Given :py:class:`bob.learn.em.GMMStats` and a T matrix, compute the iVectors.
    """
    ivectors = []
    for g in gmm_stats: