From e307cd1641fe6ef988972ccfb1845f15aa419790 Mon Sep 17 00:00:00 2001
From: Tiago Pereira <tiago.pereira@partner.samsung.com>
Date: Mon, 29 May 2017 14:33:45 -0700
Subject: [PATCH] Changed the MAP example

Improved the documentation (plot example docstrings and score-normalization references)

Fixed a bug introduced in train.py: run the input checks only when `data` is a numpy array

[sphinx] Fixed the parameter documentation

Fixed the max_iterations argument in the k-means trainer tests
---
 bob/learn/em/test/test_kmeans_trainer.py |  4 ++--
 bob/learn/em/train.py                    |  6 ++++--
 doc/guide.rst                            |  6 +++---
 doc/index.rst                            |  3 +++
 doc/plot/plot_ISV.py                     | 18 ++++++++++++++++--
 doc/plot/plot_JFA.py                     | 20 ++++++++++++++++++--
 doc/plot/plot_MAP.py                     |  9 ++-------
 doc/plot/plot_iVector.py                 | 24 +++++++++++++++++++++---
 8 files changed, 69 insertions(+), 21 deletions(-)

diff --git a/bob/learn/em/test/test_kmeans_trainer.py b/bob/learn/em/test/test_kmeans_trainer.py
index 978ce72..2ebf1fd 100644
--- a/bob/learn/em/test/test_kmeans_trainer.py
+++ b/bob/learn/em/test/test_kmeans_trainer.py
@@ -184,10 +184,10 @@ def test_trainer_execption():
     machine = KMeansMachine(2, 2)
     data = numpy.array([[1.0, 2.0], [2, 3.], [1, 1.], [2, 5.], [numpy.inf, 1.0]])
     trainer = KMeansTrainer()
-    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 0.001)
+    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
 
     # Testing Nan
     machine = KMeansMachine(2, 2)
     data = numpy.array([[1.0, 2.0], [2, 3.], [1, numpy.nan], [2, 5.], [2.0, 1.0]])
     trainer = KMeansTrainer()
-    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 0.001)
+    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
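
The fourth positional argument of bob.learn.em.train is max_iterations, not
convergence_threshold (see the signature in bob/learn/em/train.py in the next
diff), which is why the tests now pass an integer. A minimal sketch of the two
call forms, assuming the trainer, machine and data set up as in the test above:

    # positional: the 10 binds to max_iterations
    bob.learn.em.train(trainer, machine, data, 10)

    # keyword form, if a convergence threshold is also wanted
    bob.learn.em.train(trainer, machine, data, max_iterations=10,
                       convergence_threshold=0.001)
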
diff --git a/bob/learn/em/train.py b/bob/learn/em/train.py
index a4263e7..6927015 100644
--- a/bob/learn/em/train.py
+++ b/bob/learn/em/train.py
@@ -31,10 +31,12 @@ def train(trainer, machine, data, max_iterations=50, convergence_threshold=None,
         If True, runs the initialization procedure
       rng :  :py:class:`bob.core.random.mt19937`
         The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loop
-      check_inputs: Shallow checks in the inputs. Check for inf and NaN  
+      check_inputs:
+        Shallow checks on the inputs; raises a ``ValueError`` if ``numpy.inf`` or ``numpy.nan`` is found in ``data``
     """
 
-    if check_inputs:
+    if check_inputs and isinstance(data, numpy.ndarray):
+
         if numpy.isinf(numpy.sum(data)):
             raise ValueError("Please, check your inputs; numpy.inf detected in `data` ")
 
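
With this guard in place, bad values are rejected before the EM loop starts. A
minimal sketch of the failure mode, reusing the k-means setup from the tests
above (the exact NaN message is an assumption, mirroring the inf one):

    import numpy
    import bob.learn.em

    machine = bob.learn.em.KMeansMachine(2, 2)
    trainer = bob.learn.em.KMeansTrainer()
    data = numpy.array([[1.0, 2.0], [2.0, numpy.nan]])

    try:
        bob.learn.em.train(trainer, machine, data, max_iterations=10)
    except ValueError as e:
        print(e)  # e.g. "Please, check your inputs; numpy.nan detected in `data`"
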
diff --git a/doc/guide.rst b/doc/guide.rst
index d85c8a5..f506b58 100644
--- a/doc/guide.rst
+++ b/doc/guide.rst
@@ -709,7 +709,7 @@ Z-Norm
 ======
 .. _znorm:
 
-Given a score :math:`s_i`, Z-Norm (zero-normalisation) scales this value by the
+Given a score :math:`s_i`, Z-Norm (zero-normalisation) [Auckenthaler2000]_ [Mariethoz2005]_ scales this value by the
 mean (:math:`\mu`) and standard deviation (:math:`\sigma`) of an impostor score
 distribution. This score distribution can be computed before hand and it is
 defined as the following.
@@ -734,7 +734,7 @@ T-Norm
 ======
 .. _tnorm:
 
-T-norm (Test-normalization) operates in a probe-centric manner. If in the
+T-norm (Test-normalization) [Auckenthaler2000]_ [Mariethoz2005]_ operates in a probe-centric manner. If in the
 Z-Norm :math:`\mu` and :math:`\sigma` are estimated using an impostor set of
 models and its scores, the t-norm computes these statistics using the current
 probe sample against at set of models in a co-hort :math:`\Theta_{c}`. A co-
@@ -771,7 +771,7 @@ ZT-Norm
 =======
 .. _ztnorm:
 
-ZT-Norm consists in the application of :ref:`Z-Norm <znorm>` followed by a
+ZT-Norm [Auckenthaler2000]_ [Mariethoz2005]_ consists of the application of :ref:`Z-Norm <znorm>` followed by a
 :ref:`T-Norm <tnorm>` and it is implemented in :py:func:`bob.learn.em.ztnorm`.
 
 Follow bellow an example of score normalization using
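
A minimal numpy-only sketch of the Z-Norm scaling described above, with
hypothetical scores (the standard formula, scaling s_i by the impostor mean
and standard deviation):

    import numpy

    impostor_scores = numpy.array([0.5, 0.2, 0.4, 0.3])  # hypothetical impostor scores
    mu = impostor_scores.mean()     # impostor mean
    sigma = impostor_scores.std()   # impostor standard deviation

    s = 0.9                         # hypothetical raw score s_i
    s_znorm = (s - mu) / sigma      # zero-normalised score
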
diff --git a/doc/index.rst b/doc/index.rst
index db40d1c..eab4d4b 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -44,6 +44,9 @@ References
 
 ..   [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
 ..   [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
+..   [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
+..   [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE Signal Processing Letters 12.7 (2005): 532-535.
+
 
 Indices and tables
 ------------------
diff --git a/doc/plot/plot_ISV.py b/doc/plot/plot_ISV.py
index b72cdb1..855c601 100755
--- a/doc/plot/plot_ISV.py
+++ b/doc/plot/plot_ISV.py
@@ -7,6 +7,15 @@ numpy.random.seed(2)  # FIXING A SEED
 
 
 def train_ubm(features, n_gaussians):
+    """
+    Train UBM
+     
+     **Parameters**
+       features: 2D numpy array with the features
+       
+       n_gaussians: Number of Gaussians
+       
+    """
     input_size = features.shape[1]
 
     kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,8 +47,13 @@ def train_ubm(features, n_gaussians):
 
 def isv_train(features, ubm):
     """
-    Features com lista de listas [  [data_point_1_user_1,data_point_2_user_1],
-    [data_point_1_user_2,data_point_2_user_2]  ]
+    Trains the U matrix (session variability subspace) of an ISV model
+
+    **Parameters**
+      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+
+      ubm: A trained UBM (:py:class:`bob.learn.em.GMMMachine`)
+
     """
 
     stats = []
diff --git a/doc/plot/plot_JFA.py b/doc/plot/plot_JFA.py
index 77f4e2e..a2b05d5 100755
--- a/doc/plot/plot_JFA.py
+++ b/doc/plot/plot_JFA.py
@@ -7,6 +7,16 @@ numpy.random.seed(2)  # FIXING A SEED
 
 
 def train_ubm(features, n_gaussians):
+    """
+    Train UBM
+
+     **Parameters**
+       features: 2D numpy array with the features
+
+       n_gaussians: Number of Gaussians
+
+    """
+
     input_size = features.shape[1]
 
     kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,8 +48,14 @@ def train_ubm(features, n_gaussians):
 
 def jfa_train(features, ubm):
     """
-    Features com lista de listas [  [data_point_1_user_1,data_point_2_user_1], [data_point_1_user_2,data_point_2_user_2]  ]
-    """
+    Trains the U and V matrices (session and client variability subspaces) of a JFA model
+
+    **Parameters**
+      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+
+      ubm: A trained UBM (:py:class:`bob.learn.em.GMMMachine`)
+
+    """
 
     stats = []
     for user in features:
diff --git a/doc/plot/plot_MAP.py b/doc/plot/plot_MAP.py
index 0c721ba..0690cf8 100644
--- a/doc/plot/plot_MAP.py
+++ b/doc/plot/plot_MAP.py
@@ -16,22 +16,17 @@ data = numpy.vstack((setosa, versicolor, virginica))
 
 # Two clusters with a feature dimensionality of 3
 mle_machine = bob.learn.em.GMMMachine(3, 2)
-mle_trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
 mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
-bob.learn.em.train(mle_trainer, mle_machine, data, max_iterations=200,
-                   convergence_threshold=1e-5)  # Train the KMeansMachine
 
 # Creating some random data centered in
-new_data = numpy.random.normal(2, 0.8, (50, 2))
-# Two clusters with a feature dimensionality of 3
 map_machine = bob.learn.em.GMMMachine(3, 2)
 map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
-bob.learn.em.train(map_trainer, map_machine, new_data, max_iterations=200,
+bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
                    convergence_threshold=1e-5)  # Train the KMeansMachine
 
 
 figure, ax = plt.subplots()
-plt.scatter(new_data[:, 0], new_data[:, 1], c="olivedrab", label="new data")
+# plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
 plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
 plt.scatter(versicolor[:, 0], versicolor[:, 1],
             c="goldenrod", label="versicolor")
diff --git a/doc/plot/plot_iVector.py b/doc/plot/plot_iVector.py
index 8f7d1e1..95953d3 100755
--- a/doc/plot/plot_iVector.py
+++ b/doc/plot/plot_iVector.py
@@ -7,6 +7,16 @@ numpy.random.seed(2)  # FIXING A SEED
 
 
 def train_ubm(features, n_gaussians):
+    """
+    Train UBM
+
+     **Parameters**
+       features: 2D numpy array with the features
+
+       n_gaussians: Number of Gaussians
+
+    """
+
     input_size = features.shape[1]
 
     kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
@@ -38,9 +48,14 @@ def train_ubm(features, n_gaussians):
 
 def ivector_train(features, ubm):
     """
-    Features com lista de listas [  [data_point_1_user_1,data_point_2_user_1],
-    [data_point_1_user_2,data_point_2_user_2]  ]
-    """
+    Trains the T matrix (total variability subspace) of an iVector extractor
+
+    **Parameters**
+      features: List of :py:class:`bob.learn.em.GMMStats`
+
+      ubm: A trained UBM (:py:class:`bob.learn.em.GMMMachine`)
+
+    """
 
     stats = []
     for user in features:
@@ -72,6 +87,9 @@ def acc_stats(data, gmm):
 
 
 def compute_ivectors(gmm_stats, ivector_machine):
+    """
+    Given a list of :py:class:`bob.learn.em.GMMStats` and a T matrix, computes the iVectors.
+    """
 
     ivectors = []
     for g in gmm_stats:
-- 
GitLab