From b235305bab20c6766364872ef9aed02b2eb787ee Mon Sep 17 00:00:00 2001
From: Manuel Guenther <manuel.guenther@idiap.ch>
Date: Thu, 5 Mar 2015 19:19:47 +0100
Subject: [PATCH] Updated MAP_GMMTrainer and removed the intermediate Python
 class.

---
 bob/learn/em/MAP_gmm_trainer.cpp    | 109 ++++++++++++++--------------
 bob/learn/em/__MAP_gmm_trainer__.py |  48 ------------
 bob/learn/em/__init__.py            |   2 -
 bob/learn/em/test/test_em.py        |  22 +++---
 4 files changed, 67 insertions(+), 114 deletions(-)
 delete mode 100644 bob/learn/em/__MAP_gmm_trainer__.py

diff --git a/bob/learn/em/MAP_gmm_trainer.cpp b/bob/learn/em/MAP_gmm_trainer.cpp
index 9868577..d62fb6f 100644
--- a/bob/learn/em/MAP_gmm_trainer.cpp
+++ b/bob/learn/em/MAP_gmm_trainer.cpp
@@ -17,28 +17,28 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;}  /*
 
 static auto MAP_GMMTrainer_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".MAP_GMMTrainer",
-  "This class implements the maximum a posteriori M-step of the expectation-maximisation algorithm for a GMM Machine. The prior parameters are encoded in the form of a GMM (e.g. a universal background model). The EM algorithm thus performs GMM adaptation."
+  "This class implements the maximum a posteriori M-step of the expectation-maximization algorithm for a GMM Machine. The prior parameters are encoded in the form of a GMM (e.g. a universal background model). The EM algorithm thus performs GMM adaptation."
 ).add_constructor(
   bob::extension::FunctionDoc(
     "__init__",
     "Creates a MAP_GMMTrainer",
-    "",
+    "In addition to the copy constructor, there are two different ways to call this constructor, one using the ``relevance_factor`` and one using the ``alpha``, both of which have the same signature. "
+    "Hence, the only way to differentiate the two functions is by using keyword arguments.",
     true
   )
 
-  .add_prototype("prior_gmm,relevance_factor, update_means, [update_variances], [update_weights], [mean_var_update_responsibilities_threshold]","")
-  .add_prototype("prior_gmm,alpha, update_means, [update_variances], [update_weights], [mean_var_update_responsibilities_threshold]","")
+  .add_prototype("prior_gmm, relevance_factor, [update_means], [update_variances], [update_weights], [mean_var_update_responsibilities_threshold]","")
+  .add_prototype("prior_gmm, alpha, [update_means], [update_variances], [update_weights], [mean_var_update_responsibilities_threshold]","")
   .add_prototype("other","")
 
-  .add_parameter("prior_gmm", ":py:class:`bob.learn.em.GMMMachine`", "The prior GMM to be adapted (Universal Backgroud Model UBM).")
-  .add_parameter("reynolds_adaptation", "bool", "Will use the Reynolds adaptation procedure? See Eq (14) from [Reynolds2000]_")
-  .add_parameter("relevance_factor", "double", "If set the reynolds_adaptation parameters, will apply the Reynolds Adaptation procedure. See Eq (14) from [Reynolds2000]_")
-  .add_parameter("alpha", "double", "Set directly the alpha parameter (Eq (14) from [Reynolds2000]_), ignoring zeroth order statistics as a weighting factor.")
+  .add_parameter("prior_gmm", ":py:class:`bob.learn.em.GMMMachine`", "The prior GMM to be adapted (Universal Background Model UBM).")
+  .add_parameter("relevance_factor", "float", "If set, the Reynolds Adaptation procedure will be applied. See Eq (14) from [Reynolds2000]_")
+  .add_parameter("alpha", "float", "Set directly the alpha parameter (Eq (14) from [Reynolds2000]_), ignoring zeroth order statistics as a weighting factor.")
 
-  .add_parameter("update_means", "bool", "Update means on each iteration")
-  .add_parameter("update_variances", "bool", "Update variances on each iteration")
-  .add_parameter("update_weights", "bool", "Update weights on each iteration")
-  .add_parameter("mean_var_update_responsibilities_threshold", "float", "Threshold over the responsibilities of the Gaussians Equations 9.24, 9.25 of Bishop, `Pattern recognition and machine learning`, 2006 require a division by the responsibilities, which might be equal to zero because of numerical issue. This threshold is used to avoid such divisions.")
+  .add_parameter("update_means", "bool", "[Default: ``True``] Update means on each iteration")
+  .add_parameter("update_variances", "bool", "[Default: ``True``] Update variances on each iteration")
+  .add_parameter("update_weights", "bool", "[Default: ``True``] Update weights on each iteration")
+  .add_parameter("mean_var_update_responsibilities_threshold", "float", "[Default: min_float] Threshold over the responsibilities of the Gaussians. Equations 9.24, 9.25 of Bishop, `Pattern recognition and machine learning`, 2006 require a division by the responsibilities, which might be equal to zero because of numerical issues. This threshold is used to avoid such divisions.")
 
   .add_parameter("other", ":py:class:`bob.learn.em.MAP_GMMTrainer`", "A MAP_GMMTrainer object to be copied.")
 );
@@ -62,7 +62,7 @@ static int PyBobLearnEMMAPGMMTrainer_init_base_trainer(PyBobLearnEMMAPGMMTrainer
 
   char** kwlist1 = MAP_GMMTrainer_doc.kwlist(0);
   char** kwlist2 = MAP_GMMTrainer_doc.kwlist(1);
-  
+
   PyBobLearnEMGMMMachineObject* gmm_machine;
   bool reynolds_adaptation   = false;
   double alpha = 0.5;
@@ -78,27 +78,31 @@ static int PyBobLearnEMMAPGMMTrainer_init_base_trainer(PyBobLearnEMMAPGMMTrainer
   PyObject* keyword_alpha            = Py_BuildValue("s", kwlist2[1]);
 
   auto keyword_relevance_factor_ = make_safe(keyword_relevance_factor);
-  auto keyword_alpha_            = make_safe(keyword_alpha);  
-  
-  //Here we have to select which keyword argument to read  
-  if (kwargs && PyDict_Contains(kwargs, keyword_relevance_factor) && (PyArg_ParseTupleAndKeywords(args, kwargs, "O!dO!|O!O!d", kwlist1, 
-                                                                      &PyBobLearnEMGMMMachine_Type, &gmm_machine,
-                                                                      &aux,
-                                                                      &PyBool_Type, &update_means, 
-                                                                      &PyBool_Type, &update_variances, 
-                                                                      &PyBool_Type, &update_weights, 
-                                                                      &mean_var_update_responsibilities_threshold)))
-    reynolds_adaptation = true;    
-  else if (kwargs && PyDict_Contains(kwargs, keyword_alpha) && (PyArg_ParseTupleAndKeywords(args, kwargs, "O!dO!|O!O!d", kwlist2, 
-                                                                 &PyBobLearnEMGMMMachine_Type, &gmm_machine,
-                                                                 &aux,
-                                                                 &PyBool_Type, &update_means, 
-                                                                 &PyBool_Type, &update_variances, 
-                                                                 &PyBool_Type, &update_weights, 
-                                                                 &mean_var_update_responsibilities_threshold)))
+  auto keyword_alpha_            = make_safe(keyword_alpha);
+
+  //Here we have to select which keyword argument to read
+  if (kwargs && PyDict_Contains(kwargs, keyword_relevance_factor)){
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!d|O!O!O!d", kwlist1,
+          &PyBobLearnEMGMMMachine_Type, &gmm_machine,
+          &aux,
+          &PyBool_Type, &update_means,
+          &PyBool_Type, &update_variances,
+          &PyBool_Type, &update_weights,
+          &mean_var_update_responsibilities_threshold))
+      return -1;
+    reynolds_adaptation = true;
+  } else if (kwargs && PyDict_Contains(kwargs, keyword_alpha)){
+    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!d|O!O!O!d", kwlist2,
+          &PyBobLearnEMGMMMachine_Type, &gmm_machine,
+          &aux,
+          &PyBool_Type, &update_means,
+          &PyBool_Type, &update_variances,
+          &PyBool_Type, &update_weights,
+          &mean_var_update_responsibilities_threshold))
+      return -1;
     reynolds_adaptation = false;
-  else{
-    PyErr_Format(PyExc_RuntimeError, "%s. The second argument must be a keyword argument.", Py_TYPE(self)->tp_name);
+  } else {
+    PyErr_Format(PyExc_RuntimeError, "%s. One of the two keyword arguments '%s' or '%s' must be present.", Py_TYPE(self)->tp_name, kwlist1[1], kwlist2[1]);
     MAP_GMMTrainer_doc.print_usage();
     return -1;
   }
@@ -107,11 +111,11 @@ static int PyBobLearnEMMAPGMMTrainer_init_base_trainer(PyBobLearnEMMAPGMMTrainer
     relevance_factor = aux;
   else
     alpha = aux;
-  
-  
-  self->cxx.reset(new bob::learn::em::MAP_GMMTrainer(f(update_means), f(update_variances), f(update_weights), 
-                                                       mean_var_update_responsibilities_threshold, 
-                                                       reynolds_adaptation,relevance_factor, alpha, gmm_machine->cxx));
+
+
+  self->cxx.reset(new bob::learn::em::MAP_GMMTrainer(f(update_means), f(update_variances), f(update_weights),
+                                                       mean_var_update_responsibilities_threshold,
+                                                       reynolds_adaptation, relevance_factor, alpha, gmm_machine->cxx));
   return 0;
 
 }
@@ -202,12 +206,12 @@ PyObject* PyBobLearnEMMAPGMMTrainer_getRelevanceFactor(PyBobLearnEMMAPGMMTrainer
 }
 int PyBobLearnEMMAPGMMTrainer_setRelevanceFactor(PyBobLearnEMMAPGMMTrainerObject* self, PyObject* value, void*){
   BOB_TRY
-  
+
   if(!PyBob_NumberCheck(value)){
     PyErr_Format(PyExc_RuntimeError, "%s %s expects a double", Py_TYPE(self)->tp_name, relevance_factor.name());
     return -1;
   }
-  
+
   self->cxx->setRelevanceFactor(PyFloat_AS_DOUBLE(value));
   return 0;
   BOB_CATCH_MEMBER("relevance_factor could not be set", 0)
@@ -228,12 +232,12 @@ PyObject* PyBobLearnEMMAPGMMTrainer_getAlpha(PyBobLearnEMMAPGMMTrainerObject* se
 }
 int PyBobLearnEMMAPGMMTrainer_setAlpha(PyBobLearnEMMAPGMMTrainerObject* self, PyObject* value, void*){
   BOB_TRY
-  
+
   if(!PyBob_NumberCheck(value)){
     PyErr_Format(PyExc_RuntimeError, "%s %s expects a double", Py_TYPE(self)->tp_name, alpha.name());
     return -1;
   }
-  
+
   self->cxx->setAlpha(PyFloat_AS_DOUBLE(value));
   return 0;
   BOB_CATCH_MEMBER("alpha could not be set", 0)
@@ -241,7 +245,7 @@ int PyBobLearnEMMAPGMMTrainer_setAlpha(PyBobLearnEMMAPGMMTrainerObject* self, Py
 
 
 
-static PyGetSetDef PyBobLearnEMMAPGMMTrainer_getseters[] = { 
+static PyGetSetDef PyBobLearnEMMAPGMMTrainer_getseters[] = {
   {
     alpha.name(),
     (getter)PyBobLearnEMMAPGMMTrainer_getAlpha,
@@ -282,14 +286,14 @@ static PyObject* PyBobLearnEMMAPGMMTrainer_initialize(PyBobLearnEMMAPGMMTrainerO
   char** kwlist = initialize.kwlist(0);
 
   PyBobLearnEMGMMMachineObject* gmm_machine = 0;
-  PyBlitzArrayObject* data                  = 0;  
+  PyBlitzArrayObject* data                  = 0;
 
   if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|O&", kwlist, &PyBobLearnEMGMMMachine_Type, &gmm_machine,
                                                                   &PyBlitzArray_Converter, &data)) return 0;
   if(data != NULL)
     auto data_ = make_safe(data);
   self->cxx->initialize(*gmm_machine->cxx);
-  
+
   BOB_CATCH_MEMBER("cannot perform the initialize method", 0)
 
   Py_RETURN_NONE;
@@ -324,16 +328,16 @@ static PyObject* PyBobLearnEMMAPGMMTrainer_eStep(PyBobLearnEMMAPGMMTrainerObject
   auto data_ = make_safe(data);
 
 
-  // perform check on the input  
+  // perform check on the input
   if (data->type_num != NPY_FLOAT64){
     PyErr_Format(PyExc_TypeError, "`%s' only supports 64-bit float arrays for input array `%s`", Py_TYPE(self)->tp_name, eStep.name());
     return 0;
-  }  
+  }
 
   if (data->ndim != 2){
     PyErr_Format(PyExc_TypeError, "`%s' only processes 2D arrays of float64 for `%s`", Py_TYPE(self)->tp_name, eStep.name());
     return 0;
-  }  
+  }
 
   if (data->shape[1] != (Py_ssize_t)gmm_machine->cxx->getNInputs() ) {
     PyErr_Format(PyExc_TypeError, "`%s' 2D `input` array should have the shape [N, %" PY_FORMAT_SIZE_T "d] not [N, %" PY_FORMAT_SIZE_T "d] for `%s`", Py_TYPE(self)->tp_name, gmm_machine->cxx->getNInputs(), data->shape[1], eStep.name());
@@ -353,8 +357,8 @@ static PyObject* PyBobLearnEMMAPGMMTrainer_eStep(PyBobLearnEMMAPGMMTrainerObject
 static auto mStep = bob::extension::FunctionDoc(
   "mStep",
 
-   "Performs a maximum a posteriori (MAP) update of the GMM:"  
-   "* parameters using the accumulated statistics in :py:class:`bob.learn.em.GMMBaseTrainer.m_ss` and the" 
+   "Performs a maximum a posteriori (MAP) update of the GMM:"
+   "* parameters using the accumulated statistics in :py:class:`bob.learn.em.GMMBaseTrainer.m_ss` and the"
    "* parameters of the prior model",
   "",
   true
@@ -369,7 +373,7 @@ static PyObject* PyBobLearnEMMAPGMMTrainer_mStep(PyBobLearnEMMAPGMMTrainerObject
   char** kwlist = mStep.kwlist(0);
 
   PyBobLearnEMGMMMachineObject* gmm_machine;
-  PyBlitzArrayObject* data                  = 0;  
+  PyBlitzArrayObject* data                  = 0;
 
   if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|O&", kwlist, &PyBobLearnEMGMMMachine_Type, &gmm_machine,
                                                                   &PyBlitzArray_Converter, &data)) return 0;
@@ -473,6 +477,5 @@ bool init_BobLearnEMMAPGMMTrainer(PyObject* module)
 
   // add the type to the module
   Py_INCREF(&PyBobLearnEMMAPGMMTrainer_Type);
-  return PyModule_AddObject(module, "_MAP_GMMTrainer", (PyObject*)&PyBobLearnEMMAPGMMTrainer_Type) >= 0;
+  return PyModule_AddObject(module, "MAP_GMMTrainer", (PyObject*)&PyBobLearnEMMAPGMMTrainer_Type) >= 0;
 }
-
diff --git a/bob/learn/em/__MAP_gmm_trainer__.py b/bob/learn/em/__MAP_gmm_trainer__.py
deleted file mode 100644
index 322d88f..0000000
--- a/bob/learn/em/__MAP_gmm_trainer__.py
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
-# Mon Jan 23 18:31:10 2015
-#
-# Copyright (C) 2011-2015 Idiap Research Institute, Martigny, Switzerland
-
-from ._library import _MAP_GMMTrainer
-import numpy
-
-# define the class
-class MAP_GMMTrainer(_MAP_GMMTrainer):
-
-  def __init__(self, prior_gmm, update_means=True, update_variances=False, update_weights=False, **kwargs):
-    """
-    :py:class:`bob.learn.em.MAP_GMMTrainer` constructor
-
-    Keyword Parameters:
-      update_means
-
-      update_variances
-
-      update_weights
-
-      prior_gmm
-        A :py:class:`bob.learn.em.GMMMachine` to be adapted
-      convergence_threshold
-        Convergence threshold
-      max_iterations
-        Number of maximum iterations
-      converge_by_likelihood
-        Tells whether we compute log_likelihood as a convergence criteria, or not 
-      alpha
-        Set directly the alpha parameter (Eq (14) from [Reynolds2000]_), ignoring zeroth order statistics as a weighting factor.
-      relevance_factor
-        If set the :py:class:`bob.learn.em.MAP_GMMTrainer.reynolds_adaptation` parameters, will apply the Reynolds Adaptation procedure. See Eq (14) from [Reynolds2000]_  
-    """
-
-    if kwargs.get('alpha')!=None:
-      alpha = kwargs.get('alpha')
-      _MAP_GMMTrainer.__init__(self, prior_gmm,alpha=alpha, update_means=update_means, update_variances=update_variances,update_weights=update_weights)
-    else:
-      relevance_factor = kwargs.get('relevance_factor')
-      _MAP_GMMTrainer.__init__(self, prior_gmm, relevance_factor=relevance_factor, update_means=update_means, update_variances=update_variances,update_weights=update_weights)
-    
-
-# copy the documentation from the base class
-__doc__ = _MAP_GMMTrainer.__doc__
diff --git a/bob/learn/em/__init__.py b/bob/learn/em/__init__.py
index 445a35d..bf6f3fa 100644
--- a/bob/learn/em/__init__.py
+++ b/bob/learn/em/__init__.py
@@ -7,11 +7,9 @@ import bob.learn.linear
 import bob.extension
 bob.extension.load_bob_library('bob.learn.em', __file__)
 
-#from ._old_library import *
 from ._library import *
 from . import version
 from .version import module as __version__
-from .__MAP_gmm_trainer__ import *
 from .train import *
 
 def ztnorm_same_value(vect_a, vect_b):
diff --git a/bob/learn/em/test/test_em.py b/bob/learn/em/test/test_em.py
index bd7ffc4..0dc6281 100644
--- a/bob/learn/em/test/test_em.py
+++ b/bob/learn/em/test/test_em.py
@@ -15,7 +15,7 @@ from bob.io.base.test_utils import datafile
 
 from bob.learn.em import KMeansMachine, GMMMachine, KMeansTrainer, \
     ML_GMMTrainer, MAP_GMMTrainer
-    
+
 import bob.learn.em
 
 #, MAP_GMMTrainer
@@ -49,23 +49,23 @@ def test_gmm_ML_1():
 
   # Trains a GMMMachine with ML_GMMTrainer
 
-  ar = bob.io.base.load(datafile("faithful.torch3_f64.hdf5", __name__, path="../data/"))  
+  ar = bob.io.base.load(datafile("faithful.torch3_f64.hdf5", __name__, path="../data/"))
   gmm = loadGMM()
-  
+
   ml_gmmtrainer = ML_GMMTrainer(True, True, True)
   #ml_gmmtrainer.train(gmm, ar)
   bob.learn.em.train(ml_gmmtrainer, gmm, ar, convergence_threshold=0.001)
 
   #config = bob.io.base.HDF5File(datafile('gmm_ML.hdf5", __name__), 'w')
   #gmm.save(config)
-  
+
   gmm_ref = GMMMachine(bob.io.base.HDF5File(datafile('gmm_ML.hdf5', __name__, path="../data/")))
   gmm_ref_32bit_debug = GMMMachine(bob.io.base.HDF5File(datafile('gmm_ML_32bit_debug.hdf5', __name__, path="../data/")))
   gmm_ref_32bit_release = GMMMachine(bob.io.base.HDF5File(datafile('gmm_ML_32bit_release.hdf5', __name__, path="../data/")))
 
   assert (gmm == gmm_ref) or (gmm == gmm_ref_32bit_release) or (gmm == gmm_ref_32bit_release)
 
- 
+
 def test_gmm_ML_2():
 
   # Trains a GMMMachine with ML_GMMTrainer; compares to an old reference
@@ -86,11 +86,11 @@ def test_gmm_ML_2():
   max_iter_gmm = 25
   accuracy = 0.00001
   ml_gmmtrainer = ML_GMMTrainer(True, True, True, prior)
-  
+
   # Run ML
   #ml_gmmtrainer.train(gmm, ar)
   bob.learn.em.train(ml_gmmtrainer, gmm, ar, max_iterations = max_iter_gmm, convergence_threshold=accuracy)
-  
+
   # Test results
   # Load torch3vision reference
   meansML_ref = bob.io.base.load(datafile('meansAfterML.hdf5', __name__, path="../data/"))
@@ -114,8 +114,8 @@ def test_gmm_MAP_1():
   gmm = GMMMachine(bob.io.base.HDF5File(datafile("gmm_ML.hdf5", __name__, path="../data/")))
   gmmprior = GMMMachine(bob.io.base.HDF5File(datafile("gmm_ML.hdf5", __name__, path="../data/")))
 
-  map_gmmtrainer = MAP_GMMTrainer(update_means=True, update_variances=False, update_weights=False, prior_gmm=gmmprior, relevance_factor=4.)  
-  
+  map_gmmtrainer = MAP_GMMTrainer(update_means=True, update_variances=False, update_weights=False, prior_gmm=gmmprior, relevance_factor=4.)
+
   #map_gmmtrainer.train(gmm, ar)
   bob.learn.em.train(map_gmmtrainer, gmm, ar)
 
@@ -183,7 +183,7 @@ def test_gmm_MAP_3():
   max_iter_gmm = 1
   accuracy = 0.00001
   map_factor = 0.5
-  map_gmmtrainer = MAP_GMMTrainer(prior_gmm, alpha=map_factor, update_means=True, update_variances=False, update_weights=False, convergence_threshold=prior)
+  map_gmmtrainer = MAP_GMMTrainer(prior_gmm, alpha=map_factor, update_means=True, update_variances=False, update_weights=False, mean_var_update_responsibilities_threshold=accuracy)
   #map_gmmtrainer.max_iterations = max_iter_gmm
   #map_gmmtrainer.convergence_threshold = accuracy
 
@@ -192,7 +192,7 @@ def test_gmm_MAP_3():
 
   # Train
   #map_gmmtrainer.train(gmm, ar)
-  bob.learn.em.train(map_gmmtrainer, gmm, ar, max_iterations = max_iter_gmm, convergence_threshold=accuracy)
+  bob.learn.em.train(map_gmmtrainer, gmm, ar, max_iterations = max_iter_gmm, convergence_threshold=prior)
 
   # Test results
   # Load torch3vision reference
-- 
GitLab