From 674ceb3fa4776679c14bd399d73eda17b382b45c Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Wed, 15 Jul 2015 12:08:35 +0200
Subject: [PATCH] A couple of fixes on bob.learn.em.EMPCATrainer:   - Issue #9 
  - Bug in the bindings   - Fixed documentation

---
 bob/learn/em/cpp/EMPCATrainer.cpp |  6 ++-
 bob/learn/em/empca_trainer.cpp    | 81 ++++++++++++++++++-------------
 bob/learn/em/test/test_em.py      | 43 ++++++++++++++++
 doc/index.rst                     |  4 ++
 4 files changed, 97 insertions(+), 37 deletions(-)

diff --git a/bob/learn/em/cpp/EMPCATrainer.cpp b/bob/learn/em/cpp/EMPCATrainer.cpp
index c27a010..1217be2 100644
--- a/bob/learn/em/cpp/EMPCATrainer.cpp
+++ b/bob/learn/em/cpp/EMPCATrainer.cpp
@@ -16,6 +16,7 @@
 #include <bob.math/linear.h>
 #include <bob.math/det.h>
 #include <bob.math/inv.h>
+#include <bob.math/pinv.h>
 #include <bob.math/stats.h>
 
 bob::learn::em::EMPCATrainer::EMPCATrainer(bool compute_likelihood):
@@ -172,6 +173,7 @@ void bob::learn::em::EMPCATrainer::initMembers(
   m_tmp_dxf.resize(n_outputs, n_features);
   m_tmp_d.resize(n_outputs);
   m_tmp_f.resize(n_features);
+  
   m_tmp_dxd_1.resize(n_outputs, n_outputs);
   m_tmp_dxd_2.resize(n_outputs, n_outputs);
   m_tmp_fxd_1.resize(n_features, n_outputs);
@@ -240,8 +242,8 @@ void bob::learn::em::EMPCATrainer::computeInvM()
 {
   // Compute inverse(M), where M = W^T * W + sigma2 * Id
   bob::math::eye(m_tmp_dxd_1); // m_tmp_dxd_1 = Id
-  m_tmp_dxd_1 *= m_sigma2; // m_tmp_dxd_1 = sigma2 * Id
-  m_tmp_dxd_1 += m_inW; // m_tmp_dxd_1 = M = W^T * W + sigma2 * Id
+  m_tmp_dxd_1 *= m_sigma2; // m_tmp_dxd_1 = sigma2 * Id  
+  m_tmp_dxd_1 += m_inW; // m_tmp_dxd_1 = M = W^T * W + sigma2 * Id  
   bob::math::inv(m_tmp_dxd_1, m_invM); // m_invM = inv(M)
 }
 
diff --git a/bob/learn/em/empca_trainer.cpp b/bob/learn/em/empca_trainer.cpp
index 0b42bca..dae075f 100644
--- a/bob/learn/em/empca_trainer.cpp
+++ b/bob/learn/em/empca_trainer.cpp
@@ -15,21 +15,28 @@
 
 static auto EMPCATrainer_doc = bob::extension::ClassDoc(
   BOB_EXT_MODULE_PREFIX ".EMPCATrainer",
-  ""
+   "Trains a :py:class:`bob.learn.linear.Machine` using an Expectation-Maximization algorithm on the given dataset [Bishop1999]_ [Roweis1998]_ \n\n"
+    "Notations used are the ones from [Bishop1999]_\n\n"
+    "The probabilistic model is given by: :math:`t = Wx + \\mu + \\epsilon`\n\n"
+    " - :math:`t` is the observed data (dimension :math:`f`)\n"
+    " - :math:`W` is a  projection matrix (dimension :math:`f \\times d`)\n"
+    " - :math:`x` is the projected data (dimension :math:`d < f`)\n"
+    " - :math:`\\mu` is the mean of the data (dimension :math:`f`)\n"
+    " - :math:`\\epsilon` is the noise of the data (dimension :math:`f`) \n"
+    " - Gaussian with zero-mean and covariance matrix :math:`\\sigma^2 Id`"
 
 ).add_constructor(
   bob::extension::FunctionDoc(
     "__init__",
+
     "Creates a EMPCATrainer",
     "",
     true
   )
-  .add_prototype("convergence_threshold","")
   .add_prototype("other","")
   .add_prototype("","")
 
   .add_parameter("other", ":py:class:`bob.learn.em.EMPCATrainer`", "A EMPCATrainer object to be copied.")
-  .add_parameter("convergence_threshold", "float", "")
 
 );
 
@@ -47,22 +54,6 @@ static int PyBobLearnEMEMPCATrainer_init_copy(PyBobLearnEMEMPCATrainerObject* se
   return 0;
 }
 
-static int PyBobLearnEMEMPCATrainer_init_number(PyBobLearnEMEMPCATrainerObject* self, PyObject* args, PyObject* kwargs) {
-
-  char** kwlist = EMPCATrainer_doc.kwlist(0);
-  double convergence_threshold    = 0.0001;
-  //Parsing the input argments
-  if (!PyArg_ParseTupleAndKeywords(args, kwargs, "d", kwlist, &convergence_threshold))
-    return -1;
-
-  if(convergence_threshold < 0){
-    PyErr_Format(PyExc_TypeError, "convergence_threshold argument must be greater than to zero");
-    return -1;
-  }
-
-  self->cxx.reset(new bob::learn::em::EMPCATrainer(convergence_threshold));
-  return 0;
-}
 
 static int PyBobLearnEMEMPCATrainer_init(PyBobLearnEMEMPCATrainerObject* self, PyObject* args, PyObject* kwargs) {
   BOB_TRY
@@ -76,21 +67,7 @@ static int PyBobLearnEMEMPCATrainer_init(PyBobLearnEMEMPCATrainerObject* self, P
       return 0;
     }
     case 1:{
-      //Reading the input argument
-      PyObject* arg = 0;
-      if (PyTuple_Size(args))
-        arg = PyTuple_GET_ITEM(args, 0);
-      else {
-        PyObject* tmp = PyDict_Values(kwargs);
-        auto tmp_ = make_safe(tmp);
-        arg = PyList_GET_ITEM(tmp, 0);
-      }
-
-      // If the constructor input is EMPCATrainer object
-      if (PyBobLearnEMEMPCATrainer_Check(arg))
-        return PyBobLearnEMEMPCATrainer_init_copy(self, args, kwargs);
-      else if(PyString_Check(arg))
-        return PyBobLearnEMEMPCATrainer_init_number(self, args, kwargs);
+      return PyBobLearnEMEMPCATrainer_init_copy(self, args, kwargs);
     }
     default:{
       PyErr_Format(PyExc_RuntimeError, "number of arguments mismatch - %s requires 0 or 1 arguments, but you provided %d (see help)", Py_TYPE(self)->tp_name, nargs);
@@ -139,7 +116,41 @@ static PyObject* PyBobLearnEMEMPCATrainer_RichCompare(PyBobLearnEMEMPCATrainerOb
 /************ Variables Section ***********************************/
 /******************************************************************/
 
+
+/***** sigma_2 *****/
+static auto sigma2 = bob::extension::VariableDoc(
+  "sigma2",
+  "float",
+  "The noise sigma2 of the probabilistic model",
+  ""
+);
+PyObject* PyBobLearnEMEMPCATrainer_getSigma2(PyBobLearnEMEMPCATrainerObject* self, void*){
+  BOB_TRY
+  return Py_BuildValue("d",self->cxx->getSigma2());
+  BOB_CATCH_MEMBER("sigma2 could not be read", 0)
+}
+int PyBobLearnEMEMPCATrainer_setSigma2(PyBobLearnEMEMPCATrainerObject* self, PyObject* value, void*){
+  BOB_TRY
+
+  if(!PyBob_NumberCheck(value)){
+    PyErr_Format(PyExc_RuntimeError, "%s %s expects a float", Py_TYPE(self)->tp_name, sigma2.name());
+    return -1;
+  }
+
+  self->cxx->setSigma2(PyFloat_AS_DOUBLE(value));
+  return 0;
+  BOB_CATCH_MEMBER("sigma2 could not be set", -1)
+}
+
+
 static PyGetSetDef PyBobLearnEMEMPCATrainer_getseters[] = {
+  {
+    sigma2.name(),
+    (getter)PyBobLearnEMEMPCATrainer_getSigma2,
+    (setter)PyBobLearnEMEMPCATrainer_setSigma2,
+    sigma2.doc(),
+    0
+  },
   {0}  // Sentinel
 };
 
@@ -257,7 +268,7 @@ static auto compute_likelihood = bob::extension::FunctionDoc(
   0,
   true
 )
-.add_prototype("linear_machine,data")
+.add_prototype("linear_machine")
 .add_parameter("linear_machine", ":py:class:`bob.learn.linear.Machine`", "LinearMachine Object");
 static PyObject* PyBobLearnEMEMPCATrainer_compute_likelihood(PyBobLearnEMEMPCATrainerObject* self, PyObject* args, PyObject* kwargs) {
   BOB_TRY
diff --git a/bob/learn/em/test/test_em.py b/bob/learn/em/test/test_em.py
index 97f1a2b..9acd5ce 100644
--- a/bob/learn/em/test/test_em.py
+++ b/bob/learn/em/test/test_em.py
@@ -253,3 +253,46 @@ def test_custom_trainer():
 
   for i in range(0, 2):
     assert (ar[i+1] == machine.means[i, :]).all()
+    
+    
+    
+def test_EMPCA():
+
+  # Tests our Probabilistic PCA trainer for linear machines for a simple
+  # problem:
+  ar=numpy.array([
+    [1, 2, 3],
+    [2, 4, 19],
+    [3, 6, 5],
+    [4, 8, 13],
+    ], dtype='float64')
+
+  # Expected llh 1 and 2 (Reference values)
+  exp_llh1 =  -32.8443
+  exp_llh2 =  -30.8559
+
+  # Do two iterations of EM to check the training procedure
+  T = bob.learn.em.EMPCATrainer()
+  m = bob.learn.linear.Machine(3,2)
+  # Initialization of the trainer
+  T.initialize(m, ar)
+  # Sets ('random') initialization values for test purposes
+  w_init = numpy.array([1.62945, 0.270954, 1.81158, 1.67002, 0.253974,
+    1.93774], 'float64').reshape(3,2)
+  sigma2_init = 1.82675
+  m.weights = w_init
+  T.sigma2 = sigma2_init
+  # Checks that the log likehood matches the reference one
+  # This should be sufficient to check everything as it requires to use
+  # the new value of W and sigma2
+  # This does an E-Step, M-Step, computes the likelihood, and compares it to
+  # the reference value obtained using matlab
+  T.e_step(m, ar)
+  T.m_step(m, ar)
+  llh1 = T.compute_likelihood(m)
+  assert abs(exp_llh1 - llh1) < 2e-4
+  T.e_step(m, ar)
+  T.m_step(m, ar)
+  llh2 = T.compute_likelihood(m)
+  assert abs(exp_llh2 - llh2) < 2e-4    
+    
diff --git a/doc/index.rst b/doc/index.rst
index a4b6ef0..d4b7062 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -42,6 +42,10 @@ References
 ..   [ElShafey2014] *Laurent El Shafey, Chris McCool, Roy Wallace, Sebastien Marcel*. **'A Scalable Formulation of Probabilistic Linear Discriminant Analysis: Applied to Face Recognition'**, TPAMI'2014
 ..   [PrinceElder2007] *Prince and Elder*. **'Probabilistic Linear Discriminant Analysis for Inference About Identity'**, ICCV'2007
 ..   [LiFu2012] *Li, Fu, Mohammed, Elder and Prince*. **'Probabilistic Models for Inference about Identity'**,  TPAMI'2012
+
+..   [Bishop1999] Tipping, Michael E., and Christopher M. Bishop. "Probabilistic principal component analysis." Journal of the Royal Statistical Society: Series B (Statistical Methodology) 61.3 (1999): 611-622.
+..   [Roweis1998] Roweis, Sam. "EM algorithms for PCA and SPCA." Advances in neural information processing systems (1998): 626-632.
+
 ..   [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
 
 
-- 
GitLab