From 674ceb3fa4776679c14bd399d73eda17b382b45c Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Wed, 15 Jul 2015 12:08:35 +0200 Subject: [PATCH] A couple of fixes on bob.learn.em.EMPCATrainer: - Issue #9 - Bug in the bindings - Fixed documentation --- bob/learn/em/cpp/EMPCATrainer.cpp | 6 ++- bob/learn/em/empca_trainer.cpp | 81 ++++++++++++++++++------------- bob/learn/em/test/test_em.py | 43 ++++++++++++++++ doc/index.rst | 4 ++ 4 files changed, 97 insertions(+), 37 deletions(-) diff --git a/bob/learn/em/cpp/EMPCATrainer.cpp b/bob/learn/em/cpp/EMPCATrainer.cpp index c27a010..1217be2 100644 --- a/bob/learn/em/cpp/EMPCATrainer.cpp +++ b/bob/learn/em/cpp/EMPCATrainer.cpp @@ -16,6 +16,7 @@ #include <bob.math/linear.h> #include <bob.math/det.h> #include <bob.math/inv.h> +#include <bob.math/pinv.h> #include <bob.math/stats.h> bob::learn::em::EMPCATrainer::EMPCATrainer(bool compute_likelihood): @@ -172,6 +173,7 @@ void bob::learn::em::EMPCATrainer::initMembers( m_tmp_dxf.resize(n_outputs, n_features); m_tmp_d.resize(n_outputs); m_tmp_f.resize(n_features); + m_tmp_dxd_1.resize(n_outputs, n_outputs); m_tmp_dxd_2.resize(n_outputs, n_outputs); m_tmp_fxd_1.resize(n_features, n_outputs); @@ -240,8 +242,8 @@ void bob::learn::em::EMPCATrainer::computeInvM() { // Compute inverse(M), where M = W^T * W + sigma2 * Id bob::math::eye(m_tmp_dxd_1); // m_tmp_dxd_1 = Id - m_tmp_dxd_1 *= m_sigma2; // m_tmp_dxd_1 = sigma2 * Id - m_tmp_dxd_1 += m_inW; // m_tmp_dxd_1 = M = W^T * W + sigma2 * Id + m_tmp_dxd_1 *= m_sigma2; // m_tmp_dxd_1 = sigma2 * Id + m_tmp_dxd_1 += m_inW; // m_tmp_dxd_1 = M = W^T * W + sigma2 * Id bob::math::inv(m_tmp_dxd_1, m_invM); // m_invM = inv(M) } diff --git a/bob/learn/em/empca_trainer.cpp b/bob/learn/em/empca_trainer.cpp index 0b42bca..dae075f 100644 --- a/bob/learn/em/empca_trainer.cpp +++ b/bob/learn/em/empca_trainer.cpp @@ -15,21 +15,28 @@ static auto EMPCATrainer_doc = bob::extension::ClassDoc( BOB_EXT_MODULE_PREFIX ".EMPCATrainer", - "" + "Trains a :py:class:`bob.learn.linear.Machine` using an Expectation-Maximization algorithm on the given dataset [Bishop1999]_ [Roweis1998]_ \n\n" + "Notations used are the ones from [Bishop1999]_\n\n" + "The probabilistic model is given by: :math:`t = Wx + \\mu + \\epsilon`\n\n" + " - :math:`t` is the observed data (dimension :math:`f`)\n" + " - :math:`W` is a projection matrix (dimension :math:`f \\times d`)\n" + " - :math:`x` is the projected data (dimension :math:`d < f`)\n" + " - :math:`\\mu` is the mean of the data (dimension :math:`f`)\n" + " - :math:`\\epsilon` is the noise of the data (dimension :math:`f`) \n" + " - Gaussian with zero-mean and covariance matrix :math:`\\sigma^2 Id`" ).add_constructor( bob::extension::FunctionDoc( "__init__", + "Creates a EMPCATrainer", "", true ) - .add_prototype("convergence_threshold","") .add_prototype("other","") .add_prototype("","") .add_parameter("other", ":py:class:`bob.learn.em.EMPCATrainer`", "A EMPCATrainer object to be copied.") - .add_parameter("convergence_threshold", "float", "") ); @@ -47,22 +54,6 @@ static int PyBobLearnEMEMPCATrainer_init_copy(PyBobLearnEMEMPCATrainerObject* se return 0; } -static int PyBobLearnEMEMPCATrainer_init_number(PyBobLearnEMEMPCATrainerObject* self, PyObject* args, PyObject* kwargs) { - - char** kwlist = EMPCATrainer_doc.kwlist(0); - double convergence_threshold = 0.0001; - //Parsing the input argments - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "d", kwlist, &convergence_threshold)) - return -1; - - if(convergence_threshold < 0){ - PyErr_Format(PyExc_TypeError, "convergence_threshold argument must be greater than to zero"); - return -1; - } - - self->cxx.reset(new bob::learn::em::EMPCATrainer(convergence_threshold)); - return 0; -} static int PyBobLearnEMEMPCATrainer_init(PyBobLearnEMEMPCATrainerObject* self, PyObject* args, PyObject* kwargs) { BOB_TRY @@ -76,21 +67,7 @@ static int PyBobLearnEMEMPCATrainer_init(PyBobLearnEMEMPCATrainerObject* self, P return 0; } case 1:{ - //Reading the input argument - PyObject* arg = 0; - if (PyTuple_Size(args)) - arg = PyTuple_GET_ITEM(args, 0); - else { - PyObject* tmp = PyDict_Values(kwargs); - auto tmp_ = make_safe(tmp); - arg = PyList_GET_ITEM(tmp, 0); - } - - // If the constructor input is EMPCATrainer object - if (PyBobLearnEMEMPCATrainer_Check(arg)) - return PyBobLearnEMEMPCATrainer_init_copy(self, args, kwargs); - else if(PyString_Check(arg)) - return PyBobLearnEMEMPCATrainer_init_number(self, args, kwargs); + return PyBobLearnEMEMPCATrainer_init_copy(self, args, kwargs); } default:{ PyErr_Format(PyExc_RuntimeError, "number of arguments mismatch - %s requires 0 or 1 arguments, but you provided %d (see help)", Py_TYPE(self)->tp_name, nargs); @@ -139,7 +116,41 @@ static PyObject* PyBobLearnEMEMPCATrainer_RichCompare(PyBobLearnEMEMPCATrainerOb /************ Variables Section ***********************************/ /******************************************************************/ + +/***** sigma_2 *****/ +static auto sigma2 = bob::extension::VariableDoc( + "sigma2", + "float", + "The noise sigma2 of the probabilistic model", + "" +); +PyObject* PyBobLearnEMEMPCATrainer_getSigma2(PyBobLearnEMEMPCATrainerObject* self, void*){ + BOB_TRY + return Py_BuildValue("d",self->cxx->getSigma2()); + BOB_CATCH_MEMBER("sigma2 could not be read", 0) +} +int PyBobLearnEMEMPCATrainer_setSigma2(PyBobLearnEMEMPCATrainerObject* self, PyObject* value, void*){ + BOB_TRY + + if(!PyBob_NumberCheck(value)){ + PyErr_Format(PyExc_RuntimeError, "%s %s expects a float", Py_TYPE(self)->tp_name, sigma2.name()); + return -1; + } + + self->cxx->setSigma2(PyFloat_AS_DOUBLE(value)); + return 0; + BOB_CATCH_MEMBER("sigma2 could not be set", -1) +} + + static PyGetSetDef PyBobLearnEMEMPCATrainer_getseters[] = { + { + sigma2.name(), + (getter)PyBobLearnEMEMPCATrainer_getSigma2, + (setter)PyBobLearnEMEMPCATrainer_setSigma2, + sigma2.doc(), + 0 + }, {0} // Sentinel }; @@ -257,7 +268,7 @@ static auto compute_likelihood = bob::extension::FunctionDoc( 0, true ) -.add_prototype("linear_machine,data") +.add_prototype("linear_machine") .add_parameter("linear_machine", ":py:class:`bob.learn.linear.Machine`", "LinearMachine Object"); static PyObject* PyBobLearnEMEMPCATrainer_compute_likelihood(PyBobLearnEMEMPCATrainerObject* self, PyObject* args, PyObject* kwargs) { BOB_TRY diff --git a/bob/learn/em/test/test_em.py b/bob/learn/em/test/test_em.py index 97f1a2b..9acd5ce 100644 --- a/bob/learn/em/test/test_em.py +++ b/bob/learn/em/test/test_em.py @@ -253,3 +253,46 @@ def test_custom_trainer(): for i in range(0, 2): assert (ar[i+1] == machine.means[i, :]).all() + + + +def test_EMPCA(): + + # Tests our Probabilistic PCA trainer for linear machines for a simple + # problem: + ar=numpy.array([ + [1, 2, 3], + [2, 4, 19], + [3, 6, 5], + [4, 8, 13], + ], dtype='float64') + + # Expected llh 1 and 2 (Reference values) + exp_llh1 = -32.8443 + exp_llh2 = -30.8559 + + # Do two iterations of EM to check the training procedure + T = bob.learn.em.EMPCATrainer() + m = bob.learn.linear.Machine(3,2) + # Initialization of the trainer + T.initialize(m, ar) + # Sets ('random') initialization values for test purposes + w_init = numpy.array([1.62945, 0.270954, 1.81158, 1.67002, 0.253974, + 1.93774], 'float64').reshape(3,2) + sigma2_init = 1.82675 + m.weights = w_init + T.sigma2 = sigma2_init + # Checks that the log likehood matches the reference one + # This should be sufficient to check everything as it requires to use + # the new value of W and sigma2 + # This does an E-Step, M-Step, computes the likelihood, and compares it to + # the reference value obtained using matlab + T.e_step(m, ar) + T.m_step(m, ar) + llh1 = T.compute_likelihood(m) + assert abs(exp_llh1 - llh1) < 2e-4 + T.e_step(m, ar) + T.m_step(m, ar) + llh2 = T.compute_likelihood(m) + assert abs(exp_llh2 - llh2) < 2e-4 + diff --git a/doc/index.rst b/doc/index.rst index a4b6ef0..d4b7062 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -42,6 +42,10 @@ References .. [ElShafey2014] *Laurent El Shafey, Chris McCool, Roy Wallace, Sebastien Marcel*. **'A Scalable Formulation of Probabilistic Linear Discriminant Analysis: Applied to Face Recognition'**, TPAMI'2014 .. [PrinceElder2007] *Prince and Elder*. **'Probabilistic Linear Discriminant Analysis for Inference About Identity'**, ICCV'2007 .. [LiFu2012] *Li, Fu, Mohammed, Elder and Prince*. **'Probabilistic Models for Inference about Identity'**, TPAMI'2012 + +.. [Bishop1999] Tipping, Michael E., and Christopher M. Bishop. "Probabilistic principal component analysis." Journal of the Royal Statistical Society: Series B (Statistical Methodology) 61.3 (1999): 611-622. +.. [Roweis1998] Roweis, Sam. "EM algorithms for PCA and SPCA." Advances in neural information processing systems (1998): 626-632. + .. [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_ -- GitLab