From 8a408beb5261fe342749d6773625b66e8209d5bd Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Mon, 13 Jul 2015 18:01:15 +0200
Subject: [PATCH] Solve issue #2: the log likelihood can now be computed
 from 2D arrays (averaged over the samples)
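
A minimal usage sketch (it mirrors the new test_GMMMachine_4; the machine
parameters below are left at their defaults and the shapes are only
illustrative, any configured GMMMachine with matching dimensionality
behaves the same way):

    import numpy
    import bob.learn.em

    machine = bob.learn.em.GMMMachine(2, 50)  # 2 Gaussians, 50-dim features
    x = numpy.random.rand(50)                 # a single sample (1D)
    X = numpy.random.rand(10, 50)             # 10 samples (2D)

    ll_single = machine(x)  # log(p(x|GMM)), unchanged behaviour
    ll_batch  = machine(X)  # average of machine(X[i]) over all rows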

---
 bob/learn/em/cpp/GMMMachine.cpp               | 19 +++++++++++++++
 bob/learn/em/gmm_machine.cpp                  | 24 ++++++++++++-------
 .../em/include/bob.learn.em/GMMMachine.h      |  9 +++++++
 bob/learn/em/test/test_gmm.py                 | 23 ++++++++++++++++++
 4 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/bob/learn/em/cpp/GMMMachine.cpp b/bob/learn/em/cpp/GMMMachine.cpp
index a3cbe44..d2512b7 100644
--- a/bob/learn/em/cpp/GMMMachine.cpp
+++ b/bob/learn/em/cpp/GMMMachine.cpp
@@ -233,6 +233,7 @@ double bob::learn::em::GMMMachine::logLikelihood(const blitz::Array<double, 1> &
   return logLikelihood_(x,log_weighted_gaussian_likelihoods);
 }
 
+
 double bob::learn::em::GMMMachine::logLikelihood_(const blitz::Array<double, 1> &x,
   blitz::Array<double,1> &log_weighted_gaussian_likelihoods) const
 {
@@ -250,6 +251,22 @@ double bob::learn::em::GMMMachine::logLikelihood_(const blitz::Array<double, 1>
   return log_likelihood;
 }
 
+
+double bob::learn::em::GMMMachine::logLikelihood(const blitz::Array<double, 2> &x) const {
+  // Check the dimensionality of the samples (columns)
+  bob::core::array::assertSameDimensionLength(x.extent(1), m_n_inputs);
+  // Accumulate the log likelihood of each row (sample) through the
+  // 1D overload and return the average over all samples
+
+  double sum_ll = 0;
+  for (int i=0; i<x.extent(0); i++)
+    sum_ll += logLikelihood_(x(i, blitz::Range::all()));
+
+  return sum_ll / x.extent(0);
+}
+
+
+
 double bob::learn::em::GMMMachine::logLikelihood(const blitz::Array<double, 1> &x) const {
   // Check dimension
   bob::core::array::assertSameDimensionLength(x.extent(0), m_n_inputs);
@@ -258,6 +275,8 @@ double bob::learn::em::GMMMachine::logLikelihood(const blitz::Array<double, 1> &
   return logLikelihood_(x,m_cache_log_weighted_gaussian_likelihoods);
 }
 
+
+
 double bob::learn::em::GMMMachine::logLikelihood_(const blitz::Array<double, 1> &x) const {
   // Call the other logLikelihood (overloaded) function
   // (log_weighted_gaussian_likelihoods will be discarded)
diff --git a/bob/learn/em/gmm_machine.cpp b/bob/learn/em/gmm_machine.cpp
index 4057bc5..86c965c 100644
--- a/bob/learn/em/gmm_machine.cpp
+++ b/bob/learn/em/gmm_machine.cpp
@@ -643,7 +643,8 @@ static PyObject* PyBobLearnEMGMMMachine_resize(PyBobLearnEMGMMMachineObject* sel
 static auto log_likelihood = bob::extension::FunctionDoc(
   "log_likelihood",
   "Output the log likelihood of the sample, x, i.e. :math:`log(p(x|GMM))`. Inputs are checked.",
-  ".. note:: The :py:meth:`__call__` function is an alias for this.",
+  ".. note:: The :py:meth:`__call__` function is an alias for this. \n "
+  "If `input` is 2D the average along the samples will be computed (:math:`\\frac{log(p(x|GMM))}{N}`) ",
   true
 )
 .add_prototype("input","output")
@@ -667,21 +668,28 @@ static PyObject* PyBobLearnEMGMMMachine_loglikelihood(PyBobLearnEMGMMMachineObje
     return 0;
   }
 
-  if (input->ndim != 1){
-    PyErr_Format(PyExc_TypeError, "`%s' only processes 1D arrays of float64", Py_TYPE(self)->tp_name);
+  if (input->ndim != 1 && input->ndim != 2){
+    PyErr_Format(PyExc_TypeError, "`%s' only processes 1D or 2D arrays of float64", Py_TYPE(self)->tp_name);
     log_likelihood.print_usage();
     return 0;
   }
-
-  if (input->shape[0] != (Py_ssize_t)self->cxx->getNInputs()){
+
+  int shape_index = input->ndim - 1; // Index of the feature dimension (0 for 1D input, 1 for 2D input)
+
+  if (input->shape[shape_index] != (Py_ssize_t)self->cxx->getNInputs()){
     PyErr_Format(PyExc_TypeError, "`%s' 1D `input` array should have %" PY_FORMAT_SIZE_T "d elements, not %" PY_FORMAT_SIZE_T "d", Py_TYPE(self)->tp_name, self->cxx->getNInputs(), input->shape[0]);
     log_likelihood.print_usage();
     return 0;
   }
-
-  double value = self->cxx->logLikelihood(*PyBlitzArrayCxx_AsBlitz<double,1>(input));
+
+  double value = 0;
+  if (input->ndim == 1)
+    value = self->cxx->logLikelihood(*PyBlitzArrayCxx_AsBlitz<double,1>(input));
+  else
+    value = self->cxx->logLikelihood(*PyBlitzArrayCxx_AsBlitz<double,2>(input));
+
+
   return Py_BuildValue("d", value);
-
   BOB_CATCH_MEMBER("cannot compute the likelihood", 0)
 }
 
diff --git a/bob/learn/em/include/bob.learn.em/GMMMachine.h b/bob/learn/em/include/bob.learn.em/GMMMachine.h
index c3b1246..0a46f82 100644
--- a/bob/learn/em/include/bob.learn.em/GMMMachine.h
+++ b/bob/learn/em/include/bob.learn.em/GMMMachine.h
@@ -231,6 +231,15 @@ class GMMMachine
      */
     double logLikelihood(const blitz::Array<double, 1> &x) const;
 
+
+    /**
+     * Output the average log likelihood over a set of samples x (one per row), i.e. the mean of log(p(x_i|GMM))
+     * @param[in]  x A 2D array with one sample per row
+     * The dimensionality of each sample is checked
+     */
+    double logLikelihood(const blitz::Array<double, 2> &x) const;
+
+
     /**
      * Output the log likelihood of the sample, x, i.e. log(p(x|GMM))
      * @param[in]  x The sample
diff --git a/bob/learn/em/test/test_gmm.py b/bob/learn/em/test/test_gmm.py
index cc678c1..1733c66 100644
--- a/bob/learn/em/test/test_gmm.py
+++ b/bob/learn/em/test/test_gmm.py
@@ -238,3 +238,26 @@ def test_GMMMachine_3():
   # implementation
   matlab_ll_ref = -2.361583051672024e+02
   assert abs(gmm(data) - matlab_ll_ref) < 1e-10
+
+
+def test_GMMMachine_4():
+
+  import numpy
+  numpy.random.seed(3) # fix the seed for reproducibility
+
+  data = numpy.random.rand(100,50) # random data is fine: averaging the per-row (1D) calls in Python must match the single 2D call handled in C++
+
+  gmm = GMMMachine(2, 50)
+  gmm.weights   = bob.io.base.load(datafile('weights.hdf5', __name__, path="../data/"))
+  gmm.means     = bob.io.base.load(datafile('means.hdf5', __name__, path="../data/"))
+  gmm.variances = bob.io.base.load(datafile('variances.hdf5', __name__, path="../data/"))
+
+
+  ll = 0
+  for i in range(data.shape[0]):
+    ll += gmm(data[i,:])
+  ll /= data.shape[0]
+
+  assert abs(ll - gmm(data)) < 1e-10
+
+
-- 
GitLab