diff --git a/bob/learn/em/cpp/GMMMachine.cpp b/bob/learn/em/cpp/GMMMachine.cpp
index a3cbe444ab525157c196148d5bafdc311fd0830a..d2512b7085f09d9938fbed063a092251d974b5c9 100644
--- a/bob/learn/em/cpp/GMMMachine.cpp
+++ b/bob/learn/em/cpp/GMMMachine.cpp
@@ -233,6 +233,7 @@ double bob::learn::em::GMMMachine::logLikelihood(const blitz::Array<double, 1> &
   return logLikelihood_(x,log_weighted_gaussian_likelihoods);
 }
 
+
 double bob::learn::em::GMMMachine::logLikelihood_(const blitz::Array<double, 1> &x,
   blitz::Array<double,1> &log_weighted_gaussian_likelihoods) const
 {
@@ -250,6 +251,22 @@ double bob::learn::em::GMMMachine::logLikelihood_(const blitz::Array<double, 1>
   return log_likelihood;
 }
 
+
+double bob::learn::em::GMMMachine::logLikelihood(const blitz::Array<double, 2> &x) const {
+  // Check the feature dimension
+  bob::core::array::assertSameDimensionLength(x.extent(1), m_n_inputs);
+
+  // Average the log likelihood over the samples by calling the 1D
+  // logLikelihood_ (overloaded) function on every row
+  double sum_ll = 0;
+  for (int i=0; i<x.extent(0); i++)
+    sum_ll += logLikelihood_(x(i,blitz::Range::all()));
+
+  return sum_ll/x.extent(0);
+}
+
+
+
 double bob::learn::em::GMMMachine::logLikelihood(const blitz::Array<double, 1> &x) const {
   // Check dimension
   bob::core::array::assertSameDimensionLength(x.extent(0), m_n_inputs);
@@ -258,6 +275,8 @@ double bob::learn::em::GMMMachine::logLikelihood(const blitz::Array<double, 1> &
   return logLikelihood_(x,m_cache_log_weighted_gaussian_likelihoods);
 }
 
+
+
 double bob::learn::em::GMMMachine::logLikelihood_(const blitz::Array<double, 1> &x) const {
   // Call the other logLikelihood (overloaded) function
   // (log_weighted_gaussian_likelihoods will be discarded)
diff --git a/bob/learn/em/gmm_machine.cpp b/bob/learn/em/gmm_machine.cpp
index 4057bc58bc87426cc0c8551e65828ab1df730c88..86c965c25779685f232e40835cc3da789024ca7f 100644
--- a/bob/learn/em/gmm_machine.cpp
+++ b/bob/learn/em/gmm_machine.cpp
@@ -643,7 +643,8 @@ static PyObject* PyBobLearnEMGMMMachine_resize(PyBobLearnEMGMMMachineObject* sel
 static auto log_likelihood = bob::extension::FunctionDoc(
   "log_likelihood",
   "Output the log likelihood of the sample, x, i.e. :math:`log(p(x|GMM))`. Inputs are checked.",
-  ".. note:: The :py:meth:`__call__` function is an alias for this.",
+  ".. note:: The :py:meth:`__call__` function is an alias for this. \n"
+  "If `input` is 2D, the average log likelihood over the samples is computed: :math:`\\frac{1}{N}\\sum_{i=1}^{N}\\log(p(x_i|GMM))`",
   true
 )
 .add_prototype("input","output")
@@ -667,21 +668,28 @@ static PyObject* PyBobLearnEMGMMMachine_loglikelihood(PyBobLearnEMGMMMachineObje
     return 0;
   }
 
-  if (input->ndim != 1){
-    PyErr_Format(PyExc_TypeError, "`%s' only processes 1D arrays of float64", Py_TYPE(self)->tp_name);
+  if (input->ndim > 2){
+    PyErr_Format(PyExc_TypeError, "`%s' only processes 1D or 2D arrays of float64", Py_TYPE(self)->tp_name);
     log_likelihood.print_usage();
     return 0;
   }
-
-  if (input->shape[0] != (Py_ssize_t)self->cxx->getNInputs()){
-    PyErr_Format(PyExc_TypeError, "`%s' 1D `input` array should have %" PY_FORMAT_SIZE_T "d elements, not %" PY_FORMAT_SIZE_T "d", Py_TYPE(self)->tp_name, self->cxx->getNInputs(), input->shape[0]);
+
+  int shape_index = input->ndim - 1; // index of the feature dimension (0 for 1D arrays, 1 for 2D arrays)
+
+  if (input->shape[shape_index] != (Py_ssize_t)self->cxx->getNInputs()){
+    PyErr_Format(PyExc_TypeError, "`%s' `input` array should have %" PY_FORMAT_SIZE_T "d elements in its last dimension, not %" PY_FORMAT_SIZE_T "d", Py_TYPE(self)->tp_name, self->cxx->getNInputs(), input->shape[shape_index]);
     log_likelihood.print_usage();
     return 0;
   }
-
-  double value = self->cxx->logLikelihood(*PyBlitzArrayCxx_AsBlitz<double,1>(input));
+
+  double value = 0;
+  if (input->ndim == 1)
+    value = self->cxx->logLikelihood(*PyBlitzArrayCxx_AsBlitz<double,1>(input));
+  else
+    value = self->cxx->logLikelihood(*PyBlitzArrayCxx_AsBlitz<double,2>(input));
+
+  return Py_BuildValue("d", value);
-
   BOB_CATCH_MEMBER("cannot compute the likelihood", 0)
 }
diff --git a/bob/learn/em/include/bob.learn.em/GMMMachine.h b/bob/learn/em/include/bob.learn.em/GMMMachine.h
index c3b124621e67ac93ee3c3e85eb2615738b3c7413..0a46f822b22acce84d9faa3914729a1e48b7d115 100644
--- a/bob/learn/em/include/bob.learn.em/GMMMachine.h
+++ b/bob/learn/em/include/bob.learn.em/GMMMachine.h
@@ -231,6 +231,15 @@ class GMMMachine
    */
   double logLikelihood(const blitz::Array<double, 1> &x) const;
 
+
+  /**
+   * Output the average log likelihood over a set of samples, i.e. (sum_i log(p(x_i|GMM))) / N
+   * @param[in] x A 2D array of samples (one sample per row)
+   * The dimensions of the input are checked
+   */
+  double logLikelihood(const blitz::Array<double, 2> &x) const;
+
+
   /**
    * Output the log likelihood of the sample, x, i.e. log(p(x|GMM))
    * @param[in] x The sample
diff --git a/bob/learn/em/test/test_gmm.py b/bob/learn/em/test/test_gmm.py
index cc678c1d18ccc40f9e465215d4a2cbd0072123b7..1733c6692104c6ab40b6e4df7f4ee8367b9fa80d 100644
--- a/bob/learn/em/test/test_gmm.py
+++ b/bob/learn/em/test/test_gmm.py
@@ -238,3 +238,26 @@
   # implementation
   matlab_ll_ref = -2.361583051672024e+02
   assert abs(gmm(data) - matlab_ll_ref) < 1e-10
+
+
+def test_GMMMachine_4():
+
+  import numpy
+  numpy.random.seed(3)  # fix the seed for reproducibility
+
+  data = numpy.random.rand(100, 50)  # random data is fine: averaging the 1D calls (in Python) must match the 2D call (in C++)
+
+  gmm = GMMMachine(2, 50)
+  gmm.weights = bob.io.base.load(datafile('weights.hdf5', __name__, path="../data/"))
+  gmm.means = bob.io.base.load(datafile('means.hdf5', __name__, path="../data/"))
+  gmm.variances = bob.io.base.load(datafile('variances.hdf5', __name__, path="../data/"))
+
+  # average the per-sample log likelihoods in Python
+  ll = 0
+  for i in range(data.shape[0]):
+    ll += gmm(data[i,:])
+  ll /= data.shape[0]
+
+  assert abs(ll - gmm(data)) < 1e-10
+
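Note (not part of the patch): a minimal usage sketch of the behaviour this diff introduces, assuming bob.learn.em is installed. The machine shape (2 Gaussians over 3-dimensional features) and the random data are made up for illustration; a freshly constructed machine (zero means, unit variances, uniform weights) is enough to exercise the call.

    import numpy
    import bob.learn.em

    # hypothetical toy machine: 2 Gaussians over 3-dimensional features
    gmm = bob.learn.em.GMMMachine(2, 3)

    data = numpy.random.rand(10, 3)  # 10 samples, one per row

    single = gmm(data[0, :])  # 1D input: log likelihood of one sample
    average = gmm(data)       # 2D input: average log likelihood over the rows

    # the 2D call is the average of the 1D calls
    assert abs(average - numpy.mean([gmm(row) for row in data])) < 1e-10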