From e1498574378f647527265c55fe4a10f7ebc30709 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Thu, 5 Dec 2013 17:32:55 +0100 Subject: [PATCH] Implemented most of scatter functionality --- setup.py | 1 + xbob/math/main.cpp | 148 +++++ xbob/math/scatter.cpp | 515 ++++++++++++++++++ xbob/math/scatter.h | 13 + xbob/math/stats.cpp | 302 ---------- .../test/{test_stats.py => test_scatter.py} | 35 +- 6 files changed, 695 insertions(+), 319 deletions(-) create mode 100644 xbob/math/scatter.cpp create mode 100644 xbob/math/scatter.h delete mode 100644 xbob/math/stats.cpp rename xbob/math/test/{test_stats.py => test_scatter.py} (79%) diff --git a/setup.py b/setup.py index 33d9b01..bc8af0e 100644 --- a/setup.py +++ b/setup.py @@ -46,6 +46,7 @@ setup( "xbob/math/linsolve.cpp", "xbob/math/pavx.cpp", "xbob/math/norminv.cpp", + "xbob/math/scatter.cpp", "xbob/math/main.cpp", ], packages = packages, diff --git a/xbob/math/main.cpp b/xbob/math/main.cpp index 04d3d00..1df4c45 100644 --- a/xbob/math/main.cpp +++ b/xbob/math/main.cpp @@ -17,6 +17,7 @@ #include "linsolve.h" #include "pavx.h" #include "norminv.h" +#include "scatter.h" PyDoc_STRVAR(s_histogram_intersection_str, "histogram_intersection"); PyDoc_STRVAR(s_histogram_intersection_doc, @@ -263,6 +264,129 @@ It is equivalent as calling :py:func:`norminv(p, 0, 1)`. The value\n\ Reference: `<http://home.online.no/~pjacklam/notes/invnorm/>`_\n\ "); +PyDoc_STRVAR(s_scatter_str, "scatter"); +PyDoc_STRVAR(s_scatter_doc, +"scatter(a) -> (array, array)\n\ +scatter(a, s) -> array\n\ +scatter(a, s, m) -> None\n\ +\n\ +Computes the scatter matrix of a 2D array *considering data is organized\n\ +row-wise* (each sample is a row, each feature is a column). The\n\ +resulting array ``s`` is squared with extents equal to the\n\ +number of columns in ``a``. The resulting array ``m`` is a 1D array\n\ +with the row means of ``a``. This method supports only 32 or 64-bit\n\ +float arrays as input.\n\ +\n\ +This function supports many calling modes, but you should provide, at\n\ +least, the input data matrix ``a``. All non-provided arguments will be\n\ +allocated internally and returned.\n\ +"); + +PyDoc_STRVAR(s_scatter_nocheck_str, "scatter_"); +PyDoc_STRVAR(s_scatter_nocheck_doc, +"scatter_(a, s, m) -> None\n\ +\n\ +Computes the scatter matrix of a 2D array *considering data is organized\n\ +row-wise* (each sample is a row, each feature is a column). The\n\ +resulting array ``s`` is squared with extents equal to the\n\ +number of columns in ``a``. The resulting array ``m`` is a 1D array\n\ +with the row means of ``a``. This method supports only 32 or 64-bit\n\ +float arrays as input.\n\ +\n\ +.. warning::\n\ +\n\ + THIS VARIANT DOES NOT PERFORM ANY CHECKS ON THE INPUT MATRICES AND IS,\n\ + FASTER THEN THE VARIANT NOT ENDING IN ``_``. Use it when you are sure\n\ + your input matrices sizes match.\n\ +"); + +PyDoc_STRVAR(s_scatters_str, "scatters"); +PyDoc_STRVAR(s_scatters_doc, +"scatters(data) -> (array, array, array)\n\ +scatters(data, sw, sb) -> array\n\ +scatters(data, sw, sb, m) -> None\n\ +\n\ +Computes the within-class (``sw``) and between-class (``sb``) scatter\n\ +matrices of a set of 2D arrays considering data is organized row-wise\n\ +(each sample is a row, each feature is a column).\n\ +\n\ +This function supports many calling modes, but you should provide, at\n\ +least, the input data matrices ``data``, which should be sequence of\n\ +2D 32-bit or 64-bit float values in which every row of each matrix\n\ +represents one observation for a given class. **Every matrix in\n\ +``data`` should have exactly the same number of columns.**\n\ +\n\ +The returned values ``sw`` and ``sb`` are square matrices with the\n\ +same number of rows and columns as the number of columns in ``data``.\n\ +The returned value ``m`` (last call variant) is a 1D array with the\n\ +same length as number of columns in each ``data`` matrix and represents\n\ +the ensemble mean with no prior (i.e., biased towards classes with more\n\ +samples.\n\ +\n\ +Strategy implemented:\n\ +\n\ +1. Evaluate the overall mean (``m``), class means (:math:`m_k`) and the\n\ + total class counts (:math:`N`).\n\ +2. Evaluate ``sw`` and ``sb`` using normal loops.\n\ +\n\ +Note that ``sw`` and ``sb``, in this implementation, will be normalized\n\ +by N-1 (number of samples) and K (number of classes). This procedure\n\ +makes the eigen values scaled by (N-1)/K, effectively increasing their\n\ +values. The main motivation for this normalization are numerical\n\ +precision concerns with the increasing number of samples causing a\n\ +rather large Sw matrix. A normalization strategy mitigates this\n\ +problem. The eigen vectors will see no effect on this normalization as\n\ +they are normalized in the euclidean sense (:math:`||a|| = 1`) so that\n\ +does not change those.\n\ +"); + +PyDoc_STRVAR(s_scatters_nocheck_str, "scatters_"); +PyDoc_STRVAR(s_scatters_nocheck_doc, +"scatters_(data, sw, sb) -> None\n\ +scatters_(data, sw, sb, m) -> None\n\ +\n\ +Computes the within-class (``sw``) and between-class (``sb``) scatter\n\ +matrices of a set of 2D arrays considering data is organized row-wise\n\ +(each sample is a row, each feature is a column).\n\ +\n\ +.. warning::\n\ +\n\ + THIS VARIANT DOES NOT PERFORM ANY CHECKS ON THE INPUT MATRICES AND IS,\n\ + FASTER THEN THE VARIANT NOT ENDING IN ``_``. Use it when you are sure\n\ + your input matrices sizes match.\n\ +\n\ +This function supports many calling modes, but you should provide, at\n\ +least, the input data matrices ``data``, which should be sequence of\n\ +2D 32-bit or 64-bit float values in which every row of each matrix\n\ +represents one observation for a given class. **Every matrix in\n\ +``data`` should have exactly the same number of columns.**\n\ +\n\ +The returned values ``sw`` and ``sb`` are square matrices with the\n\ +same number of rows and columns as the number of columns in ``data``.\n\ +The returned value ``m`` (last call variant) is a 1D array with the\n\ +same length as number of columns in each ``data`` matrix and represents\n\ +the ensemble mean with no prior (i.e., biased towards classes with more\n\ +samples. **In this variant, you should pre-allocate all output matrices\n\ +so that scatters (and the overall mean) are stored on your provided\n\ +arrays**.\n\ +\n\ +Strategy implemented:\n\ +\n\ +1. Evaluate the overall mean (``m``), class means (:math:`m_k`) and the\n\ + total class counts (:math:`N`).\n\ +2. Evaluate ``sw`` and ``sb`` using normal loops.\n\ +\n\ +Note that ``sw`` and ``sb``, in this implementation, will be normalized\n\ +by N-1 (number of samples) and K (number of classes). This procedure\n\ +makes the eigen values scaled by (N-1)/K, effectively increasing their\n\ +values. The main motivation for this normalization are numerical\n\ +precision concerns with the increasing number of samples causing a\n\ +rather large Sw matrix. A normalization strategy mitigates this\n\ +problem. The eigen vectors will see no effect on this normalization as\n\ +they are normalized in the euclidean sense (:math:`||a|| = 1`) so that\n\ +does not change those.\n\ +"); + static PyMethodDef module_methods[] = { { s_histogram_intersection_str, @@ -354,6 +478,30 @@ static PyMethodDef module_methods[] = { METH_VARARGS|METH_KEYWORDS, s_normsinv_doc }, + { + s_scatter_str, + (PyCFunction)py_scatter, + METH_VARARGS|METH_KEYWORDS, + s_scatter_doc + }, + { + s_scatter_nocheck_str, + (PyCFunction)py_scatter_nocheck, + METH_VARARGS|METH_KEYWORDS, + s_scatter_nocheck_doc + }, + { + s_scatters_str, + (PyCFunction)py_scatters, + METH_VARARGS|METH_KEYWORDS, + s_scatters_doc + }, + { + s_scatters_nocheck_str, + (PyCFunction)py_scatters_nocheck, + METH_VARARGS|METH_KEYWORDS, + s_scatters_nocheck_doc + }, {0} /* Sentinel */ }; diff --git a/xbob/math/scatter.cpp b/xbob/math/scatter.cpp new file mode 100644 index 0000000..1fcbe7e --- /dev/null +++ b/xbob/math/scatter.cpp @@ -0,0 +1,515 @@ +/** + * @date Mon Jun 20 11:47:58 2011 +0200 + * @author Andre Anjos <andre.anjos@idiap.ch> + * + * @brief Python bindings to statistical methods + * + * Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland + */ + +#include "scatter.h" +#include <xbob.blitz/cppapi.h> +#include <bob/math/stats.h> + +PyObject* py_scatter (PyObject*, PyObject* args, PyObject* kwds) { + + /* Parses input arguments in a single shot */ + static const char* const_kwlist[] = { "a", "s", "m", 0 /* Sentinel */ }; + static char** kwlist = const_cast<char**>(const_kwlist); + + PyBlitzArrayObject* a = 0; + PyBlitzArrayObject* s = 0; + PyBlitzArrayObject* m = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|O&O&", + kwlist, + &PyBlitzArray_Converter, &a, + &PyBlitzArray_OutputConverter, &s, + &PyBlitzArray_OutputConverter, &m + )) return 0; + + // basic checks + if (a->ndim != 2 || (a->type_num != NPY_FLOAT32 && a->type_num != NPY_FLOAT64)) { + PyErr_SetString(PyExc_TypeError, "input data matrix `a' should be either a 32 or 64-bit float 2D array"); + Py_DECREF(a); + Py_XDECREF(s); + Py_XDECREF(m); + return 0; + } + + if (s && (s->ndim != 2 || (s->type_num != a->type_num))) { + PyErr_SetString(PyExc_TypeError, "output data matrix `s' should be either a 32 or 64-bit float 2D array, matching the data type of `a'"); + Py_DECREF(a); + Py_DECREF(s); + Py_XDECREF(m); + } + + if (m && (m->ndim != 1 || (m->type_num != a->type_num))) { + PyErr_SetString(PyExc_TypeError, "output data vector `m' should be either a 32 or 64-bit float 1D array, matching the data type of `a'"); + Py_DECREF(a); + Py_XDECREF(s); + Py_DECREF(m); + } + + // allocates data not passed by the user + bool user_s = s; + if (!s) { + Py_ssize_t sshape[2] = {a->shape[1], a->shape[1]}; + s = (PyBlitzArrayObject*)PyBlitzArray_SimpleNew(a->type_num, 2, sshape); + } + + bool user_m = m; + if (!m) m = (PyBlitzArrayObject*)PyBlitzArray_SimpleNew(a->type_num, 1, &a->shape[1]); + + try { + switch (a->type_num) { + case NPY_FLOAT32: + bob::math::scatter( + *PyBlitzArrayCxx_AsBlitz<float,2>(a), + *PyBlitzArrayCxx_AsBlitz<float,2>(s), + *PyBlitzArrayCxx_AsBlitz<float,1>(m) + ); + break; + + case NPY_FLOAT64: + bob::math::scatter( + *PyBlitzArrayCxx_AsBlitz<double,2>(a), + *PyBlitzArrayCxx_AsBlitz<double,2>(s), + *PyBlitzArrayCxx_AsBlitz<double,1>(m) + ); + break; + + default: + PyErr_Format(PyExc_TypeError, "scatter calculation currently not implemented for type '%s'", PyBlitzArray_TypenumAsString(a->type_num)); + } + } + catch (std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } + catch (...) { + PyErr_SetString(PyExc_RuntimeError, "scatter calculation failed: unknown exception caught"); + } + + Py_DECREF(a); + if (PyErr_Occurred()) { + Py_DECREF(s); + Py_DECREF(m); + return 0; + } + + int returns = 2 - (user_s + user_m); + + if (!returns) { + Py_DECREF(s); + Py_DECREF(m); + Py_RETURN_NONE; + } + + // if you get here, it means the user is interested on stuff to be returned + PyObject* retval = PyTuple_New(returns); + if (!retval) { + Py_DECREF(s); + Py_DECREF(m); + } + + // fill from the back + if (!user_m) PyTuple_SET_ITEM(--retval, returns, (PyObject*)m); + else Py_DECREF(m); + if (!user_s) PyTuple_SET_ITEM(--retval, returns, (PyObject*)s); + else Py_DECREF(s); + + return retval; + +} + +PyObject* py_scatter_nocheck (PyObject*, PyObject* args, PyObject* kwds) { + + /* Parses input arguments in a single shot */ + static const char* const_kwlist[] = { "a", "s", "m", 0 /* Sentinel */ }; + static char** kwlist = const_cast<char**>(const_kwlist); + + PyBlitzArrayObject* a = 0; + PyBlitzArrayObject* s = 0; + PyBlitzArrayObject* m = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&O&", + kwlist, + &PyBlitzArray_Converter, &a, + &PyBlitzArray_OutputConverter, &s, + &PyBlitzArray_OutputConverter, &m + )) return 0; + + // basic checks + if (a->ndim != 2 || (a->type_num != NPY_FLOAT32 && a->type_num != NPY_FLOAT64)) { + PyErr_SetString(PyExc_TypeError, "input data matrix `a' should be either a 32 or 64-bit float 2D array"); + Py_DECREF(a); + Py_DECREF(s); + Py_DECREF(m); + return 0; + } + + if (s->ndim != 2 || (s->type_num != a->type_num)) { + PyErr_SetString(PyExc_TypeError, "output data matrix `s' should be either a 32 or 64-bit float 2D array, matching the data type of `a'"); + Py_DECREF(a); + Py_DECREF(s); + Py_DECREF(m); + } + + if (m->ndim != 1 || (m->type_num != a->type_num)) { + PyErr_SetString(PyExc_TypeError, "output data vector `m' should be either a 32 or 64-bit float 1D array, matching the data type of `a'"); + Py_DECREF(a); + Py_DECREF(s); + Py_DECREF(m); + } + + try { + switch (a->type_num) { + case NPY_FLOAT32: + bob::math::scatter( + *PyBlitzArrayCxx_AsBlitz<float,2>(a), + *PyBlitzArrayCxx_AsBlitz<float,2>(s), + *PyBlitzArrayCxx_AsBlitz<float,1>(m) + ); + break; + + case NPY_FLOAT64: + bob::math::scatter( + *PyBlitzArrayCxx_AsBlitz<double,2>(a), + *PyBlitzArrayCxx_AsBlitz<double,2>(s), + *PyBlitzArrayCxx_AsBlitz<double,1>(m) + ); + break; + + default: + PyErr_Format(PyExc_TypeError, "(no-check) scatter calculation currently not implemented for type '%s'", PyBlitzArray_TypenumAsString(a->type_num)); + } + } + catch (std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } + catch (...) { + PyErr_SetString(PyExc_RuntimeError, "(no-check) scatter calculation failed: unknown exception caught"); + } + + Py_DECREF(a); + Py_DECREF(s); + Py_DECREF(m); + + if (PyErr_Occurred()) return 0; + + Py_RETURN_NONE; + +} + +/** + * Converts the input iterable d into a tuple of PyBlitzArrayObject's. Checks + * each array is 2D and of type NPY_FLOAT32 or NPY_FLOAT64, consistently. + * Returns 0 if a problem occurs, 1 otherwise. + */ +int BzTuple_Converter(PyObject* o, PyObject** a) { + + PyObject* tmp = PySequence_Tuple(o); + if (!tmp) return 0; + + if (PyTuple_GET_SIZE(o) < 2) { + PyErr_SetString(PyExc_TypeError, "input data object must be a sequence or iterable with at least 2 2D arrays with 32 or 64-bit floats"); + Py_DECREF(tmp); + return 0; + } + + PyBlitzArrayObject* first = 0; + int status = PyBlitzArray_Converter(PyTuple_GET_ITEM(tmp, 0), &first); + if (!status) { + Py_DECREF(tmp); + return 0; + } + + if (first->ndim != 2 || + (first->type_num != NPY_FLOAT32 && first->type_num != NPY_FLOAT64)) { + PyErr_SetString(PyExc_TypeError, "input data object must be a sequence or iterable with at least 2 2D arrays with 32 or 64-bit floats - the first array does not conform"); + Py_DECREF(first); + Py_DECREF(tmp); + } + + PyObject* retval = PyTuple_New(PyTuple_GET_SIZE(tmp)); + if (!retval) { + Py_DECREF(tmp); + Py_DECREF(first); + return 0; + } + + PyTuple_SET_ITEM(retval, 0, (PyObject*)first); + + for (Py_ssize_t i=1; i<PyTuple_GET_SIZE(tmp); ++i) { + + PyBlitzArrayObject* next = 0; + PyObject* item = PyTuple_GET_ITEM(tmp, i); //borrowed + int status = PyBlitzArray_Converter(item, &next); + if (!status) { + Py_DECREF(retval); + Py_DECREF(tmp); + return 0; + } + if (next->type_num != first->type_num) { + PyErr_Format(PyExc_TypeError, "array at data[%" PY_FORMAT_SIZE_T "d] does not have the same data type as the first array on the sequence (%s != %s)", i, PyBlitzArray_TypenumAsString(next->type_num), PyBlitzArray_TypenumAsString(first->type_num)); + Py_DECREF(next); + Py_DECREF(retval); + Py_DECREF(tmp); + return 0; + } + if (next->ndim != 2) { + PyErr_Format(PyExc_TypeError, "array at data[%" PY_FORMAT_SIZE_T "d] does not have two dimensions, but %" PY_FORMAT_SIZE_T "d", i, next->ndim); + Py_DECREF(next); + Py_DECREF(retval); + Py_DECREF(tmp); + return 0; + } + + PyTuple_SET_ITEM(retval, i, (PyObject*)next); //steals `next' + + } + + Py_DECREF(tmp); + *a = retval; + + return 1; + +} + +PyObject* py_scatters (PyObject*, PyObject* args, PyObject* kwds) { + + /* Parses input arguments in a single shot */ + static const char* const_kwlist[] = { "data", "sw", "sb", "m", 0 /* Sentinel */ }; + static char** kwlist = const_cast<char**>(const_kwlist); + + PyObject* data = 0; + PyBlitzArrayObject* sw = 0; + PyBlitzArrayObject* sb = 0; + PyBlitzArrayObject* m = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&|O&O&O&", kwlist, + &BzTuple_Converter, &data, + &PyBlitzArray_OutputConverter, &sw, + &PyBlitzArray_OutputConverter, &sb, + &PyBlitzArray_OutputConverter, &m + )) return 0; + + PyBlitzArrayObject* first = (PyBlitzArrayObject*)PyTuple_GET_ITEM(data, 0); + + if (sw && (sw->ndim != 2 || (sw->type_num != first->type_num))) { + PyErr_SetString(PyExc_TypeError, "output data matrix `sw' should be either a 32 or 64-bit float 2D array, matching the data type of `data'"); + Py_DECREF(data); + Py_DECREF(sw); + Py_XDECREF(sb); + Py_XDECREF(m); + } + + if (sb && (sb->ndim != 2 || (sb->type_num != first->type_num))) { + PyErr_SetString(PyExc_TypeError, "output data matrix `sb' should be either a 32 or 64-bit float 2D array, matching the data type of `data'"); + Py_DECREF(data); + Py_XDECREF(sw); + Py_DECREF(sb); + Py_XDECREF(m); + } + + if (m && (m->ndim != 1 || (m->type_num != first->type_num))) { + PyErr_SetString(PyExc_TypeError, "output data vector `m' should be either a 32 or 64-bit float 1D array, matching the data type of `data'"); + Py_DECREF(data); + Py_XDECREF(sw); + Py_XDECREF(sb); + Py_DECREF(m); + } + + // allocates data not passed by the user + bool user_sw = sw; + if (!sw) { + Py_ssize_t sshape[2] = {first->shape[1], first->shape[1]}; + sw = (PyBlitzArrayObject*)PyBlitzArray_SimpleNew(first->type_num, 2, sshape); + } + + bool user_sb = sb; + if (!sb) { + Py_ssize_t sshape[2] = {first->shape[1], first->shape[1]}; + sb = (PyBlitzArrayObject*)PyBlitzArray_SimpleNew(first->type_num, 2, sshape); + } + + bool user_m = m; + if (!m) m = (PyBlitzArrayObject*)PyBlitzArray_SimpleNew(first->type_num, 1, &first->shape[1]); + + try { + switch (first->type_num) { + case NPY_FLOAT32: + { + std::vector<blitz::Array<float,2>> cxxdata; + for (Py_ssize_t i=1; i<PyTuple_GET_SIZE(data); ++i) { + cxxdata.push_back(*PyBlitzArrayCxx_AsBlitz<float,2> + ((PyBlitzArrayObject*)PyTuple_GET_ITEM(data,i))); + bob::math::scatters(cxxdata, + *PyBlitzArrayCxx_AsBlitz<float,2>(sw), + *PyBlitzArrayCxx_AsBlitz<float,2>(sb), + *PyBlitzArrayCxx_AsBlitz<float,1>(m) + ); + } + } + break; + + case NPY_FLOAT64: + { + std::vector<blitz::Array<double,2>> cxxdata; + for (Py_ssize_t i=1; i<PyTuple_GET_SIZE(data); ++i) { + cxxdata.push_back(*PyBlitzArrayCxx_AsBlitz<double,2> + ((PyBlitzArrayObject*)PyTuple_GET_ITEM(data,i))); + bob::math::scatters(cxxdata, + *PyBlitzArrayCxx_AsBlitz<double,2>(sw), + *PyBlitzArrayCxx_AsBlitz<double,2>(sb), + *PyBlitzArrayCxx_AsBlitz<double,1>(m) + ); + } + } + break; + + default: + PyErr_Format(PyExc_TypeError, "scatters calculation currently not implemented for type '%s'", PyBlitzArray_TypenumAsString(first->type_num)); + } + } + catch (std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } + catch (...) { + PyErr_SetString(PyExc_RuntimeError, "scatters calculation failed: unknown exception caught"); + } + + Py_DECREF(data); + if (PyErr_Occurred()) { + Py_DECREF(sw); + Py_DECREF(sb); + Py_DECREF(m); + return 0; + } + + int returns = 3 - (user_sw + user_sb + user_m); + + if (!returns) { + Py_DECREF(sw); + Py_DECREF(sb); + Py_DECREF(m); + Py_RETURN_NONE; + } + + // if you get here, it means the user is interested on stuff to be returned + PyObject* retval = PyTuple_New(returns); + if (!retval) { + Py_DECREF(sw); + Py_DECREF(sb); + Py_DECREF(m); + } + + // fill from the back + if (!user_m) PyTuple_SET_ITEM(--retval, returns, (PyObject*)m); + else Py_DECREF(m); + if (!user_sb) PyTuple_SET_ITEM(--retval, returns, (PyObject*)sb); + else Py_DECREF(sb); + if (!user_sw) PyTuple_SET_ITEM(--retval, returns, (PyObject*)sw); + else Py_DECREF(sw); + + return retval; + +} + +PyObject* py_scatters_nocheck (PyObject*, PyObject* args, PyObject* kwds) { + + /* Parses input arguments in a single shot */ + static const char* const_kwlist[] = { "data", "sw", "sb", "m", 0 /* Sentinel */ }; + static char** kwlist = const_cast<char**>(const_kwlist); + + PyObject* data = 0; + PyBlitzArrayObject* sw = 0; + PyBlitzArrayObject* sb = 0; + PyBlitzArrayObject* m = 0; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&O&O&", kwlist, + &BzTuple_Converter, &data, + &PyBlitzArray_OutputConverter, &sw, + &PyBlitzArray_OutputConverter, &sb, + &PyBlitzArray_OutputConverter, &m + )) return 0; + + PyBlitzArrayObject* first = (PyBlitzArrayObject*)PyTuple_GET_ITEM(data, 0); + + if (sw->ndim != 2 || (sw->type_num != first->type_num)) { + PyErr_SetString(PyExc_TypeError, "output data matrix `sw' should be either a 32 or 64-bit float 2D array, matching the data type of `data'"); + Py_DECREF(data); + Py_DECREF(sw); + Py_DECREF(sb); + Py_DECREF(m); + } + + if (sb->ndim != 2 || (sb->type_num != first->type_num)) { + PyErr_SetString(PyExc_TypeError, "output data matrix `sb' should be either a 32 or 64-bit float 2D array, matching the data type of `data'"); + Py_DECREF(data); + Py_DECREF(sw); + Py_DECREF(sb); + Py_DECREF(m); + } + + if (m->ndim != 1 || (m->type_num != first->type_num)) { + PyErr_SetString(PyExc_TypeError, "output data vector `m' should be either a 32 or 64-bit float 1D array, matching the data type of `data'"); + Py_DECREF(data); + Py_DECREF(sw); + Py_DECREF(sb); + Py_DECREF(m); + } + + try { + switch (first->type_num) { + case NPY_FLOAT32: + { + std::vector<blitz::Array<float,2>> cxxdata; + for (Py_ssize_t i=1; i<PyTuple_GET_SIZE(data); ++i) { + cxxdata.push_back(*PyBlitzArrayCxx_AsBlitz<float,2> + ((PyBlitzArrayObject*)PyTuple_GET_ITEM(data,i))); + bob::math::scatters_(cxxdata, + *PyBlitzArrayCxx_AsBlitz<float,2>(sw), + *PyBlitzArrayCxx_AsBlitz<float,2>(sb), + *PyBlitzArrayCxx_AsBlitz<float,1>(m) + ); + } + } + break; + + case NPY_FLOAT64: + { + std::vector<blitz::Array<double,2>> cxxdata; + for (Py_ssize_t i=1; i<PyTuple_GET_SIZE(data); ++i) { + cxxdata.push_back(*PyBlitzArrayCxx_AsBlitz<double,2> + ((PyBlitzArrayObject*)PyTuple_GET_ITEM(data,i))); + bob::math::scatters_(cxxdata, + *PyBlitzArrayCxx_AsBlitz<double,2>(sw), + *PyBlitzArrayCxx_AsBlitz<double,2>(sb), + *PyBlitzArrayCxx_AsBlitz<double,1>(m) + ); + } + } + break; + + default: + PyErr_Format(PyExc_TypeError, "(no-check) scatters calculation currently not implemented for type '%s'", PyBlitzArray_TypenumAsString(first->type_num)); + } + } + catch (std::exception& e) { + PyErr_SetString(PyExc_RuntimeError, e.what()); + } + catch (...) { + PyErr_SetString(PyExc_RuntimeError, "(no-check) scatters calculation failed: unknown exception caught"); + } + + Py_DECREF(data); + Py_DECREF(sw); + Py_DECREF(sb); + Py_DECREF(m); + + if (PyErr_Occurred()) return 0; + + Py_RETURN_NONE; + +} diff --git a/xbob/math/scatter.h b/xbob/math/scatter.h new file mode 100644 index 0000000..e45a389 --- /dev/null +++ b/xbob/math/scatter.h @@ -0,0 +1,13 @@ +/** + * @author Andre Anjos <andre.anjos@idiap.ch> + * @date Thu 5 Dec 12:10:18 2013 + * + * @brief Declaration of components relevant for main.cpp + */ + +#include <Python.h> + +PyObject* py_scatter(PyObject*, PyObject* args, PyObject* kwds); +PyObject* py_scatter_nocheck(PyObject*, PyObject* args, PyObject* kwds); +PyObject* py_scatters(PyObject*, PyObject* args, PyObject* kwds); +PyObject* py_scatters_nocheck(PyObject*, PyObject* args, PyObject* kwds); diff --git a/xbob/math/stats.cpp b/xbob/math/stats.cpp deleted file mode 100644 index 32ce880..0000000 --- a/xbob/math/stats.cpp +++ /dev/null @@ -1,302 +0,0 @@ -/** - * @file math/python/stats.cc - * @date Mon Jun 20 11:47:58 2011 +0200 - * @author Andre Anjos <andre.anjos@idiap.ch> - * - * @brief Python bindings to statistical methods - * - * Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland - */ - -#include <boost/python.hpp> -#include <bob/python/ndarray.h> -#include <boost/python/stl_iterator.hpp> -#include <bob/math/stats.h> - -using namespace boost::python; - -static const char* SCATTER_DOC1 = "Computes the scatter matrix of a 2D array considering data is organized row-wise (each sample is a row, each feature is a column). The resulting matrix 's' has to be square with extents equal to the number of columns in a."; - -static const char* SCATTER_DOC2 = "Computes the scatter matrix of a 2D array considering data is organized row-wise (each sample is a row, each feature is a column). This variant also returns the sample means in 'm'. The resulting arrays 'm' and 's' have to have the correct sizes (s should be square with extents equal to the number of columns in a and m should be a 1D vector with extents equal to the number of columns in a)."; - -static const char* SCATTER_DOC3 = "Computes the scatter matrix of a 2D array considering data is organized row-wise (each sample is a row, each feature is a column). This variant returns the sample means and the scatter matrix in a tuple. If you are looking for efficiency, prefer the variants that receive the output variable as one of the input parameters. This version will allocate the resulting arrays 'm' and 's' internally every time it is called."; - - -static const char* SCATTERS_DOC1 = "Computes the within-class and between-class scatter matrices of a set of 2D arrays considering data is organized row-wise (each sample is a row, each feature is a column). This implies that all the 2D arrays in 'data' should have the same number of columns. The resulting matrices 'sb' and 'sw' have to be square with extents equal to the number of columns in the arrays of 'data'."; - -static const char* SCATTERS_DOC2 = "Computes the within-class and between-class scatter matrices of a set of 2D arrays considering data is organized row-wise (each sample is a row, each feature is a column). This implies that all the 2D arrays in 'data' should have the same number of columns. This variant also returns the total sample means in 'm'. The resulting arrays 'm', 'sb' and 'sw' have to have the correct sizes ('sb' and 'sw' should be square with extents equal to the number of columns in the arrays of 'data' and 'm' should be a 1D vector with extents equal to the number of columns in the arrays of 'data')."; - -static const char* SCATTERS_DOC3 = "Computes the within-class and between-class scatter matrices of a set of 2D arrays considering data is organized row-wise (each sample is a row, each feature is a column). This implies that all the 2D arrays in 'data' should have the same number of columns. This variant returns the sample means and the scatter matrices 'sb' and 'sw' in a tuple. If you are looking for efficiency, prefer the variants that receive the output variable as one of the input parameters. This variant will allocate the resulting arrays 'm' and 'sb' and 'sw' internally every time it is called."; - - -template <typename T> static tuple scatter_inner(bob::python::const_ndarray A) { - const bob::core::array::typeinfo& info = A.type(); - bob::python::ndarray S(info.dtype, info.shape[1], info.shape[1]); - blitz::Array<T,2> S_ = S.bz<T,2>(); - bob::python::ndarray M(info.dtype, info.shape[1]); - blitz::Array<T,1> M_ = M.bz<T,1>(); - bob::math::scatter(A.bz<T,2>(), S_, M_); - return make_tuple(S,M); -} - -static tuple scatter(bob::python::const_ndarray A) { - const bob::core::array::typeinfo& info = A.type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatter_inner<float>(A); - case bob::core::array::t_float64: - return scatter_inner<double>(A); - default: - PYTHON_ERROR(TypeError, "scatter matrix computation does not support '%s'", info.str().c_str()); - } -} - -template <typename T> -static void scatter_nocheck_inner(bob::python::const_ndarray A, bob::python::ndarray S) { - blitz::Array<T,2> S_ = S.bz<T,2>(); - bob::math::scatter_<T>(A.bz<T,2>(), S_); -} - -static void scatter_nocheck(bob::python::const_ndarray A, bob::python::ndarray S) { - const bob::core::array::typeinfo& info = A.type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatter_nocheck_inner<float>(A, S); - case bob::core::array::t_float64: - return scatter_nocheck_inner<double>(A, S); - default: - PYTHON_ERROR(TypeError, "(unchecked) scatter matrix computation does not support '%s'", info.str().c_str()); - } -} - -template <typename T> -static void scatter_check_inner(bob::python::const_ndarray A, bob::python::ndarray S) { - blitz::Array<T,2> S_ = S.bz<T,2>(); - bob::math::scatter<T>(A.bz<T,2>(), S_); -} - -static void scatter_check(bob::python::const_ndarray A, bob::python::ndarray S) { - const bob::core::array::typeinfo& info = A.type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatter_check_inner<float>(A, S); - case bob::core::array::t_float64: - return scatter_check_inner<double>(A, S); - default: - PYTHON_ERROR(TypeError, "scatter matrix computation does not support '%s'", info.str().c_str()); - } -} - -template <typename T> -static void scatter_M_nocheck_inner(bob::python::const_ndarray A, bob::python::ndarray S, - bob::python::ndarray M) { - blitz::Array<T,2> S_ = S.bz<T,2>(); - blitz::Array<T,1> M_ = M.bz<T,1>(); - bob::math::scatter_<T>(A.bz<T,2>(), S_, M_); -} - -static void scatter_M_nocheck(bob::python::const_ndarray A, bob::python::ndarray S, - bob::python::ndarray M) { - const bob::core::array::typeinfo& info = A.type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatter_M_nocheck_inner<float>(A, S, M); - case bob::core::array::t_float64: - return scatter_M_nocheck_inner<double>(A, S, M); - default: - PYTHON_ERROR(TypeError, "(unchecked) scatter matrix computation does not support '%s'", info.str().c_str()); - } -} - -template <typename T> -static void scatter_M_check_inner(bob::python::const_ndarray A, bob::python::ndarray S, - bob::python::ndarray M) { - blitz::Array<T,2> S_ = S.bz<T,2>(); - blitz::Array<T,1> M_ = M.bz<T,1>(); - bob::math::scatter<T>(A.bz<T,2>(), S_, M_); -} - -static void scatter_M_check(bob::python::const_ndarray A, bob::python::ndarray S, - bob::python::ndarray M) { - const bob::core::array::typeinfo& info = A.type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatter_M_check_inner<float>(A, S, M); - case bob::core::array::t_float64: - return scatter_M_check_inner<double>(A, S, M); - default: - PYTHON_ERROR(TypeError, "scatter matrix computation does not support '%s'", info.str().c_str()); - } -} - - -template <typename T> -static tuple scatters_inner(std::vector<bob::python::const_ndarray>& data) -{ - std::vector<blitz::Array<T,2> > vdata; - for(std::vector<bob::python::const_ndarray>::iterator it=data.begin(); - it!=data.end(); ++it) - vdata.push_back(it->bz<T,2>()); - const bob::core::array::typeinfo& info = data[0].type(); - bob::python::ndarray Sb(info.dtype, info.shape[1], info.shape[1]); - blitz::Array<T,2> Sb_ = Sb.bz<T,2>(); - bob::python::ndarray Sw(info.dtype, info.shape[1], info.shape[1]); - blitz::Array<T,2> Sw_ = Sw.bz<T,2>(); - bob::python::ndarray M(info.dtype, info.shape[1]); - blitz::Array<T,1> M_ = M.bz<T,1>(); - bob::math::scatters(vdata, Sw_, Sb_, M_); - return make_tuple(Sw, Sb, M); -} - -static tuple scatters(object data) { - stl_input_iterator<bob::python::const_ndarray> dbegin(data), dend; - std::vector<bob::python::const_ndarray> vdata(dbegin, dend); - const bob::core::array::typeinfo& info = vdata[0].type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatters_inner<float>(vdata); - case bob::core::array::t_float64: - return scatters_inner<double>(vdata); - default: - PYTHON_ERROR(TypeError, "scatters matrix computation does not support '%s'", info.str().c_str()); - } -} - -template <typename T> -static void scatters_nocheck_inner(std::vector<bob::python::const_ndarray>& data, - bob::python::ndarray Sw, bob::python::ndarray Sb) -{ - std::vector<blitz::Array<T,2> > vdata; - for(std::vector<bob::python::const_ndarray>::iterator it=data.begin(); - it!=data.end(); ++it) - vdata.push_back(it->bz<T,2>()); - blitz::Array<T,2> Sw_ = Sw.bz<T,2>(); - blitz::Array<T,2> Sb_ = Sb.bz<T,2>(); - bob::math::scatters_<T>(vdata, Sw_, Sb_); -} - -static void scatters_nocheck(object data, bob::python::ndarray Sw, - bob::python::ndarray Sb) -{ - stl_input_iterator<bob::python::const_ndarray> dbegin(data), dend; - std::vector<bob::python::const_ndarray> vdata(dbegin, dend); - const bob::core::array::typeinfo& info = vdata[0].type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatters_nocheck_inner<float>(vdata, Sw, Sb); - case bob::core::array::t_float64: - return scatters_nocheck_inner<double>(vdata, Sw, Sb); - default: - PYTHON_ERROR(TypeError, "(unchecked) scatters matrix computation does not support '%s'", info.str().c_str()); - } -} - -template <typename T> -static void scatters_check_inner(std::vector<bob::python::const_ndarray>& data, - bob::python::ndarray Sw, bob::python::ndarray Sb) -{ - std::vector<blitz::Array<T,2> > vdata; - for(std::vector<bob::python::const_ndarray>::iterator it=data.begin(); - it!=data.end(); ++it) - vdata.push_back(it->bz<T,2>()); - blitz::Array<T,2> Sw_ = Sw.bz<T,2>(); - blitz::Array<T,2> Sb_ = Sb.bz<T,2>(); - bob::math::scatters<T>(vdata, Sw_, Sb_); -} - -static void scatters_check(object data, bob::python::ndarray Sw, - bob::python::ndarray Sb) -{ - stl_input_iterator<bob::python::const_ndarray> dbegin(data), dend; - std::vector<bob::python::const_ndarray> vdata(dbegin, dend); - const bob::core::array::typeinfo& info = vdata[0].type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatters_check_inner<float>(vdata, Sw, Sb); - case bob::core::array::t_float64: - return scatters_check_inner<double>(vdata, Sw, Sb); - default: - PYTHON_ERROR(TypeError, "scatters matrix computation does not support '%s'", info.str().c_str()); - } -} - -template <typename T> -static void scatters_M_nocheck_inner(std::vector<bob::python::const_ndarray>& data, - bob::python::ndarray Sw, bob::python::ndarray Sb, bob::python::ndarray M) -{ - std::vector<blitz::Array<T,2> > vdata; - for(std::vector<bob::python::const_ndarray>::iterator it=data.begin(); - it!=data.end(); ++it) - vdata.push_back(it->bz<T,2>()); - blitz::Array<T,2> Sw_ = Sw.bz<T,2>(); - blitz::Array<T,2> Sb_ = Sb.bz<T,2>(); - blitz::Array<T,1> M_ = M.bz<T,1>(); - bob::math::scatters_<T>(vdata, Sw_, Sb_, M_); -} - -static void scatters_M_nocheck(object data, bob::python::ndarray Sw, - bob::python::ndarray Sb, bob::python::ndarray M) -{ - stl_input_iterator<bob::python::const_ndarray> dbegin(data), dend; - std::vector<bob::python::const_ndarray> vdata(dbegin, dend); - const bob::core::array::typeinfo& info = vdata[0].type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatters_M_nocheck_inner<float>(vdata, Sw, Sb, M); - case bob::core::array::t_float64: - return scatters_M_nocheck_inner<double>(vdata, Sw, Sb, M); - default: - PYTHON_ERROR(TypeError, "(unchecked) scatters matrix computation does not support '%s'", info.str().c_str()); - } -} - -template <typename T> -static void scatters_M_check_inner(std::vector<bob::python::const_ndarray>& data, - bob::python::ndarray Sw, bob::python::ndarray Sb, bob::python::ndarray M) -{ - std::vector<blitz::Array<T,2> > vdata; - for(std::vector<bob::python::const_ndarray>::iterator it=data.begin(); - it!=data.end(); ++it) - vdata.push_back(it->bz<T,2>()); - blitz::Array<T,2> Sw_ = Sw.bz<T,2>(); - blitz::Array<T,2> Sb_ = Sb.bz<T,2>(); - blitz::Array<T,1> M_ = M.bz<T,1>(); - bob::math::scatters<T>(vdata, Sw_, Sb_, M_); -} - -static void scatters_M_check(object data, bob::python::ndarray Sw, - bob::python::ndarray Sb, bob::python::ndarray M) -{ - stl_input_iterator<bob::python::const_ndarray> dbegin(data), dend; - std::vector<bob::python::const_ndarray> vdata(dbegin, dend); - const bob::core::array::typeinfo& info = vdata[0].type(); - switch (info.dtype) { - case bob::core::array::t_float32: - return scatters_M_check_inner<float>(vdata, Sw, Sb, M); - case bob::core::array::t_float64: - return scatters_M_check_inner<double>(vdata, Sw, Sb, M); - default: - PYTHON_ERROR(TypeError, "scatters matrix computation does not support '%s'", info.str().c_str()); - } -} - - -void bind_math_stats() { - // Scatter of a matrix - def("scatter_", &scatter_nocheck, (arg("a"), arg("s")), SCATTER_DOC1); - def("scatter", &scatter_check, (arg("a"), arg("s")), SCATTER_DOC1); - - def("scatter_", &scatter_M_nocheck, (arg("a"), arg("s"), arg("m")), SCATTER_DOC2); - def("scatter", &scatter_M_check, (arg("a"), arg("s"), arg("m")), SCATTER_DOC2); - - def("scatter", &scatter, (arg("a")), SCATTER_DOC3); - - // Between-class and within-class scatter matrices of a set of matrices - def("scatters_", &scatters_nocheck, (arg("data"), arg("sw"), arg("sb")), SCATTERS_DOC1); - def("scatters", &scatters_check, (arg("data"), arg("sw"), arg("sb")), SCATTERS_DOC1); - - def("scatters_", &scatters_M_nocheck, (arg("data"), arg("sw"), arg("sb"), arg("m")), SCATTERS_DOC2); - def("scatters", &scatters_M_check, (arg("data"), arg("sw"), arg("sb"), arg("m")), SCATTERS_DOC2); - - def("scatters", &scatters, (arg("data")), SCATTERS_DOC3); -} diff --git a/xbob/math/test/test_stats.py b/xbob/math/test/test_scatter.py similarity index 79% rename from xbob/math/test/test_stats.py rename to xbob/math/test/test_scatter.py index 51deb5d..e56a0de 100644 --- a/xbob/math/test/test_stats.py +++ b/xbob/math/test/test_scatter.py @@ -38,45 +38,46 @@ def py_scatters(data): return (Sw, Sb, mu) -def setup_func(): +def test_scatter(): - self.data = list(bob.db.iris.data().values()) - self.data_c0 = numpy.vstack(bob.db.iris.data()['setosa']) - -@nose.tools.with_setup(setup_func) -def test_scatter(self): + data = numpy.random.rand(50,4) # This test demonstrates how to use the scatter matrix function of bob. - S, M = scatter(self.data_c0.T) - S /= (self.data_c0.shape[1]-1) + S, M = scatter(data.T) + S /= (data.shape[1]-1) # Do the same with numpy and compare. Note that with numpy we are computing # the covariance matrix which is the scatter matrix divided by (N-1). - K = numpy.array(numpy.cov(self.data_c0)) - M_ = means(self.data_c0.T) + K = numpy.array(numpy.cov(data)) + M_ = means(data.T) assert (abs(S-K) < 1e-10).all() assert (abs(M-M_) < 1e-10).all() -@nose.tools.with_setup(setup_func) -def test_scatters(self): +def test_scatters(): + + data = [ + numpy.random.rand(50,4), + numpy.random.rand(50,4), + numpy.random.rand(50,4), + ] # Compares bob's implementation against pythonic one # 1. python - Sw_, Sb_, m_ = py_scatters(self.data) + Sw_, Sb_, m_ = py_scatters(data) # 2.a. bob - Sw, Sb, m = scatters(self.data) + Sw, Sb, m = scatters(data) # 3.a. comparison assert numpy.allclose(Sw, Sw_) assert numpy.allclose(Sb, Sb_) assert numpy.allclose(m, m_) - N = self.data[0].shape[1] + N = data[0].shape[1] # 2.b. bob Sw = numpy.ndarray((N,N), numpy.float64) Sb = numpy.ndarray((N,N), numpy.float64) m = numpy.ndarray((N,), numpy.float64) - scatters(self.data, Sw, Sb, m) + scatters(data, Sw, Sb, m) # 3.b comparison assert numpy.allclose(Sw, Sw_) assert numpy.allclose(Sb, Sb_) @@ -85,7 +86,7 @@ def test_scatters(self): # 2.c. bob Sw = numpy.ndarray((N,N), numpy.float64) Sb = numpy.ndarray((N,N), numpy.float64) - scatters(self.data, Sw, Sb) + scatters(data, Sw, Sb) # 3.c comparison assert numpy.allclose(Sw, Sw_) assert numpy.allclose(Sb, Sb_) -- GitLab