diff --git a/setup.py b/setup.py
index 9a45af261a760065e009507b8ee43536a1c19052..4912d2718228462ec26ff0dae418da436378b501 100644
--- a/setup.py
+++ b/setup.py
@@ -63,6 +63,7 @@ setup(
         ),
       Extension("xbob.learn.mlp._library",
         [
+          "xbob/learn/mlp/shuffler.cpp",
           "xbob/learn/mlp/cost.cpp",
           "xbob/learn/mlp/machine.cpp",
           "xbob/learn/mlp/main.cpp",
diff --git a/xbob/learn/mlp/include/xbob.learn.mlp/api.h b/xbob/learn/mlp/include/xbob.learn.mlp/api.h
index c23e5820865ebd96a0ec9796ef28c4acb1aeafbd..8271870e26d4a6f0fcb9d2af685b2e5289a1baba 100644
--- a/xbob/learn/mlp/include/xbob.learn.mlp/api.h
+++ b/xbob/learn/mlp/include/xbob.learn.mlp/api.h
@@ -14,6 +14,7 @@
 #include <bob/trainer/Cost.h>
 #include <bob/trainer/SquareError.h>
 #include <bob/trainer/CrossEntropyLoss.h>
+#include <bob/trainer/DataShuffler.h>
 
 #define XBOB_LEARN_MLP_MODULE_PREFIX xbob.learn.mlp
 #define XBOB_LEARN_MLP_MODULE_NAME _library
@@ -34,6 +35,9 @@ enum _PyBobLearnMLP_ENUM{
   PyBobLearnCost_Check_NUM,
   PyBobLearnSquareError_Type_NUM,
   PyBobLearnCrossEntropyLoss_Type_NUM,
+  // Bindings for xbob.learn.mlp.DataShuffler
+  PyBobLearnDataShuffler_Type_NUM,
+  PyBobLearnDataShuffler_Check_NUM,
   // Total number of C API pointers
   PyXbobLearnMLP_API_pointers
 };
@@ -85,6 +89,16 @@ typedef struct {
 
 #define PyBobLearnCrossEntropyLoss_Type_TYPE PyTypeObject
 
+typedef struct {
+  PyObject_HEAD
+  bob::trainer::DataShuffler* cxx;
+} PyBobLearnDataShufflerObject;
+
+#define PyBobLearnDataShuffler_Type_TYPE PyTypeObject
+
+#define PyBobLearnDataShuffler_Check_RET int
+#define PyBobLearnDataShuffler_Check_PROTO (PyObject* o)
+
 #ifdef XBOB_LEARN_MLP_MODULE
 
   /* This section is used when compiling `xbob.learn.mlp' itself */
@@ -117,6 +131,14 @@ typedef struct {
 
   extern PyBobLearnCrossEntropyLoss_Type_TYPE PyBobLearnCrossEntropyLoss_Type;
 
+  /********************************************
+   * Bindings for xbob.learn.mlp.DataShuffler *
+   ********************************************/
+
+  extern PyBobLearnDataShuffler_Type_TYPE PyBobLearnDataShuffler_Type;
+
+  PyBobLearnDataShuffler_Check_RET PyBobLearnDataShuffler_Check PyBobLearnDataShuffler_Check_PROTO;
+
 #else
 
   /* This section is used in modules that use `xbob.learn.mlp's' C-API */
@@ -171,6 +193,14 @@ typedef struct {
 
 # define PyBobLearnCrossEntropyLoss_Type (*(PyBobLearnCrossEntropyLoss_Type_TYPE *)PyXbobLearnMLP_API[PyBobLearnCrossEntropyLoss_Type_NUM])
 
+  /********************************************
+   * Bindings for xbob.learn.mlp.DataShuffler *
+   ********************************************/
+
+# define PyBobLearnDataShuffler_Type (*(PyBobLearnDataShuffler_Type_TYPE *)PyXbobLearnMLP_API[PyBobLearnDataShuffler_Type_NUM])
+
+# define PyBobLearnDataShuffler_Check (*(PyBobLearnDataShuffler_Check_RET (*)PyBobLearnDataShuffler_Check_PROTO) PyXbobLearnMLP_API[PyBobLearnDataShuffler_Check_NUM])
+
 # if !defined(NO_IMPORT_ARRAY)
 
   /**
diff --git a/xbob/learn/mlp/main.cpp b/xbob/learn/mlp/main.cpp
index 9ffeee989428760369b44a0a91c746bea98fc5df..6ba1691f4d116e8b6e6770c917a2b5ebcab79cd8 100644
--- a/xbob/learn/mlp/main.cpp
+++ b/xbob/learn/mlp/main.cpp
@@ -50,6 +50,9 @@ static PyObject* create_module (void) {
   PyBobLearnCrossEntropyLoss_Type.tp_base = &PyBobLearnCost_Type;
   if (PyType_Ready(&PyBobLearnCrossEntropyLoss_Type) < 0) return 0;
 
+  PyBobLearnDataShuffler_Type.tp_new = PyType_GenericNew;
+  if (PyType_Ready(&PyBobLearnDataShuffler_Type) < 0) return 0;
+
 # if PY_VERSION_HEX >= 0x03000000
   PyObject* m = PyModule_Create(&module_definition);
 # else
@@ -75,6 +78,9 @@ static PyObject* create_module (void) {
   Py_INCREF(&PyBobLearnCrossEntropyLoss_Type);
   if (PyModule_AddObject(m, "CrossEntropyLoss", (PyObject *)&PyBobLearnCrossEntropyLoss_Type) < 0) return 0;
 
+  Py_INCREF(&PyBobLearnDataShuffler_Type);
+  if (PyModule_AddObject(m, "DataShuffler", (PyObject *)&PyBobLearnDataShuffler_Type) < 0) return 0;
+
   static void* PyXbobLearnMLP_API[PyXbobLearnMLP_API_pointers];
 
   /* exhaustive list of C APIs */
@@ -107,6 +113,14 @@ static PyObject* create_module (void) {
 
   PyXbobLearnMLP_API[PyBobLearnCrossEntropyLoss_Type_NUM] = (void *)&PyBobLearnCrossEntropyLoss_Type;
 
+  /********************************************
+   * Bindings for xbob.learn.mlp.DataShuffler *
+   ********************************************/
+
+  PyXbobLearnMLP_API[PyBobLearnDataShuffler_Type_NUM] = (void *)&PyBobLearnDataShuffler_Type;
+
+  PyXbobLearnMLP_API[PyBobLearnDataShuffler_Check_NUM] = (void *)&PyBobLearnDataShuffler_Check;
+
 #if PY_VERSION_HEX >= 0x02070000
 
   /* defines the PyCapsule */
diff --git a/xbob/learn/mlp/shuffler.cpp b/xbob/learn/mlp/shuffler.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..766249ba7af06957014bfd4c4773242a3c1b5392
--- /dev/null
+++ b/xbob/learn/mlp/shuffler.cpp
@@ -0,0 +1,227 @@
+/**
+ * @author Andre Anjos <andre.anjos@idiap.ch>
+ * @date Tue 29 Apr 2014 16:16:59 CEST
+ *
+ * @brief Bindings for the data shuffler
+ *
+ * Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
+ */
+
+#define XBOB_LEARN_MLP_MODULE
+#include <xbob.blitz/cppapi.h>
+#include <xbob.blitz/cleanup.h>
+#include <xbob.learn.mlp/api.h>
+#include <structmember.h>
+
+/*********************************************
+ * Implementation of DataShuffler base class *
+ *********************************************/
+
+PyDoc_STRVAR(s_shuffler_str, XBOB_EXT_MODULE_PREFIX ".DataShuffler");
+
+PyDoc_STRVAR(s_shuffler_doc,
+"DataShuffler(data, target) -> New DataShuffler\n\
+\n\
+Serves data from a training set, in a random way.\n\
+\n\
+Objects of this class are capable of being populated with data\n\
+from one or multiple classes and matching target values. Once\n\
+setup, the shuffler can randomly select a number of vectors and\n\
+accompanying targets for the different classes, filling up user\n\
+containers.\n\
+\n\
+Data shufflers are particularly useful for training neural networks.\n\
+\n\
+Keyword arguments:\n\
+\n\
+data, sequence of array-like 2D float64\n\
+  The input data are divided into sets corresponding to the\n\
+  elements of each input class. Within the class array, each\n\
+  row is expected to correspond to one observation of that class.\n\
+\n\
+target, sequence of array-like 1D float64\n\
+  The target arrays correspond to the targets for each of the\n\
+  input arrays. The number of targets must match the number of\n\
+  2D array objects given in ``data``.\n\
+\n");
+
+/**
+ * Releases the C++ shuffler owned by the Python object and then the object
+ * itself. `cxx' is still null if initialization never completed (the type
+ * is allocated with PyType_GenericNew in main.cpp), and deleting a null
+ * pointer is a no-op.
+ */
+static void PyBobLearnDataShuffler_delete
+(PyBobLearnDataShufflerObject* self) {
+
+  delete self->cxx;
+  Py_TYPE(self)->tp_free((PyObject*)self);
+
+}
+
+/**
+ * Implements `DataShuffler.__init__(data, target)'.
+ *
+ * Validates that `data' and `target' have the same length (at least two
+ * entries), converts every entry to a 64-bit float blitz array (2D for
+ * `data', 1D for `target') and builds the underlying C++ shuffler.
+ *
+ * Returns 0 on success. On any failure a Python exception is set and -1
+ * is returned, as required for `tp_init' slots.
+ */
+static int PyBobLearnDataShuffler_init
+(PyBobLearnDataShufflerObject* self, PyObject* args, PyObject* kwds) {
+
+  /* Parses input arguments in a single shot */
+  static const char* const_kwlist[] = {"data", "target", 0};
+  static char** kwlist = const_cast<char**>(const_kwlist);
+
+  PyObject* data = 0;
+  PyObject* target = 0;
+
+  if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO", kwlist,
+        &data, &target)) return -1;
+
+  /* Check lengths */
+  Py_ssize_t data_length = PyObject_Length(data);
+  if (data_length == -1) return -1;
+  Py_ssize_t target_length = PyObject_Length(target);
+  if (target_length == -1) return -1;
+
+  if (data_length < 2) {
+    PyErr_Format(PyExc_RuntimeError, "`%s' requires an iterable for parameter `data' leading to, at least, two entries (representing two classes), but you have passed something that has only %" PY_FORMAT_SIZE_T "d entries", Py_TYPE(self)->tp_name, data_length);
+    return -1;
+  }
+
+  if (target_length != data_length) {
+    PyErr_Format(PyExc_RuntimeError, "`%s' requires an iterable for parameter `target' leading to the same number of targets (%" PY_FORMAT_SIZE_T "d) as data arrays, but you have passed something that has only %" PY_FORMAT_SIZE_T "d entries", Py_TYPE(self)->tp_name, data_length, target_length);
+    return -1;
+  }
+
+  /* Checks and converts all data entries */
+  std::vector<blitz::Array<double,2> > data_seq;
+  std::vector<boost::shared_ptr<PyBlitzArrayObject>> data_seq_;
+
+  PyObject* iterator = PyObject_GetIter(data);
+  if (!iterator) return -1;
+  auto iterator_ = make_safe(iterator);
+
+  while (PyObject* item = PyIter_Next(iterator)) {
+    auto item_ = make_safe(item);
+
+    PyBlitzArrayObject* bz = 0;
+
+    if (!PyBlitzArray_Converter(item, &bz)) {
+      PyErr_Format(PyExc_TypeError, "`%s' could not convert object of type `%s' at position %" PY_FORMAT_SIZE_T "d of input sequence `data' into an array - check your input", Py_TYPE(self)->tp_name, Py_TYPE(item)->tp_name, data_seq.size());
+      return -1;
+    }
+
+    if (bz->ndim != 2 || bz->type_num != NPY_FLOAT64) {
+      PyErr_Format(PyExc_TypeError, "`%s' only supports 2D 64-bit float arrays for input sequence `data' (or any other object coercible to that), but at position %" PY_FORMAT_SIZE_T "d I have found an object with %" PY_FORMAT_SIZE_T "d dimensions and with type `%s' which is not compatible - check your input", Py_TYPE(self)->tp_name, data_seq.size(), bz->ndim, PyBlitzArray_TypenumAsString(bz->type_num));
+      Py_DECREF(bz);
+      return -1;
+    }
+
+    data_seq_.push_back(make_safe(bz)); ///< prevents data deletion
+    data_seq.push_back(*PyBlitzArrayCxx_AsBlitz<double,2>(bz)); ///< only a view!
+  }
+
+  /* PyIter_Next() also returns null when iteration itself failed */
+  if (PyErr_Occurred()) return -1;
+
+  /* Checks and converts all target entries */
+  std::vector<blitz::Array<double,1>> target_seq;
+  std::vector<boost::shared_ptr<PyBlitzArrayObject>> target_seq_;
+
+  iterator = PyObject_GetIter(target);
+  if (!iterator) return -1;
+  iterator_ = make_safe(iterator);
+
+  while (PyObject* item = PyIter_Next(iterator)) {
+    auto item_ = make_safe(item);
+
+    PyBlitzArrayObject* bz = 0;
+
+    if (!PyBlitzArray_Converter(item, &bz)) {
+      PyErr_Format(PyExc_TypeError, "`%s' could not convert object of type `%s' at position %" PY_FORMAT_SIZE_T "d of input sequence `target' into an array - check your input", Py_TYPE(self)->tp_name, Py_TYPE(item)->tp_name, target_seq.size());
+      return -1;
+    }
+
+    if (bz->ndim != 1 || bz->type_num != NPY_FLOAT64) {
+      PyErr_Format(PyExc_TypeError, "`%s' only supports 1D 64-bit float arrays for input sequence `target' (or any other object coercible to that), but at position %" PY_FORMAT_SIZE_T "d I have found an object with %" PY_FORMAT_SIZE_T "d dimensions and with type `%s' which is not compatible - check your input", Py_TYPE(self)->tp_name, target_seq.size(), bz->ndim, PyBlitzArray_TypenumAsString(bz->type_num));
+      Py_DECREF(bz);
+      return -1;
+    }
+
+    target_seq_.push_back(make_safe(bz)); ///< prevents target deletion
+    target_seq.push_back(*PyBlitzArrayCxx_AsBlitz<double,1>(bz)); ///< only a view!
+  }
+
+  /* PyIter_Next() also returns null when iteration itself failed */
+  if (PyErr_Occurred()) return -1;
+
+  // proceed to object initialization
+  try {
+    self->cxx = new bob::trainer::DataShuffler(data_seq, target_seq);
+  }
+  catch (std::exception& ex) {
+    PyErr_SetString(PyExc_RuntimeError, ex.what());
+    return -1;
+  }
+  catch (...) {
+    PyErr_Format(PyExc_RuntimeError, "cannot create new object of type `%s' - unknown exception thrown", Py_TYPE(self)->tp_name);
+    return -1;
+  }
+
+  return 0;
+
+}
+
+/**
+ * Returns 1 if `o' is an instance of DataShuffler (or of a subclass), 0 if
+ * it is not and -1, with an exception set, if the check itself failed -
+ * this is the result of PyObject_IsInstance().
+ */
+int PyBobLearnDataShuffler_Check(PyObject* o) {
+  return PyObject_IsInstance(o, reinterpret_cast<PyObject*>(&PyBobLearnDataShuffler_Type));
+}
+
+/* Python type object for `DataShuffler'; registered in main.cpp */
+PyTypeObject PyBobLearnDataShuffler_Type = {
+    PyVarObject_HEAD_INIT(0, 0)
+    s_shuffler_str,                           /* tp_name */
+    sizeof(PyBobLearnDataShufflerObject),     /* tp_basicsize */
+    0,                                        /* tp_itemsize */
+    (destructor)PyBobLearnDataShuffler_delete, /* tp_dealloc */
+    0,                                        /* tp_print */
+    0,                                        /* tp_getattr */
+    0,                                        /* tp_setattr */
+    0,                                        /* tp_compare */
+    0,                                        /* tp_repr */
+    0,                                        /* tp_as_number */
+    0,                                        /* tp_as_sequence */
+    0,                                        /* tp_as_mapping */
+    0,                                        /* tp_hash */
+    0,                                        /* tp_call */
+    0,                                        /* tp_str */
+    0,                                        /* tp_getattro */
+    0,                                        /* tp_setattro */
+    0,                                        /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+    s_shuffler_doc,                           /* tp_doc */
+    0,                                        /* tp_traverse */
+    0,                                        /* tp_clear */
+    0,                                        /* tp_richcompare */
+    0,                                        /* tp_weaklistoffset */
+    0,                                        /* tp_iter */
+    0,                                        /* tp_iternext */
+    0,                                        /* tp_methods */
+    0,                                        /* tp_members */
+    0,                                        /* tp_getset */
+    0,                                        /* tp_base */
+    0,                                        /* tp_dict */
+    0,                                        /* tp_descr_get */
+    0,                                        /* tp_descr_set */
+    0,                                        /* tp_dictoffset */
+    (initproc)PyBobLearnDataShuffler_init,    /* tp_init */
+};
diff --git a/xbob/learn/mlp/test_shuffler.py b/xbob/learn/mlp/test_shuffler.py
index bcfee7f08c48eb22249213f7d04b612d395827c9..ffc15b087b0d620d8db94f63c6df5b559c52dd29 100644
--- a/xbob/learn/mlp/test_shuffler.py
+++ b/xbob/learn/mlp/test_shuffler.py
@@ -199,7 +199,7 @@ def test_normalization_big():
   target1 = numpy.array([1], dtype='float64')
 
   set2 = []
-  draw32 = xbob.core.random.normal(mean=3.0, sigma=2.0, dtyle=float)
+  draw32 = xbob.core.random.normal(mean=3.0, sigma=2.0, dtype=float)
   for i in range(10000):
     set2.append(numpy.array([draw32(rng)], dtype='float64'))
   set2 = numpy.array(set2)