Skip to content
Snippets Groups Projects
Commit ce0774e2 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

First implementation of bindings for DataShuffler

parent 7fe05522
No related branches found
No related tags found
No related merge requests found
......@@ -63,6 +63,7 @@ setup(
),
Extension("xbob.learn.mlp._library",
[
"xbob/learn/mlp/shuffler.cpp",
"xbob/learn/mlp/cost.cpp",
"xbob/learn/mlp/machine.cpp",
"xbob/learn/mlp/main.cpp",
......
......@@ -14,6 +14,7 @@
#include <bob/trainer/Cost.h>
#include <bob/trainer/SquareError.h>
#include <bob/trainer/CrossEntropyLoss.h>
#include <bob/trainer/DataShuffler.h>
#define XBOB_LEARN_MLP_MODULE_PREFIX xbob.learn.mlp
#define XBOB_LEARN_MLP_MODULE_NAME _library
......@@ -34,6 +35,9 @@ enum _PyBobLearnMLP_ENUM{
PyBobLearnCost_Check_NUM,
PyBobLearnSquareError_Type_NUM,
PyBobLearnCrossEntropyLoss_Type_NUM,
// Bindings for xbob.learn.mlp.DataShuffler
PyBobLearnDataShuffler_Type_NUM,
PyBobLearnDataShuffler_Check_NUM,
// Total number of C API pointers
PyXbobLearnMLP_API_pointers
};
......@@ -85,6 +89,16 @@ typedef struct {
#define PyBobLearnCrossEntropyLoss_Type_TYPE PyTypeObject
typedef struct {
PyObject_HEAD
bob::trainer::DataShuffler* cxx;
} PyBobLearnDataShufflerObject;
#define PyBobLearnDataShuffler_Type_TYPE PyTypeObject
#define PyBobLearnDataShuffler_Check_RET int
#define PyBobLearnDataShuffler_Check_PROTO (PyObject* o)
#ifdef XBOB_LEARN_MLP_MODULE
/* This section is used when compiling `xbob.learn.mlp' itself */
......@@ -117,6 +131,14 @@ typedef struct {
extern PyBobLearnCrossEntropyLoss_Type_TYPE PyBobLearnCrossEntropyLoss_Type;
/********************************************
* Bindings for xbob.learn.mlp.DataShuffler *
********************************************/
extern PyBobLearnDataShuffler_Type_TYPE PyBobLearnDataShuffler_Type;
PyBobLearnDataShuffler_Check_RET PyBobLearnDataShuffler_Check PyBobLearnDataShuffler_Check_PROTO;
#else
/* This section is used in modules that use `xbob.learn.mlp's' C-API */
......@@ -171,6 +193,14 @@ typedef struct {
# define PyBobLearnCrossEntropyLoss_Type (*(PyBobLearnCrossEntropyLoss_Type_TYPE *)PyXbobLearnMLP_API[PyBobLearnCrossEntropyLoss_Type_NUM])
/********************************************
* Bindings for xbob.learn.mlp.DataShuffler *
********************************************/
# define PyBobLearnDataShuffler_Type (*(PyBobLearnDataShuffler_Type_TYPE *)PyXbobLearnMLP_API[PyBobLearnDataShuffler_Type_NUM])
# define PyBobLearnDataShuffler_Check (*(PyBobLearnDataShuffler_Check_RET (*)PyBobLearnDataShuffler_Check_PROTO) PyXbobLearnMLP_API[PyBobLearnDataShuffler_Check_NUM])
# if !defined(NO_IMPORT_ARRAY)
/**
......
......@@ -50,6 +50,9 @@ static PyObject* create_module (void) {
PyBobLearnCrossEntropyLoss_Type.tp_base = &PyBobLearnCost_Type;
if (PyType_Ready(&PyBobLearnCrossEntropyLoss_Type) < 0) return 0;
PyBobLearnDataShuffler_Type.tp_new = PyType_GenericNew;
if (PyType_Ready(&PyBobLearnDataShuffler_Type) < 0) return 0;
# if PY_VERSION_HEX >= 0x03000000
PyObject* m = PyModule_Create(&module_definition);
# else
......@@ -75,6 +78,9 @@ static PyObject* create_module (void) {
Py_INCREF(&PyBobLearnCrossEntropyLoss_Type);
if (PyModule_AddObject(m, "CrossEntropyLoss", (PyObject *)&PyBobLearnCrossEntropyLoss_Type) < 0) return 0;
Py_INCREF(&PyBobLearnDataShuffler_Type);
if (PyModule_AddObject(m, "DataShuffler", (PyObject *)&PyBobLearnDataShuffler_Type) < 0) return 0;
static void* PyXbobLearnMLP_API[PyXbobLearnMLP_API_pointers];
/* exhaustive list of C APIs */
......@@ -107,6 +113,14 @@ static PyObject* create_module (void) {
PyXbobLearnMLP_API[PyBobLearnCrossEntropyLoss_Type_NUM] = (void *)&PyBobLearnCrossEntropyLoss_Type;
/********************************************
* Bindings for xbob.learn.mlp.DataShuffler *
********************************************/
PyXbobLearnMLP_API[PyBobLearnDataShuffler_Type_NUM] = (void *)&PyBobLearnDataShuffler_Type;
PyXbobLearnMLP_API[PyBobLearnDataShuffler_Check_NUM] = (void *)&PyBobLearnDataShuffler_Check;
#if PY_VERSION_HEX >= 0x02070000
/* defines the PyCapsule */
......
/**
* @author Andre Anjos <andre.anjos@idiap.ch>
* @date Tue 29 Apr 2014 16:16:59 CEST
*
* @brief Bindings for the data shuffler
*
* Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
*/
#define XBOB_LEARN_MLP_MODULE
#include <xbob.blitz/cppapi.h>
#include <xbob.blitz/cleanup.h>
#include <xbob.learn.mlp/api.h>
#include <structmember.h>
/*********************************************
* Implementation of DataShuffler base class *
*********************************************/
/* Name string for the DataShuffler type; it is installed in the `tp_name'
 * slot of PyBobLearnDataShuffler_Type. It is built by concatenating the
 * XBOB_EXT_MODULE_PREFIX macro with ".DataShuffler". */
PyDoc_STRVAR(s_shuffler_str, XBOB_EXT_MODULE_PREFIX ".DataShuffler");
/* Python-visible documentation for the DataShuffler type; it is installed in
 * the `tp_doc' slot of PyBobLearnDataShuffler_Type. The text is emitted at
 * runtime (e.g. by help()), so it is kept verbatim here. */
PyDoc_STRVAR(s_shuffler_doc,
"DataShuffler(data, target) -> New DataShuffler\n\
\n\
Serves data from a training set, in a random way.\n\
\n\
Objects of this class are capable of being populated with data\n\
from one or multiple classes and matching target values. Once\n\
setup, the shuffer can randomly select a number of vectors and\n\
accompaning targets for the different classes, filling up user\n\
containers.\n\
\n\
Data shufflers are particular useful for training neural networks.\n\
\n\
Keyword arguments:\n\
\n\
data, sequence of array-like 2D float64\n\
The input data are divided into sets corresponding to the\n\
elements of each input class. Within the class array, each\n\
row is expected to correspond to one observation of that class.\n\
\n\
target, sequence of array-like 1D float64\n\
The target arrays correspond to the targets for each of the\n\
input arrays. The number of targets must match the number of\n\
2D array objects given in ``data``.\n\
\n");
/**
 * Implements ``DataShuffler.__init__(data, target)`` (the `tp_init' slot).
 *
 * Both ``data`` and ``target`` must be sized iterables of the same length
 * (at least two entries). Every entry of ``data`` must be convertible to a
 * 2D float64 array and every entry of ``target`` to a 1D float64 array. The
 * converted arrays are handed over to the C++ ``bob::trainer::DataShuffler``
 * constructor and the resulting object is stored in ``self->cxx``.
 *
 * @param self The Python object being initialized
 * @param args Positional arguments, as given by the interpreter
 * @param kwds Keyword arguments, as given by the interpreter
 *
 * @return 0 on success; -1 with a Python exception set on any failure, as
 *         required by the `tp_init' protocol.
 */
static int PyBobLearnDataShuffler_init
(PyBobLearnDataShufflerObject* self, PyObject* args, PyObject* kwds) {

  /* Parses input arguments in a single shot */
  static const char* const_kwlist[] = {"data", "target", 0};
  static char** kwlist = const_cast<char**>(const_kwlist);

  PyObject* data = 0;
  PyObject* target = 0;

  if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO", kwlist,
        &data, &target)) return -1;

  /* Check lengths */
  Py_ssize_t data_length = PyObject_Length(data);
  if (data_length == -1) return -1;
  Py_ssize_t target_length = PyObject_Length(target);
  if (target_length == -1) return -1;

  if (data_length < 2) {
    PyErr_Format(PyExc_RuntimeError, "`%s' requires an iterable for parameter `data' leading to, at least, two entries (representing two classes), but you have passed something that has only %" PY_FORMAT_SIZE_T "d entries", Py_TYPE(self)->tp_name, data_length);
    return -1; ///< an exception is set: must signal failure to the interpreter
  }

  if (target_length != data_length) {
    PyErr_Format(PyExc_RuntimeError, "`%s' requires an iterable for parameter `target' leading to the same number of targets (%" PY_FORMAT_SIZE_T "d) as data arrays, but you have passed something that has only %" PY_FORMAT_SIZE_T "d entries", Py_TYPE(self)->tp_name, data_length, target_length);
    return -1;
  }

  /* Checks and converts all data entries */
  std::vector<blitz::Array<double,2> > data_seq;
  std::vector<boost::shared_ptr<PyBlitzArrayObject>> data_seq_;

  PyObject* iterator = PyObject_GetIter(data);
  if (!iterator) return -1;
  auto iterator_ = make_safe(iterator);

  while (PyObject* item = PyIter_Next(iterator)) {
    auto item_ = make_safe(item);

    PyBlitzArrayObject* bz = 0;

    if (!PyBlitzArray_Converter(item, &bz)) {
      /* size() is a size_t: cast it so it matches the signed format code */
      PyErr_Format(PyExc_TypeError, "`%s' could not convert object of type `%s' at position %" PY_FORMAT_SIZE_T "d of input sequence `data' into an array - check your input", Py_TYPE(self)->tp_name, Py_TYPE(item)->tp_name, static_cast<Py_ssize_t>(data_seq.size()));
      return -1;
    }

    if (bz->ndim != 2 || bz->type_num != NPY_FLOAT64) {
      PyErr_Format(PyExc_TypeError, "`%s' only supports 2D 64-bit float arrays for input sequence `data' (or any other object coercible to that), but at position %" PY_FORMAT_SIZE_T "d I have found an object with %" PY_FORMAT_SIZE_T "d dimensions and with type `%s' which is not compatible - check your input", Py_TYPE(self)->tp_name, static_cast<Py_ssize_t>(data_seq.size()), bz->ndim, PyBlitzArray_TypenumAsString(bz->type_num));
      Py_DECREF(bz);
      return -1;
    }

    data_seq_.push_back(make_safe(bz)); ///< prevents data deletion
    data_seq.push_back(*PyBlitzArrayCxx_AsBlitz<double,2>(bz)); ///< only a view!
  }

  /* PyIter_Next() returns NULL both at exhaustion and on error */
  if (PyErr_Occurred()) return -1;

  /* Checks and converts all target entries */
  std::vector<blitz::Array<double,1>> target_seq;
  std::vector<boost::shared_ptr<PyBlitzArrayObject>> target_seq_;

  iterator = PyObject_GetIter(target);
  if (!iterator) return -1;
  iterator_ = make_safe(iterator);

  while (PyObject* item = PyIter_Next(iterator)) {
    auto item_ = make_safe(item);

    PyBlitzArrayObject* bz = 0;

    if (!PyBlitzArray_Converter(item, &bz)) {
      PyErr_Format(PyExc_TypeError, "`%s' could not convert object of type `%s' at position %" PY_FORMAT_SIZE_T "d of input sequence `target' into an array - check your input", Py_TYPE(self)->tp_name, Py_TYPE(item)->tp_name, static_cast<Py_ssize_t>(target_seq.size()));
      return -1;
    }

    if (bz->ndim != 1 || bz->type_num != NPY_FLOAT64) {
      PyErr_Format(PyExc_TypeError, "`%s' only supports 1D 64-bit float arrays for input sequence `target' (or any other object coercible to that), but at position %" PY_FORMAT_SIZE_T "d I have found an object with %" PY_FORMAT_SIZE_T "d dimensions and with type `%s' which is not compatible - check your input", Py_TYPE(self)->tp_name, static_cast<Py_ssize_t>(target_seq.size()), bz->ndim, PyBlitzArray_TypenumAsString(bz->type_num));
      Py_DECREF(bz);
      return -1;
    }

    target_seq_.push_back(make_safe(bz)); ///< prevents target deletion
    target_seq.push_back(*PyBlitzArrayCxx_AsBlitz<double,1>(bz)); ///< only a view!
  }

  if (PyErr_Occurred()) return -1;

  // proceed to object initialization
  try {
    self->cxx = new bob::trainer::DataShuffler(data_seq, target_seq);
  }
  catch (std::exception& ex) {
    PyErr_SetString(PyExc_RuntimeError, ex.what());
    return -1;
  }
  catch (...) {
    PyErr_Format(PyExc_RuntimeError, "cannot create new object of type `%s' - unknown exception thrown", Py_TYPE(self)->tp_name);
    return -1;
  }

  return 0;

}
/**
 * Tells whether `o' is an instance of the DataShuffler type.
 *
 * The value returned is whatever PyObject_IsInstance() reports for `o'
 * against PyBobLearnDataShuffler_Type.
 */
int PyBobLearnDataShuffler_Check(PyObject* o) {
  PyObject* shuffler_type =
    reinterpret_cast<PyObject*>(&PyBobLearnDataShuffler_Type);
  return PyObject_IsInstance(o, shuffler_type);
}
/**
 * Static type object describing the Python-visible DataShuffler class.
 *
 * Only the name, instance size, flags, documentation and `tp_init' slots are
 * filled in; every other slot listed here is left null. Slots after `tp_init'
 * are not listed and therefore zero-initialized.
 *
 * NOTE(review): `tp_dealloc' is null here while `tp_init' allocates
 * `self->cxx' with `new'; nothing visible in this file deletes that pointer.
 * Confirm whether a deallocator is installed elsewhere.
 */
PyTypeObject PyBobLearnDataShuffler_Type = {
PyVarObject_HEAD_INIT(0, 0)
s_shuffler_str, /* tp_name */
sizeof(PyBobLearnDataShufflerObject), /* tp_basicsize */
0, /* tp_itemsize */
0, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_compare */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
0, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
/* Py_TPFLAGS_BASETYPE allows the type to be subclassed */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
s_shuffler_doc, /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
0, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
(initproc)PyBobLearnDataShuffler_init, /* tp_init */
};
......@@ -199,7 +199,7 @@ def test_normalization_big():
target1 = numpy.array([1], dtype='float64')
set2 = []
draw32 = xbob.core.random.normal(mean=3.0, sigma=2.0, dtyle=float)
draw32 = xbob.core.random.normal(mean=3.0, sigma=2.0, dtype=float)
for i in range(10000):
set2.append(numpy.array([draw32(rng)], dtype='float64'))
set2 = numpy.array(set2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment