diff --git a/setup.py b/setup.py
index a797ee5d07f1214bcccc09015ecd30ef7d0329fe..9a45af261a760065e009507b8ee43536a1c19052 100644
--- a/setup.py
+++ b/setup.py
@@ -63,6 +63,7 @@ setup(
         ),
       Extension("xbob.learn.mlp._library",
         [
+          "xbob/learn/mlp/cost.cpp",
           "xbob/learn/mlp/machine.cpp",
           "xbob/learn/mlp/main.cpp",
           ],
diff --git a/xbob/learn/mlp/cost.cpp b/xbob/learn/mlp/cost.cpp
index 54a9b242e83dd4f86f71afd753141fe542eaccb3..aea34622601841664ba95e33dee756c1db383bc4 100644
--- a/xbob/learn/mlp/cost.cpp
+++ b/xbob/learn/mlp/cost.cpp
@@ -11,47 +11,674 @@
 #include <xbob.blitz/cppapi.h>
 #include <xbob.blitz/cleanup.h>
 #include <xbob.learn.mlp/api.h>
+#include <xbob.learn.activation/api.h>
 #include <structmember.h>
+#include <boost/function.hpp>
+#include <boost/bind.hpp>
 
-/****************************************
- * Implementation of Machine base class *
- ****************************************/
-
-PyDoc_STRVAR(s_machine_str, XBOB_EXT_MODULE_PREFIX ".Machine");
-
-PyDoc_STRVAR(s_machine_doc,
-"Machine(shape)\n\
-Machine(config)\n\
-Machine(other)\n\
-\n\
-A Multi-layer Perceptron Machine.\n\
-\n\
-An MLP Machine is a representation of a Multi-Layer Perceptron.\n\
-This implementation is feed-forward and fully-connected. The\n\
-implementation allows setting of input normalization values and\n\
-a global activation function. References to fully-connected\n\
-feed-forward networks:\n\
-\n\
-  Bishop's Pattern Recognition and Machine Learning, Chapter 5.\n\
-  Figure 5.1 shows what we mean.\n\
-\n\
-MLPs normally are multi-layered systems, with 1 or more hidden\n\
-layers. As a special case, this implementation also supports\n\
-connecting the input directly to the output by means of a single\n\
-weight matrix. This is equivalent of a\n\
-:py:class:`xbob.learn.linear.Machine`, with the advantage it can\n\
-be trained by trainers defined in this package.\n\
-\n\
-An MLP can be constructed in different ways. In the first form,\n\
-the user specifies the machine shape as sequence (e.g. a tuple).\n\
-The sequence should contain the number of inputs (first element),\n\
-number of outputs (last element) and the number of neurons in\n\
-each hidden layer (elements between the first and last element\n\
-of given tuple). The activation function will be set to\n\
-hyperbolic tangent. The machine is remains **uninitialized**.\n\
-In the second form the user passes a pre-opened HDF5 file\n\
-pointing to the machine information to be loaded in memory.\n\
-Finally, in the last form (copy constructor), the user passes\n\
-another :py:class:`Machine` that will be fully copied.\n\
+/*************************************
+ * Implementation of Cost base class *
+ *************************************/
+
+PyDoc_STRVAR(s_cost_str, XBOB_EXT_MODULE_PREFIX ".Cost");
+
+PyDoc_STRVAR(s_cost_doc,
+"A base class for evaluating the performance cost.\n\
+\n\
+This is the base class for all concrete (C++ only) loss\n\
+function implementations. You cannot instantiate objects\n\
+of this type directly, use one of the derived classes.\n\
 ");
 
+static int PyBobLearnCost_init
+(PyBobLearnCostObject* self, PyObject* args, PyObject* kwds) {
+
+  PyErr_Format(PyExc_NotImplementedError, "cannot instantiate objects of type `%s', use one of the derived classes", Py_TYPE(self)->tp_name);
+  return -1;
+
+}
+
+int PyBobLearnCost_Check(PyObject* o) {
+  return PyObject_IsInstance(o, reinterpret_cast<PyObject*>(&PyBobLearnCost_Type));
+}
+
+static PyObject* PyBobLearnCost_RichCompare
+(PyBobLearnCostObject* self, PyObject* other, int op) {
+
+  if (!PyBobLearnCost_Check(other)) {
+    PyErr_Format(PyExc_TypeError, "cannot compare `%s' with `%s'",
+        Py_TYPE(self)->tp_name, Py_TYPE(other)->tp_name);
+    return 0;
+  }
+
+  auto other_ = reinterpret_cast<PyBobLearnCostObject*>(other);
+
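+  // note: equality is defined on the functor's printable representation:
+  // two cost objects compare equal iff their str() formulas match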
+  switch (op) {
+    case Py_EQ:
+      if (self->cxx->str() == other_->cxx->str()) Py_RETURN_TRUE;
+      Py_RETURN_FALSE;
+      break;
+    case Py_NE:
+      if (self->cxx->str() != other_->cxx->str()) Py_RETURN_TRUE;
+      Py_RETURN_FALSE;
+      break;
+    default:
+      Py_INCREF(Py_NotImplemented);
+      return Py_NotImplemented;
+  }
+
+}
+
+#if PY_VERSION_HEX >= 0x03000000
+#  define PYOBJECT_STR PyObject_Str
+#else
+#  define PYOBJECT_STR PyObject_Unicode
+#endif
+
+PyObject* PyBobLearnCost_Repr(PyBobLearnCostObject* self) {
+
+  /**
+   * Expected output:
+   *
+   * <xbob.learn.mlp.Cost [act: ...]>
+   */
+
+  auto retval = PyUnicode_FromFormat("<%s [act: %s]>",
+        Py_TYPE(self)->tp_name, self->cxx->str().c_str());
+
+#if PY_VERSION_HEX < 0x03000000
+  if (!retval) return 0;
+  PyObject* tmp = PyObject_Str(retval);
+  Py_DECREF(retval);
+  retval = tmp;
+#endif
+
+  return retval;
+
+}
+
+PyObject* PyBobLearnCost_Str(PyBobLearnCostObject* self) {
+  return Py_BuildValue("s", self->cxx->str().c_str());
+}
+
+/**
+ * Checks if arrays `a1' and `a2' have the same shape.
+ */
+static int have_same_shape (PyBlitzArrayObject* a1, PyBlitzArrayObject* a2) {
+
+  if (a1->ndim != a2->ndim) return 0;
+
+  for (Py_ssize_t k=0; k<a1->ndim; ++k) {
+    if (a1->shape[k] != a2->shape[k]) return 0;
+  }
+
+  return 1;
+}
+
+static PyObject* apply_scalar(PyBobLearnCostObject* self, const char*,
+    boost::function<double (double, double)> function,
+    PyObject* args, PyObject* kwds) {
+
+  static const char* const_kwlist[] = {"output", "target", 0};
+  static char** kwlist = const_cast<char**>(const_kwlist);
+
+  double output = 0.;
+  double target = 0.;
+
+  if (!PyArg_ParseTupleAndKeywords(args, kwds, "dd", kwlist,
+        &output, &target)) return 0;
+
+  return Py_BuildValue("d", function(output, target));
+
+}
+
+/**
+ * Maps all elements of arr through function() into `result'
+ */
+static PyObject* apply_array(PyBobLearnCostObject* self, const char* fname,
+    boost::function<double (double, double)> function,
+    PyObject* args, PyObject* kwds) {
+
+  static const char* const_kwlist[] = {"output", "target", "result", 0};
+  static char** kwlist = const_cast<char**>(const_kwlist);
+
+  PyBlitzArrayObject* output = 0;
+  PyBlitzArrayObject* target = 0;
+  PyBlitzArrayObject* result = 0;
+
+  if (!PyArg_ParseTupleAndKeywords(args, kwds, "O&O&|O&", kwlist,
+        &PyBlitzArray_Converter, &output,
+        &PyBlitzArray_Converter, &target,
+        &PyBlitzArray_OutputConverter, &result
+        )) return 0;
+
+  //protects acquired resources through this scope
+  auto output_ = make_safe(output);
+  auto target_ = make_safe(target);
+  auto result_ = make_xsafe(result);
+
+  if (output->type_num != NPY_FLOAT64) {
+    PyErr_Format(PyExc_TypeError, "`%s.%s' only supports 64-bit float arrays for input array `output'", Py_TYPE(self)->tp_name, fname);
+    return 0;
+  }
+
+  if (target->type_num != NPY_FLOAT64) {
+    PyErr_Format(PyExc_TypeError, "`%s.%s' only supports 64-bit float arrays for input array `target'", Py_TYPE(self)->tp_name, fname);
+    return 0;
+  }
+
+  if (result && result->type_num != NPY_FLOAT64) {
+    PyErr_Format(PyExc_TypeError, "`%s.%s' only supports 64-bit float arrays for output array `result'", Py_TYPE(self)->tp_name, fname);
+    return 0;
+  }
+
+  if (!have_same_shape(output, target)) {
+    PyErr_Format(PyExc_RuntimeError, "`%s.%s' requires input arrays `output' and `target' to have the same shape, but you provided arrays with different shapes", Py_TYPE(self)->tp_name, fname);
+    return 0;
+  }
+
+  if (result && !have_same_shape(output, result)) {
+    PyErr_Format(PyExc_RuntimeError, "`%s.%s' requires output array `result' to have the same shape as input arrays `output' and `target', but you provided arrays with different shapes", Py_TYPE(self)->tp_name, fname);
+    return 0;
+  }
+
+  /** if ``result`` was not pre-allocated, do it now **/
+  if (!result) {
+    result = (PyBlitzArrayObject*)PyBlitzArray_SimpleNew(NPY_FLOAT64, output->ndim, output->shape);
+    if (!result) return 0; ///< allocation failed; propagate the error
+    result_ = make_safe(result);
+  }
+
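+  // dispatch on the array rank: blitz::Array is templated on its number
+  // of dimensions, so each supported rank (1 to 4) gets its own branch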
+  switch (output->ndim) {
+    case 1:
+      {
+        blitz::Array<double,1>& output_ =
+          *PyBlitzArrayCxx_AsBlitz<double,1>(output);
+        blitz::Array<double,1>& target_ =
+          *PyBlitzArrayCxx_AsBlitz<double,1>(target);
+        blitz::Array<double,1>& result_ =
+          *PyBlitzArrayCxx_AsBlitz<double,1>(result);
+        for (int k=0; k<output_.extent(0); ++k)
+          result_(k) = function(output_(k), target_(k));
+      }
+      break;
+
+    case 2:
+      {
+        blitz::Array<double,2>& output_ =
+          *PyBlitzArrayCxx_AsBlitz<double,2>(output);
+        blitz::Array<double,2>& target_ =
+          *PyBlitzArrayCxx_AsBlitz<double,2>(target);
+        blitz::Array<double,2>& result_ =
+          *PyBlitzArrayCxx_AsBlitz<double,2>(result);
+        for (int k=0; k<output_.extent(0); ++k)
+          for (int l=0; l<output_.extent(1); ++l)
+            result_(k,l) = function(output_(k,l), target_(k,l));
+      }
+      break;
+
+    case 3:
+      {
+        blitz::Array<double,3>& output_ =
+          *PyBlitzArrayCxx_AsBlitz<double,3>(output);
+        blitz::Array<double,3>& target_ =
+          *PyBlitzArrayCxx_AsBlitz<double,3>(target);
+        blitz::Array<double,3>& result_ =
+          *PyBlitzArrayCxx_AsBlitz<double,3>(result);
+        for (int k=0; k<output_.extent(0); ++k)
+          for (int l=0; l<output_.extent(1); ++l)
+            for (int m=0; m<output_.extent(2); ++m)
+              result_(k,l,m) = function(output_(k,l,m), target_(k,l,m));
+      }
+      break;
+
+    case 4:
+      {
+        blitz::Array<double,4>& output_ =
+          *PyBlitzArrayCxx_AsBlitz<double,4>(output);
+        blitz::Array<double,4>& target_ =
+          *PyBlitzArrayCxx_AsBlitz<double,4>(target);
+        blitz::Array<double,4>& result_ =
+          *PyBlitzArrayCxx_AsBlitz<double,4>(result);
+        for (int k=0; k<output_.extent(0); ++k)
+          for (int l=0; l<output_.extent(1); ++l)
+            for (int m=0; m<output_.extent(2); ++m)
+              for (int n=0; n<output_.extent(3); ++n)
+                result_(k,l,m,n) = function(output_(k,l,m,n), target_(k,l,m,n));
+      }
+      break;
+
+    default:
+      PyErr_Format(PyExc_RuntimeError, "`%s.%s' only accepts 1, 2, 3 or 4-dimensional double arrays (not %" PY_FORMAT_SIZE_T "dD arrays)", Py_TYPE(self)->tp_name, fname, output->ndim);
+      return 0;
+
+  }
+
+  Py_INCREF(result);
+  return PyBlitzArray_NUMPY_WRAP(reinterpret_cast<PyObject*>(result));
+
+}
+
+PyDoc_STRVAR(s_f_str, "f");
+PyDoc_STRVAR(s_f_doc,
+"o.f(output, target, [result]) -> result\n\
+\n\
+Computes the cost, given the current and expected outputs.\n\
+\n\
+Keyword arguments:\n\
+\n\
+output, ND array, float64 | scalar\n\
+  Real output from the machine. May be an N-dimensional array\n\
+or a plain scalar.\n\
+\n\
+target, ND array, float64 | scalar\n\
+  Target output you are training to achieve. The data type\n\
+and extents for this object must match those of ``output``.\n\
+\n\
+result (optional), ND array, float64\n\
+  Where to place the result of the calculation. You can only\n\
+pass this argument if the inputs are N-dimensional arrays;\n\
+otherwise, it is an error to pass such a container. If the\n\
+inputs are arrays and an object for ``result`` is passed,\n\
+then its dimensions and data type must match those of both\n\
+``output`` and ``target``.\n\
+\n\
+Returns the cost as a scalar if the inputs were scalars, or\n\
+as an array with the same shape as ``output`` and ``target``\n\
+otherwise.\n\
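+\n\
+Example: a minimal sketch, assuming the activation classes from\n\
+:py:mod:`xbob.learn.activation` are available (as in this package's\n\
+test suite):\n\
+\n\
+.. code-block:: python\n\
+\n\
+   import numpy\n\
+   from xbob.learn.mlp import SquareError\n\
+   from xbob.learn.activation import Identity\n\
+\n\
+   cost = SquareError(Identity())\n\
+   cost.f(0.4, 1.0)  # scalar inputs -> scalar cost, ((0.4-1.0)**2)/2\n\
+\n\
+   output = numpy.random.rand(10)\n\
+   target = numpy.random.rand(10)\n\
+   cost.f(output, target)  # arrays -> new array with per-element costs\n\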
+");
+
+static PyObject* PyBobLearnCost_f
+(PyBobLearnCostObject* self, PyObject* args, PyObject* kwds) {
+
+  PyObject* arg = 0; ///< borrowed (don't delete)
+  if (PyTuple_Size(args)) arg = PyTuple_GET_ITEM(args, 0);
+  else if (kwds && PyDict_Size(kwds) > 0) {
+    PyObject* tmp = PyDict_Values(kwds);
+    auto tmp_ = make_safe(tmp);
+    arg = PyList_GET_ITEM(tmp, 0);
+  }
+
+  if (!arg) {
+    PyErr_Format(PyExc_TypeError, "`%s.f' requires at least the `output' and `target' arguments", Py_TYPE(self)->tp_name);
+    return 0;
+  }
+
+  if (PyNumber_Check(arg) && !(PyArray_Check(arg) || PyBlitzArray_Check(arg)))
+    return apply_scalar(self, s_f_str,
+        boost::bind(&bob::trainer::Cost::f, self->cxx, _1, _2), args, kwds);
+
+  return apply_array(self, s_f_str,
+      boost::bind(&bob::trainer::Cost::f, self->cxx, _1, _2), args, kwds);
+
+}
+
+PyDoc_STRVAR(s_f_prime_str, "f_prime");
+PyDoc_STRVAR(s_f_prime_doc,
+"o.f_prime(output, target, [result]) -> result\n\
+\n\
+Computes the derivative of the cost w.r.t. output.\n\
+\n\
+Keyword arguments:\n\
+\n\
+output, ND array, float64 | scalar\n\
+  Real output from the machine. May be an N-dimensional array\n\
+or a plain scalar.\n\
+\n\
+target, ND array, float64 | scalar\n\
+  Target output you are training to achieve. The data type\n\
+and extents for this object must match those of ``output``.\n\
+\n\
+result (optional), ND array, float64\n\
+  Where to place the result of the calculation. You can only\n\
+pass this argument if the inputs are N-dimensional arrays;\n\
+otherwise, it is an error to pass such a container. If the\n\
+inputs are arrays and an object for ``result`` is passed,\n\
+then its dimensions and data type must match those of both\n\
+``output`` and ``target``.\n\
+\n\
+Returns the derivative as a scalar if the inputs were scalars,\n\
+or as an array with the same shape as ``output`` and ``target``\n\
+otherwise.\n\
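+\n\
+Example: a minimal sketch; for the square error the derivative\n\
+w.r.t. the output is ``output - target`` (this mirrors the exact-match\n\
+checks in this package's test suite):\n\
+\n\
+.. code-block:: python\n\
+\n\
+   from xbob.learn.mlp import SquareError\n\
+   from xbob.learn.activation import Identity\n\
+\n\
+   cost = SquareError(Identity())\n\
+   cost.f_prime(0.4, 1.0)  # == 0.4 - 1.0\n\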
+");
+
+static PyObject* PyBobLearnCost_f_prime
+(PyBobLearnCostObject* self, PyObject* args, PyObject* kwds) {
+
+  PyObject* arg = 0; ///< borrowed (don't delete)
+  if (PyTuple_Size(args)) arg = PyTuple_GET_ITEM(args, 0);
+  else if (kwds && PyDict_Size(kwds) > 0) {
+    PyObject* tmp = PyDict_Values(kwds);
+    auto tmp_ = make_safe(tmp);
+    arg = PyList_GET_ITEM(tmp, 0);
+  }
+
+  if (!arg) {
+    PyErr_Format(PyExc_TypeError, "`%s.f_prime' requires at least the `output' and `target' arguments", Py_TYPE(self)->tp_name);
+    return 0;
+  }
+
+  if (PyNumber_Check(arg) && !(PyArray_Check(arg) || PyBlitzArray_Check(arg)))
+    return apply_scalar(self, s_f_prime_str,
+        boost::bind(&bob::trainer::Cost::f_prime,
+          self->cxx, _1, _2), args, kwds);
+
+  return apply_array(self, s_f_prime_str,
+      boost::bind(&bob::trainer::Cost::f_prime,
+        self->cxx, _1, _2), args, kwds);
+
+}
+
+PyDoc_STRVAR(s_error_str, "error");
+PyDoc_STRVAR(s_error_doc,
+"o.error(output, target, [result]) -> result\n\
+\n\
+Computes the back-propagated error for a given MLP ``output`` layer.\n\
+\n\
+The error is computed given the output layer's activation function and outputs - i.e., it is the error back-propagated through the last layer neurons up to the synapses connecting the last hidden layer to the output layer.\n\
+\n\
+This implementation allows for optimization in the calculation of the back-propagated errors in cases where there is a possibility of mathematical simplification when using a certain combination of cost function and activation, e.g. an ML cost and a logistic activation function.\n\
+\n\
+Keyword arguments:\n\
+\n\
+output, ND array, float64 | scalar\n\
+  Real output from the machine. May be an N-dimensional array or a plain scalar.\n\
+\n\
+target, ND array, float64 | scalar\n\
+  Target output you are training to achieve. The data type and extents for this object must match those of ``output``.\n\
+\n\
+result (optional), ND array, float64\n\
+  Where to place the result of the calculation. You can only pass this argument if the inputs are N-dimensional arrays; otherwise, it is an error to pass such a container. If the inputs are arrays and an object for ``result`` is passed, then its dimensions and data type must match those of both ``output`` and ``target``.\n\
+\n\
+Returns the error as a scalar if the inputs were scalars, or as an array with the same shape as ``output`` and ``target`` otherwise.\n\
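+\n\
+Example: a minimal sketch; with a cross-entropy cost and a logistic\n\
+activation the back-propagated error simplifies to ``output - target``\n\
+(see :py:class:`CrossEntropyLoss`):\n\
+\n\
+.. code-block:: python\n\
+\n\
+   from xbob.learn.mlp import CrossEntropyLoss\n\
+   from xbob.learn.activation import Logistic\n\
+\n\
+   cost = CrossEntropyLoss(Logistic())\n\
+   cost.error(0.7, 1.0)  # == 0.7 - 1.0\n\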
+");
+
+static PyObject* PyBobLearnCost_error
+(PyBobLearnCostObject* self, PyObject* args, PyObject* kwds) {
+
+  PyObject* arg = 0; ///< borrowed (don't delete)
+  if (PyTuple_Size(args)) arg = PyTuple_GET_ITEM(args, 0);
+  else if (kwds && PyDict_Size(kwds) > 0) {
+    PyObject* tmp = PyDict_Values(kwds);
+    auto tmp_ = make_safe(tmp);
+    arg = PyList_GET_ITEM(tmp, 0);
+  }
+
+  if (!arg) {
+    PyErr_Format(PyExc_TypeError, "`%s.error' requires at least the `output' and `target' arguments", Py_TYPE(self)->tp_name);
+    return 0;
+  }
+
+  if (PyNumber_Check(arg) && !(PyArray_Check(arg) || PyBlitzArray_Check(arg)))
+    return apply_scalar(self, s_error_str,
+        boost::bind(&bob::trainer::Cost::error, self->cxx, _1, _2), args, kwds);
+
+  return apply_array(self, s_error_str,
+      boost::bind(&bob::trainer::Cost::error, self->cxx, _1, _2), args, kwds);
+
+}
+
+static PyMethodDef PyBobLearnCost_methods[] = {
+  {
+    s_f_str,
+    (PyCFunction)PyBobLearnCost_f,
+    METH_VARARGS|METH_KEYWORDS,
+    s_f_doc
+  },
+  {
+    s_f_prime_str,
+    (PyCFunction)PyBobLearnCost_f_prime,
+    METH_VARARGS|METH_KEYWORDS,
+    s_f_prime_doc
+  },
+  {
+    s_error_str,
+    (PyCFunction)PyBobLearnCost_error,
+    METH_VARARGS|METH_KEYWORDS,
+    s_error_doc
+  },
+  {0} /* Sentinel */
+};
+
+PyTypeObject PyBobLearnCost_Type = {
+    PyVarObject_HEAD_INIT(0, 0)
+    s_cost_str,                               /* tp_name */
+    sizeof(PyBobLearnCostObject),             /* tp_basicsize */
+    0,                                        /* tp_itemsize */
+    0,                                        /* tp_dealloc */
+    0,                                        /* tp_print */
+    0,                                        /* tp_getattr */
+    0,                                        /* tp_setattr */
+    0,                                        /* tp_compare */
+    (reprfunc)PyBobLearnCost_Repr,            /* tp_repr */
+    0,                                        /* tp_as_number */
+    0,                                        /* tp_as_sequence */
+    0,                                        /* tp_as_mapping */
+    0,                                        /* tp_hash */
+    (ternaryfunc)PyBobLearnCost_f,            /* tp_call */
+    (reprfunc)PyBobLearnCost_Str,             /* tp_str */
+    0,                                        /* tp_getattro */
+    0,                                        /* tp_setattro */
+    0,                                        /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+    s_cost_doc,                               /* tp_doc */
+    0,                                        /* tp_traverse */
+    0,                                        /* tp_clear */
+    (richcmpfunc)PyBobLearnCost_RichCompare,  /* tp_richcompare */
+    0,                                        /* tp_weaklistoffset */
+    0,                                        /* tp_iter */
+    0,                                        /* tp_iternext */
+    PyBobLearnCost_methods,                   /* tp_methods */
+    0,                                        /* tp_members */
+    0,                                        /* tp_getset */
+    0,                                        /* tp_base */
+    0,                                        /* tp_dict */
+    0,                                        /* tp_descr_get */
+    0,                                        /* tp_descr_set */
+    0,                                        /* tp_dictoffset */
+    (initproc)PyBobLearnCost_init             /* tp_init */
+};
+
+PyDoc_STRVAR(s_squareerror_str, XBOB_EXT_MODULE_PREFIX ".SquareError");
+
+PyDoc_STRVAR(s_squareerror_doc,
+"SquareError(actfun) -> new SquareError functor\n\
+\n\
+Calculates the Square-Error between output and target.\n\
+\n\
+The square error is defined as follows:\n\
+\n\
+.. math::\n\
+   J = \\frac{(\\hat{y} - y)^2}{2}\n\
+\n\
+where :math:`\\hat{y}` is the output estimated by your machine and\n\
+:math:`y` is the expected output.\n\
+\n\
+Keyword arguments:\n\
+\n\
+actfun\n\
+  The activation function object used at the last layer\n\
+\n\
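+A minimal usage sketch (the functor is also callable; calling it\n\
+evaluates :math:`J`, an alias for :py:meth:`Cost.f`):\n\
+\n\
+.. code-block:: python\n\
+\n\
+   from xbob.learn.mlp import SquareError\n\
+   from xbob.learn.activation import Identity\n\
+\n\
+   cost = SquareError(Identity())\n\
+   cost(0.4, 1.0)  # same as cost.f(0.4, 1.0) == ((0.4-1.0)**2)/2\n\
+\n\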
+");
+
+static int PyBobLearnSquareError_init
+(PyBobLearnSquareErrorObject* self, PyObject* args, PyObject* kwds) {
+
+  /* Parses input arguments in a single shot */
+  static const char* const_kwlist[] = {"actfun", 0};
+  static char** kwlist = const_cast<char**>(const_kwlist);
+
+  PyObject* actfun = 0;
+
+  if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!", kwlist,
+        &PyBobLearnActivation_Type, &actfun)) return -1;
+
+  try {
+    auto _actfun = reinterpret_cast<PyBobLearnActivationObject*>(actfun);
+    self->cxx = new bob::trainer::SquareError(_actfun->cxx);
+  }
+  catch (std::exception& ex) {
+    PyErr_SetString(PyExc_RuntimeError, ex.what());
+  }
+  catch (...) {
+    PyErr_Format(PyExc_RuntimeError, "cannot create new object of type `%s' - unknown exception thrown", Py_TYPE(self)->tp_name);
+  }
+
+  self->parent.cxx = self->cxx;
+
+  if (PyErr_Occurred()) return -1;
+
+  return 0;
+
+}
+
+static void PyBobLearnSquareError_delete
+(PyBobLearnSquareErrorObject* self) {
+
+  self->parent.cxx = 0;
+  delete self->cxx;
+  Py_TYPE(&self->parent)->tp_free((PyObject*)self);
+
+}
+
+PyTypeObject PyBobLearnSquareError_Type = {
+    PyVarObject_HEAD_INIT(0, 0)
+    s_squareerror_str,                        /*tp_name*/
+    sizeof(PyBobLearnSquareErrorObject),      /*tp_basicsize*/
+    0,                                        /*tp_itemsize*/
+    (destructor)PyBobLearnSquareError_delete, /*tp_dealloc*/
+    0,                                        /*tp_print*/
+    0,                                        /*tp_getattr*/
+    0,                                        /*tp_setattr*/
+    0,                                        /*tp_compare*/
+    0,                                        /*tp_repr*/
+    0,                                        /*tp_as_number*/
+    0,                                        /*tp_as_sequence*/
+    0,                                        /*tp_as_mapping*/
+    0,                                        /*tp_hash */
+    0,                                        /*tp_call*/
+    0,                                        /*tp_str*/
+    0,                                        /*tp_getattro*/
+    0,                                        /*tp_setattro*/
+    0,                                        /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
+    s_squareerror_doc,                        /* tp_doc */
+    0,                                        /* tp_traverse */
+    0,                                        /* tp_clear */
+    0,                                        /* tp_richcompare */
+    0,                                        /* tp_weaklistoffset */
+    0,                                        /* tp_iter */
+    0,                                        /* tp_iternext */
+    0,                                        /* tp_methods */
+    0,                                        /* tp_members */
+    0,                                        /* tp_getset */
+    0,                                        /* tp_base */
+    0,                                        /* tp_dict */
+    0,                                        /* tp_descr_get */
+    0,                                        /* tp_descr_set */
+    0,                                        /* tp_dictoffset */
+    (initproc)PyBobLearnSquareError_init,     /* tp_init */
+};
+
+PyDoc_STRVAR(s_crossentropyloss_str, XBOB_EXT_MODULE_PREFIX ".CrossEntropyLoss");
+
+PyDoc_STRVAR(s_crossentropyloss_doc,
+"CrossEntropyLoss(actfun) -> new CrossEntropyLoss functor\n\
+\n\
+Calculates the Cross Entropy Loss between output and target.\n\
+\n\
+The cross entropy loss is defined as follows:\n\
+\n\
+.. math::\n\
+   J = - y \\cdot \\log{(\\hat{y})} - (1-y) \\log{(1-\\hat{y})}\n\
+\n\
+where :math:`\\hat{y}` is the output estimated by your machine and\n\
+:math:`y` is the expected output.\n\
+\n\
+Keyword arguments:\n\
+\n\
+actfun\n\
+  The activation function object used at the last layer. If you\n\
+  set this to :py:class:`xbob.learn.activation.Logistic`, a\n\
+  mathematical simplification is possible in which\n\
+  ``backprop_error()`` can benefit increasing the numerical\n\
+  stability of the training process. The simplification goes\n\
+  as follows:\n\
+  \n\
+  .. math::\n\
+     b = \\delta \\cdot \\varphi'(z)\n\
+  \n\
+  But, for the cross-entropy loss: \n\
+  \n\
+  .. math::\n\
+     \\delta = \\frac{\\hat{y} - y}{\\hat{y}(1 - \\hat{y})}\n\
+  \n\
+  and :math:`\\varphi'(z) = \\hat{y} \\cdot (1 - \\hat{y})`, so:\n\
+  \n\
+  .. math::\n\
+     b = \\hat{y} - y\n\
+\n\
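+A minimal usage sketch exercising the simplification (as checked in\n\
+this package's tests):\n\
+\n\
+.. code-block:: python\n\
+\n\
+   from xbob.learn.mlp import CrossEntropyLoss\n\
+   from xbob.learn.activation import Logistic\n\
+\n\
+   cost = CrossEntropyLoss(Logistic())\n\
+   cost.logistic_activation  # True: the shortcut above applies\n\
+   cost.error(0.7, 1.0)      # == 0.7 - 1.0\n\
+\n\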
+");
+
+static int PyBobLearnCrossEntropyLoss_init
+(PyBobLearnCrossEntropyLossObject* self, PyObject* args, PyObject* kwds) {
+
+  /* Parses input arguments in a single shot */
+  static const char* const_kwlist[] = {"actfun", 0};
+  static char** kwlist = const_cast<char**>(const_kwlist);
+
+  PyObject* actfun = 0;
+
+  if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!", kwlist,
+        &PyBobLearnActivation_Type, &actfun)) return -1;
+
+  try {
+    auto _actfun = reinterpret_cast<PyBobLearnActivationObject*>(actfun);
+    self->cxx = new bob::trainer::CrossEntropyLoss(_actfun->cxx);
+  }
+  catch (std::exception& ex) {
+    PyErr_SetString(PyExc_RuntimeError, ex.what());
+  }
+  catch (...) {
+    PyErr_Format(PyExc_RuntimeError, "cannot create new object of type `%s' - unknown exception thrown", Py_TYPE(self)->tp_name);
+  }
+
+  self->parent.cxx = self->cxx;
+
+  if (PyErr_Occurred()) return -1;
+
+  return 0;
+
+}
+
+static void PyBobLearnCrossEntropyLoss_delete
+(PyBobLearnCrossEntropyLossObject* self) {
+
+  self->parent.cxx = 0;
+  delete self->cxx;
+  Py_TYPE(&self->parent)->tp_free((PyObject*)self);
+
+}
+
+PyTypeObject PyBobLearnCrossEntropyLoss_Type = {
+    PyVarObject_HEAD_INIT(0, 0)
+    s_crossentropyloss_str,                        /*tp_name*/
+    sizeof(PyBobLearnCrossEntropyLossObject),      /*tp_basicsize*/
+    0,                                             /*tp_itemsize*/
+    (destructor)PyBobLearnCrossEntropyLoss_delete, /*tp_dealloc*/
+    0,                                             /*tp_print*/
+    0,                                             /*tp_getattr*/
+    0,                                             /*tp_setattr*/
+    0,                                             /*tp_compare*/
+    0,                                             /*tp_repr*/
+    0,                                             /*tp_as_number*/
+    0,                                             /*tp_as_sequence*/
+    0,                                             /*tp_as_mapping*/
+    0,                                             /*tp_hash */
+    0,                                             /*tp_call*/
+    0,                                             /*tp_str*/
+    0,                                             /*tp_getattro*/
+    0,                                             /*tp_setattro*/
+    0,                                             /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,      /*tp_flags*/
+    s_crossentropyloss_doc,                        /* tp_doc */
+    0,                                             /* tp_traverse */
+    0,                                             /* tp_clear */
+    0,                                             /* tp_richcompare */
+    0,                                             /* tp_weaklistoffset */
+    0,                                             /* tp_iter */
+    0,                                             /* tp_iternext */
+    0,                                             /* tp_methods */
+    0,                                             /* tp_members */
+    0,                                             /* tp_getset */
+    0,                                             /* tp_base */
+    0,                                             /* tp_dict */
+    0,                                             /* tp_descr_get */
+    0,                                             /* tp_descr_set */
+    0,                                             /* tp_dictoffset */
+    (initproc)PyBobLearnCrossEntropyLoss_init,     /* tp_init */
+};
diff --git a/xbob/learn/mlp/include/xbob.learn.mlp/api.h b/xbob/learn/mlp/include/xbob.learn.mlp/api.h
index 189c06f7cb7cde4bf63876f5e0b8ee8fa941f38d..c23e5820865ebd96a0ec9796ef28c4acb1aeafbd 100644
--- a/xbob/learn/mlp/include/xbob.learn.mlp/api.h
+++ b/xbob/learn/mlp/include/xbob.learn.mlp/api.h
@@ -11,6 +11,9 @@
 #include <Python.h>
 #include <xbob.learn.mlp/config.h>
 #include <bob/machine/MLP.h>
+#include <bob/trainer/Cost.h>
+#include <bob/trainer/SquareError.h>
+#include <bob/trainer/CrossEntropyLoss.h>
 
 #define XBOB_LEARN_MLP_MODULE_PREFIX xbob.learn.mlp
 #define XBOB_LEARN_MLP_MODULE_NAME _library
@@ -30,9 +33,7 @@ enum _PyBobLearnMLP_ENUM{
   PyBobLearnCost_Type_NUM,
   PyBobLearnCost_Check_NUM,
   PyBobLearnSquareError_Type_NUM,
-  PyBobLearnSquareError_Check_NUM,
   PyBobLearnCrossEntropyLoss_Type_NUM,
-  PyBobLearnCrossEntropyLoss_Check_NUM,
   // Total number of C API pointers
   PyXbobLearnMLP_API_pointers
 };
@@ -77,9 +78,6 @@ typedef struct {
 
 #define PyBobLearnSquareError_Type_TYPE PyTypeObject
 
-#define PyBobLearnSquareError_Check_RET int
-#define PyBobLearnSquareError_Check_PROTO (PyObject* o)
-
 typedef struct {
   PyBobLearnCostObject parent;
   bob::trainer::CrossEntropyLoss* cxx;
@@ -87,9 +85,6 @@ typedef struct {
 
 #define PyBobLearnCrossEntropyLoss_Type_TYPE PyTypeObject
 
-#define PyBobLearnCrossEntropyLoss_Check_RET int
-#define PyBobLearnCrossEntropyLoss_Check_PROTO (PyObject* o)
-
 #ifdef XBOB_LEARN_MLP_MODULE
 
   /* This section is used when compiling `xbob.learn.mlp' itself */
@@ -120,12 +115,8 @@ typedef struct {
 
   extern PyBobLearnSquareError_Type_TYPE PyBobLearnSquareError_Type;
 
-  PyBobLearnSquareError_Check_RET PyBobLearnSquareError_Check PyBobLearnSquareError_Check_PROTO;
-
   extern PyBobLearnCrossEntropyLoss_Type_TYPE PyBobLearnCrossEntropyLoss_Type;
 
-  PyBobLearnCrossEntropyLoss_Check_RET PyBobLearnCrossEntropyLoss_Check PyBobLearnCrossEntropyLoss_Check_PROTO;
-
 #else
 
   /* This section is used in modules that use `xbob.learn.mlp's' C-API */
@@ -178,12 +169,8 @@ typedef struct {
 
 # define PyBobLearnSquareError_Type (*(PyBobLearnSquareError_Type_TYPE *)PyXbobLearnMLP_API[PyBobLearnSquareError_Type_NUM])
 
-# define PyBobLearnSquareError_Check (*(PyBobLearnSquareError_Check_RET (*)PyBobLearnSquareError_Check_PROTO) PyXbobLearnMLP_API[PyBobLearnSquareError_Check_NUM])
-
 # define PyBobLearnCrossEntropyLoss_Type (*(PyBobLearnCrossEntropyLoss_Type_TYPE *)PyXbobLearnMLP_API[PyBobLearnCrossEntropyLoss_Type_NUM])
 
-# define PyBobLearnCrossEntropyLoss_Check (*(PyBobLearnCrossEntropyLoss_Check_RET (*)PyBobLearnCrossEntropyLoss_Check_PROTO) PyXbobLearnMLP_API[PyBobLearnCrossEntropyLoss_Check_NUM])
-
 # if !defined(NO_IMPORT_ARRAY)
 
   /**
diff --git a/xbob/learn/mlp/main.cpp b/xbob/learn/mlp/main.cpp
index e7d647f1aed990239897f68fd3500539d2dde96a..9ffeee989428760369b44a0a91c746bea98fc5df 100644
--- a/xbob/learn/mlp/main.cpp
+++ b/xbob/learn/mlp/main.cpp
@@ -47,7 +47,7 @@ static PyObject* create_module (void) {
   PyBobLearnSquareError_Type.tp_base = &PyBobLearnCost_Type;
   if (PyType_Ready(&PyBobLearnSquareError_Type) < 0) return 0;
 
-  PyBobLearnCrossEntropyLoss_Type.tp_new = &PyBobLearnCost_Type;
+  PyBobLearnCrossEntropyLoss_Type.tp_base = &PyBobLearnCost_Type;
   if (PyType_Ready(&PyBobLearnCrossEntropyLoss_Type) < 0) return 0;
 
 # if PY_VERSION_HEX >= 0x03000000
@@ -105,12 +105,8 @@ static PyObject* create_module (void) {
 
   PyXbobLearnMLP_API[PyBobLearnSquareError_Type_NUM] = (void *)&PyBobLearnSquareError_Type;
 
-  PyXbobLearnMLP_API[PyBobLearnSquareError_Check_NUM] = (void *)&PyBobLearnSquareError_Check;
-
   PyXbobLearnMLP_API[PyBobLearnCrossEntropyLoss_Type_NUM] = (void *)&PyBobLearnCrossEntropyLoss_Type;
 
-  PyXbobLearnMLP_API[PyBobLearnCrossEntropyLoss_Check_NUM] = (void *)&PyBobLearnCrossEntropyLoss_Check;
-
 #if PY_VERSION_HEX >= 0x02070000
 
   /* defines the PyCapsule */
diff --git a/xbob/learn/mlp/test_cost.py b/xbob/learn/mlp/test_cost.py
index 6d151d0eac96809821a9676ceb70b1e21b3a8388..a6c96372fd04a7a13b2066a0ed267f78fddb89af 100644
--- a/xbob/learn/mlp/test_cost.py
+++ b/xbob/learn/mlp/test_cost.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # vim: set fileencoding=utf-8 :
 # Andre Anjos <andre.dos.anjos@gmail.com>
-# Fri  7 Jun 08:59:24 2013 
+# Fri  7 Jun 08:59:24 2013
 
 """Test cost functions
 """
@@ -9,9 +9,10 @@
 import numpy
 import math
 
-from .. import SquareError, CrossEntropyLoss
-from ...machine import LogisticActivation, IdentityActivation
-from . import gradient
+from . import SquareError, CrossEntropyLoss
+from .test_utils import estimate_gradient
+
+from xbob.learn.activation import Logistic, Identity
 
 def is_close(x, y, eps=1e-10):
   return (abs(x - y) < eps)
@@ -32,8 +33,8 @@ def rand_safe4(n, p, q, r, eps=2e-4):
   return numpy.random.rand(n,p,q,r)*(1-2*eps)+eps
 
 def test_square_error():
-  
-  op = SquareError(IdentityActivation())
+
+  op = SquareError(Identity())
   x = rand_safe(10) #10 random numbers between 0 and 1
   y = rand_safe(10) #10 random numbers between 0 and 1
 
@@ -43,8 +44,8 @@ def test_square_error():
     assert is_close(op.f(p,q), expected), 'SquareError does not perform as expected %g != %g' % (op.f(p,q), expected)
 
 def test_square_error_derivative():
-  
-  op = SquareError(IdentityActivation())
+
+  op = SquareError(Identity())
   x = rand_safe(10) #10 random numbers between 0 and 1
   y = rand_safe(10) #10 random numbers between 0 and 1
 
@@ -55,12 +56,12 @@ def test_square_error_derivative():
 
   # go for approximation
   for p,q in zip(x,y):
-    absdiff = abs(op.f_prime(p,q)-gradient.estimate(op.f,p,args=(q,)))
-    assert absdiff < 1e-4, 'SquareError derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(p,q), gradient.estimate(op.f,p,args=(q,)), absdiff)
+    absdiff = abs(op.f_prime(p,q)-estimate_gradient(op.f,p,args=(q,)))
+    assert absdiff < 1e-4, 'SquareError derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(p,q), estimate_gradient(op.f,p,args=(q,)), absdiff)
 
 def test_square_error_error():
-  
-  act = LogisticActivation()
+
+  act = Logistic()
   op = SquareError(act)
   x = rand_safe(10) #10 random numbers between 0 and 1
   y = rand_safe(10) #10 random numbers between 0 and 1
@@ -71,8 +72,8 @@ def test_square_error_error():
     assert is_close(op.error(p,q), expected), 'SquareError error does not perform as expected %g != %g' % (op.error(p,q), expected)
 
 def test_cross_entropy():
-  
-  op = CrossEntropyLoss(LogisticActivation())
+
+  op = CrossEntropyLoss(Logistic())
   x = rand_safe(10) #10 random numbers between 0 and 1
   y = rand_safe(10) #10 random numbers between 0 and 1
 
@@ -82,8 +83,8 @@ def test_cross_entropy():
     assert is_close(op.f(p,q), expected), 'CrossEntropyLoss does not perform as expected %g != %g' % (op.f(p,q), expected)
 
 def test_cross_entropy_derivative():
-  
-  op = CrossEntropyLoss(LogisticActivation())
+
+  op = CrossEntropyLoss(Logistic())
   x = rand_safe(10, eps=0.2) #10 random numbers between 0 and 1
   y = rand_safe(10, eps=0.2) #10 random numbers between 0 and 1
 
@@ -94,26 +95,26 @@ def test_cross_entropy_derivative():
 
   # go for approximation
   for p,q in zip(x,y):
-    reldiff = abs((op.f_prime(p,q)-gradient.estimate(op.f,p,args=(q,))) / op.f_prime(p,q))
-    assert reldiff < 1e-3, 'SquareError derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(p,q), gradient.estimate(op.f,p,args=(q,)), reldiff)
+    reldiff = abs((op.f_prime(p,q)-estimate_gradient(op.f,p,args=(q,))) / op.f_prime(p,q))
+    assert reldiff < 1e-3, 'CrossEntropyLoss derivative and estimation do not match to 10^-3: |%g-%g| = %g' % (op.f_prime(p,q), estimate_gradient(op.f,p,args=(q,)), reldiff)
 
 def test_square_error_equality():
 
-  op1 = SquareError(IdentityActivation())
-  op2 = SquareError(IdentityActivation())
+  op1 = SquareError(Identity())
+  op2 = SquareError(Identity())
 
   assert op1 == op2
 
 def test_cross_entropy_equality():
 
-  op1 = CrossEntropyLoss(IdentityActivation())
-  op2 = CrossEntropyLoss(IdentityActivation())
+  op1 = CrossEntropyLoss(Identity())
+  op2 = CrossEntropyLoss(Identity())
 
   assert op1 == op2
 
 def test_cross_entropy_error_with_logistic():
-  
-  act = LogisticActivation()
+
+  act = Logistic()
   op = CrossEntropyLoss(act)
   x = rand_safe(10) #10 random numbers between 0 and 1
   y = rand_safe(10) #10 random numbers between 0 and 1
@@ -121,11 +122,11 @@ def test_cross_entropy_error_with_logistic():
   # go for an exact match
   for p,q in zip(x,y):
     expected = p-q
-    assert is_close(op.error(p,q), expected), 'CrossEntropyLoss+LogisticActivation error does not perform as expected %g != %g' % (op.error(p,q), expected)
+    assert is_close(op.error(p,q), expected), 'CrossEntropyLoss+Logistic error does not perform as expected %g != %g' % (op.error(p,q), expected)
 
 def test_cross_entropy_error_without_logistic():
-  
-  act = IdentityActivation()
+
+  act = Identity()
   op = CrossEntropyLoss(act)
   x = rand_safe(10) #10 random numbers between 0 and 1
   y = rand_safe(10) #10 random numbers between 0 and 1
@@ -133,20 +134,20 @@ def test_cross_entropy_error_without_logistic():
   # go for an exact match
   for p,q in zip(x,y):
     expected = (p-q)/(p*(1-p))
-    assert is_close(op.error(p,q), expected), 'CrossEntropyLoss+IdentityActivation error does not perform as expected %g != %g' % (op.error(p,q), expected)
+    assert is_close(op.error(p,q), expected), 'CrossEntropyLoss+Identity error does not perform as expected %g != %g' % (op.error(p,q), expected)
 
 def test_cross_entropy_activation_detection():
 
-  op = CrossEntropyLoss(LogisticActivation())
+  op = CrossEntropyLoss(Logistic())
   assert op.logistic_activation
 
-  op = CrossEntropyLoss(IdentityActivation())
+  op = CrossEntropyLoss(Identity())
   assert op.logistic_activation == False
 
 def test_1d_ndarray():
 
   C = rand_safe0()
-  op = SquareError(IdentityActivation())
+  op = SquareError(Identity())
   O = rand_safe(10) #10 random numbers between 0 and 1
   T = rand_safe(10) #10 random numbers between 0 and 1
 
@@ -175,7 +176,7 @@ def test_1d_ndarray():
 def test_2d_ndarray():
 
   C = rand_safe0()
-  op = SquareError(IdentityActivation())
+  op = SquareError(Identity())
   O = rand_safe2(3,3)
   T = rand_safe2(3,3)
 
@@ -204,7 +205,7 @@ def test_2d_ndarray():
 def test_3d_ndarray():
 
   C = rand_safe0()
-  op = SquareError(IdentityActivation())
+  op = SquareError(Identity())
   O = rand_safe3(3,3,3)
   T = rand_safe3(3,3,3)
 
@@ -233,7 +234,7 @@ def test_3d_ndarray():
 def test_4d_ndarray():
 
   C = rand_safe0()
-  op = SquareError(IdentityActivation())
+  op = SquareError(Identity())
   O = rand_safe4(2,2,2,2)
   T = rand_safe4(2,2,2,2)
 
diff --git a/xbob/learn/mlp/test_utils.py b/xbob/learn/mlp/test_utils.py
index 4286c5edbdfb4461deaa861e63abef4ad5a1a3f5..3a8e7e5c5d33c77bc094ea49942b8ddbad284a90 100644
--- a/xbob/learn/mlp/test_utils.py
+++ b/xbob/learn/mlp/test_utils.py
@@ -173,7 +173,7 @@ class TrainableMachine(Machine):
 
     return retval
 
-def estimate(f, x, epsilon=1e-4, args=()):
+def estimate_gradient(f, x, epsilon=1e-4, args=()):
   """Estimate the gradient for a given callable f
 
   Suppose you have a function :math:`f'(x)` that purportedly computes
@@ -243,7 +243,7 @@ def estimate(f, x, epsilon=1e-4, args=()):
   else: # x is scalar
     return (f(x+epsilon, *args) - f(x-epsilon, *args)) / (2*epsilon)
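+
+# A quick sanity sketch (hypothetical values, not part of the test suite):
+# for f(x) = x**2 the central-difference estimate recovers f'(x) = 2*x
+# exactly, e.g. estimate_gradient(lambda v: v**2, 3.0) == 6.0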
 
-def estimate_for_machine(machine, X, cost, target):
+def estimate_gradient_for_machine(machine, X, cost, target):
 
   def func(weights):
     old = machine.weights