diff --git a/setup.py b/setup.py
index 5cbc90eb57b1c0d13025f075053b47be44f7c0d0..f73037f97fa171e948d8d8dfe2aee3da51ce3240 100644
--- a/setup.py
+++ b/setup.py
@@ -54,9 +54,10 @@ setup(
       [
         "xbob/machine/activation.cpp",
         "xbob/machine/identity_activation.cpp",
-        "xbob/machine/tanh_activation.cpp",
-        "xbob/machine/logistic_activation.cpp",
         "xbob/machine/linear_activation.cpp",
+        "xbob/machine/logistic_activation.cpp",
+        "xbob/machine/tanh_activation.cpp",
+        "xbob/machine/mult_tanh_activation.cpp",
         "xbob/machine/main.cpp",
       ],
       packages = packages,
diff --git a/xbob/machine/mult_tanh_activation.cpp b/xbob/machine/mult_tanh_activation.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..92e67a59a9889cae906f1c1eba9d5df9136b92a9
--- /dev/null
+++ b/xbob/machine/mult_tanh_activation.cpp
@@ -0,0 +1,160 @@
+/**
+ * @author Andre Anjos <andre.anjos@idiap.ch>
+ * @date Mon 13 Jan 2014 17:25:32 CET
+ *
+ * @brief Implementation of the MultipliedHyperbolicTangent Activation function
+ */
+
+#include <xbob.machine/api.h>
+
+PyDoc_STRVAR(s_multtanhactivation_str,
+    XBOB_EXT_MODULE_PREFIX ".MultipliedHyperbolicTangentActivation");
+
+PyDoc_STRVAR(s_multtanhactivation_doc,
+"MultipliedHyperbolicTangentActivation([C=1.0, [M=1.0]]) -> new MultipliedHyperbolicTangentActivation\n\
+\n\
+Computes :math:`f(z) = C \\cdot \\tanh(Mz)` as activation\n\
+function.\n\
+\n\
+Builds a new hyperbolic tangent activation function with given\n\
+constants for the inner (``M``) and outer (``C``) multiplication\n\
+factors. Don't use this if you just want to keep both constants\n\
+at their default value (1.0). In that case, prefer the more\n\
+efficient :py:class:`bob.machine.HyperbolicTangentActivation`.\n\
+");
+
+typedef struct {
+  PyBobMachineActivationObject parent;
+
+  /* Type-specific fields go here. */
+  bob::machine::MultipliedHyperbolicTangentActivation* base;
+
+} PyBobMachineMultipliedHyperbolicTangentActivationObject;
+
+static int PyBobMachineMultipliedHyperbolicTangentActivation_init
+(PyBobMachineMultipliedHyperbolicTangentActivationObject* self, PyObject* args, PyObject* kwds) {
+
+  /* Parses input arguments in a single shot; the keyword list must name
+   * one entry per "d" in the format string for C and M to be usable as
+   * keyword arguments */
+  static const char* const_kwlist[] = {"C", "M", 0};
+  static char** kwlist = const_cast<char**>(const_kwlist);
+
+  double C = 1.0;
+  double M = 1.0;
+
+  if (!PyArg_ParseTupleAndKeywords(args, kwds, "|dd", kwlist, &C, &M))
+    return -1;
+
+  try {
+    self->base = new bob::machine::MultipliedHyperbolicTangentActivation(C, M);
+  }
+  catch (std::exception& ex) {
+    PyErr_SetString(PyExc_RuntimeError, ex.what());
+  }
+  catch (...) {
+    PyErr_Format(PyExc_RuntimeError, "cannot create new object of type `%s' - unknown exception thrown", s_multtanhactivation_str);
+  }
+
+  /* only wire up the parent pointer once construction has succeeded */
+  if (PyErr_Occurred()) return -1;
+
+  self->parent.base = self->base;
+
+  return 0;
+
+}
+
+static void PyBobMachineMultipliedHyperbolicTangentActivation_delete
+(PyBobMachineMultipliedHyperbolicTangentActivationObject* self) {
+
+  delete self->base;
+  self->parent.base = 0;
+  self->base = 0;
+  self->parent.ob_type->tp_free((PyObject*)self);
+
+}
+
+PyDoc_STRVAR(s_C_str, "C");
+PyDoc_STRVAR(s_C_doc,
+"The outer multiplication factor for the multiplied hyperbolic\n\
+tangent function (read-only).\n\
+");
+
+static PyObject* PyBobMachineMultipliedHyperbolicTangentActivation_C
+(PyBobMachineMultipliedHyperbolicTangentActivationObject* self) {
+
+  return Py_BuildValue("d", self->base->C());
+
+}
+
+PyDoc_STRVAR(s_M_str, "M");
+PyDoc_STRVAR(s_M_doc,
+"The inner multiplication factor for the multiplied hyperbolic\n\
+tangent function (read-only).\n\
+");
+
+static PyObject* PyBobMachineMultipliedHyperbolicTangentActivation_M
+(PyBobMachineMultipliedHyperbolicTangentActivationObject* self) {
+
+  return Py_BuildValue("d", self->base->M());
+
+}
+
+static PyGetSetDef PyBobMachineMultipliedHyperbolicTangentActivation_getseters[] = {
+    {
+      s_C_str,
+      (getter)PyBobMachineMultipliedHyperbolicTangentActivation_C,
+      0,
+      s_C_doc,
+      0
+    },
+    {
+      s_M_str,
+      (getter)PyBobMachineMultipliedHyperbolicTangentActivation_M,
+      0,
+      s_M_doc,
+      0
+    },
+    {0}  /* Sentinel */
+};
+
+PyTypeObject PyBobMachineMultipliedHyperbolicTangentActivation_Type = {
+    PyObject_HEAD_INIT(0)
+    0,                                          /*ob_size*/
+    s_multtanhactivation_str,                   /*tp_name*/
+    sizeof(PyBobMachineMultipliedHyperbolicTangentActivationObject), /*tp_basicsize*/
+    0,                                          /*tp_itemsize*/
+    (destructor)PyBobMachineMultipliedHyperbolicTangentActivation_delete, /*tp_dealloc*/
+    0,                                          /*tp_print*/
+    0,                                          /*tp_getattr*/
+    0,                                          /*tp_setattr*/
+    0,                                          /*tp_compare*/
+    0,                                          /*tp_repr*/
+    0,                                          /*tp_as_number*/
+    0,                                          /*tp_as_sequence*/
+    0,                                          /*tp_as_mapping*/
+    0,                                          /*tp_hash */
+    0,                                          /*tp_call*/
+    0,                                          /*tp_str*/
+    0,                                          /*tp_getattro*/
+    0,                                          /*tp_setattro*/
+    0,                                          /*tp_as_buffer*/
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /*tp_flags*/
+    s_multtanhactivation_doc,                   /* tp_doc */
+    0,                                          /* tp_traverse */
+    0,                                          /* tp_clear */
+    0,                                          /* tp_richcompare */
+    0,                                          /* tp_weaklistoffset */
+    0,                                          /* tp_iter */
+    0,                                          /* tp_iternext */
+    0,                                          /* tp_methods */
+    0,                                          /* tp_members */
+    PyBobMachineMultipliedHyperbolicTangentActivation_getseters, /* tp_getset */
+    0,                                          /* tp_base */
+    0,                                          /* tp_dict */
+    0,                                          /* tp_descr_get */
+    0,                                          /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    (initproc)PyBobMachineMultipliedHyperbolicTangentActivation_init, /* tp_init */
+    0,                                          /* tp_alloc */
+    0,                                          /* tp_new */
+};
diff --git a/xbob/machine/test_activation.py b/xbob/machine/test_activation.py
index 788e598f0369554279496ccd96774b1ab1f01df4..af0b7731ab4ce10abf2a3d7e02af6459a028b1e2 100644
--- a/xbob/machine/test_activation.py
+++ b/xbob/machine/test_activation.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # vim: set fileencoding=utf-8 :
 # Andre Anjos <andre.dos.anjos@gmail.com>
-# Sun 2 Jun 16:42:52 2013 
+# Sun 2 Jun 16:42:52 2013
 
 """Test activation functions
 """
@@ -9,19 +9,19 @@
 import numpy
 import math
 
-from .. import IdentityActivation, \
+from . import IdentityActivation, \
     LinearActivation, \
     HyperbolicTangentActivation, \
     MultipliedHyperbolicTangentActivation, \
     LogisticActivation
-from ...trainer.test import gradient
+from .test_utils import estimate_gradient
 
 def is_close(x, y, eps=1e-10):
   return (abs(x - y) < eps)
 
 def test_identity():
-  
+
   op = IdentityActivation()
   x = numpy.random.rand(10) #10 random numbers between 0 and 1
@@ -30,7 +30,7 @@ def test_identity():
   assert is_close(op.f(k), k), 'IdentityActivation does not perform identity %g != %g' % (op.f(k), k)
 
 def test_identity_derivative():
-  
+
   op = IdentityActivation()
 
   x = numpy.random.rand(10) #10 random numbers between 0 and 1
@@ -40,11 +40,11 @@ def test_identity_derivative():
 
   # tries to estimate the gradient and check
   for k in x:
-    absdiff = abs(op.f_prime(k)-gradient.estimate(op.f,k))
-    assert absdiff < 1e-4, 'IdentityActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), gradient.estimate(op.f,k), absdiff)
+    absdiff = abs(op.f_prime(k)-estimate_gradient(op.f,k))
+    assert absdiff < 1e-4, 'IdentityActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), estimate_gradient(op.f,k), absdiff)
 
 def test_linear():
-  
+
   C = numpy.random.rand()
   op = LinearActivation(C)
   x = numpy.random.rand(10) #10 random numbers between 0 and 1
@@ -54,7 +54,7 @@ def test_linear():
   assert is_close(op.f(k), (C*k)), 'LinearActivation does not match expected value: %g != %g' % (op.f(k), C*k)
 
 def test_linear_derivative():
-  
+
   C = numpy.random.rand()
   op = LinearActivation(C)
   x = numpy.random.rand(10) #10 random numbers between 0 and 1
@@ -62,14 +62,14 @@ def test_linear_derivative():
   # go for an exact match
   for k in x:
     assert is_close(op.f_prime(k), C), 'LinearActivation derivative does not match expected value: %g != %g' % (op.f_prime(k), k)
-  
+
   # tries to estimate the gradient and check
   for k in x:
-    absdiff = abs(op.f_prime(k)-gradient.estimate(op.f,k))
-    assert absdiff < 1e-4, 'IdentityActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), gradient.estimate(op.f,k), absdiff)
+    absdiff = abs(op.f_prime(k)-estimate_gradient(op.f,k))
+    assert absdiff < 1e-4, 'LinearActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), estimate_gradient(op.f,k), absdiff)
 
 def test_hyperbolic_tangent():
-  
+
   op = HyperbolicTangentActivation()
   x = numpy.random.rand(10) #10 random numbers between 0 and 1
@@ -78,7 +78,7 @@ def test_hyperbolic_tangent():
   assert is_close(op.f(k), math.tanh(k)), 'HyperbolicTangentActivation does not match expected value: %g != %g' % (op.f(k), math.tanh(k))
 
 def test_hyperbolic_tangent_derivative():
-  
+
   op = HyperbolicTangentActivation()
   x = numpy.random.rand(10) #10 random numbers between 0 and 1
@@ -86,14 +86,14 @@ def test_hyperbolic_tangent_derivative():
   for k in x:
     precise = 1 - op.f(k)**2
     assert is_close(op.f_prime(k), precise), 'HyperbolicTangentActivation derivative does not match expected value: %g != %g' % (op.f_prime(k), precise)
-  
+
   # tries to estimate the gradient and check
   for k in x:
-    absdiff = abs(op.f_prime(k)-gradient.estimate(op.f,k))
-    assert absdiff < 1e-4, 'HyperbolicTangentActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), gradient.estimate(op.f,k), absdiff)
+    absdiff = abs(op.f_prime(k)-estimate_gradient(op.f,k))
+    assert absdiff < 1e-4, 'HyperbolicTangentActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), estimate_gradient(op.f,k), absdiff)
 
 def test_logistic():
-  
+
   op = LogisticActivation()
 
   x = numpy.random.rand(10) #10 random numbers between 0 and 1
@@ -103,7 +103,7 @@ def test_logistic():
   assert is_close(op.f(k), precise), 'LogisticActivation does not match expected value: %g != %g' % (op.f(k), precise)
 
 def test_logistic_derivative():
-  
+
   op = LogisticActivation()
 
   x = numpy.random.rand(10) #10 random numbers between 0 and 1
@@ -111,11 +111,11 @@ def test_logistic_derivative():
   for k in x:
     precise = op.f(k) * (1 - op.f(k))
     assert is_close(op.f_prime(k), precise), 'LogisticActivation derivative does not match expected value: %g != %g' % (op.f_prime(k), precise)
-  
+
   # tries to estimate the gradient and check
   for k in x:
-    absdiff = abs(op.f_prime(k)-gradient.estimate(op.f,k))
-    assert absdiff < 1e-4, 'LogisticActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), gradient.estimate(op.f,k), absdiff)
+    absdiff = abs(op.f_prime(k)-estimate_gradient(op.f,k))
+    assert absdiff < 1e-4, 'LogisticActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), estimate_gradient(op.f,k), absdiff)
 
 def test_multiplied_tanh():
@@ -129,7 +129,7 @@ def test_multiplied_tanh():
   assert is_close(op.f(k), C*math.tanh(M*k)), 'MultipliedHyperbolicTangentActivation does not match expected value: %g != %g' % (op.f(k), C*math.tanh(M*k))
 
 def test_multiplied_tanh_derivative():
-  
+
   C = numpy.random.rand()
   M = numpy.random.rand()
   op = MultipliedHyperbolicTangentActivation(C, M)
@@ -139,11 +139,11 @@ def test_multiplied_tanh_derivative():
   for k in x:
     precise = C*M*(1-math.pow(math.tanh(M*k),2))
     assert is_close(op.f_prime(k),precise), 'MultipliedHyperbolicTangentActivation derivative does not match expected value: %g != %g' % (op.f_prime(k), precise)
-  
+
   # tries to estimate the gradient and check
   for k in x:
-    absdiff = abs(op.f_prime(k)-gradient.estimate(op.f,k))
-    assert absdiff < 1e-4, 'MultipliedHyperbolicTangentActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), gradient.estimate(op.f,k), absdiff)
+    absdiff = abs(op.f_prime(k)-estimate_gradient(op.f,k))
+    assert absdiff < 1e-4, 'MultipliedHyperbolicTangentActivation derivative and estimation do not match to 10^-4: |%g-%g| = %g' % (op.f_prime(k), estimate_gradient(op.f,k), absdiff)
 
 def test_1d_ndarray():
diff --git a/xbob/machine/test_utils.py b/xbob/machine/test_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a915559818f1e28a19fd8276e93448c28168369
--- /dev/null
+++ b/xbob/machine/test_utils.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Andre Anjos <andre.dos.anjos@gmail.com>
+# Sun 2 Jun 16:09:29 2013
+
+"""Utilities for checking gradients.
+"""
+
+import numpy
+
+def estimate_gradient(f, x, epsilon=1e-4, args=()):
+  r"""Estimate the gradient for a given callable f
+
+  Suppose you have a function :math:`f'(x)` that purportedly computes
+  :math:`\frac{\partial f(x)}{\partial x}`. You'd like to check if
+  :math:`f'(x)` is outputting correct derivative values. You can then use this
+  function to estimate the gradient around a point and compare it to the
+  output of :math:`f'(x)`. The estimate can be accurate to a few decimal
+  places.
+
+  Imagine a random value for :math:`x`, called :math:`x_t` (for test). Now
+  imagine you modify one of the elements in :math:`x_t` so that
+  :math:`x_{t+\epsilon}` has that element increased by a small (positive)
+  value :math:`\epsilon` and :math:`x_{t-\epsilon}` has it decreased by the
+  same amount.
+
+  In this case, one can use a truncated Taylor expansion of the derivative
+  to calculate the approximate supposed value:
+
+  .. math::
+    f'(x_t) \sim \frac{f(x_{t+\epsilon}) - f(x_{t-\epsilon})}{2\epsilon}
+
+  The degree to which these two values should approximate each other will
+  depend on the details of :math:`f(x)`. But assuming :math:`\epsilon =
+  10^{-4}`, you'll usually find that the left- and right-hand sides of the
+  above will agree to at least 4 significant digits (and often many more).
+
+  Keyword arguments:
+
+  f
+    The function which you'd like to have the gradient estimated for.
+
+  x
+    The input to ``f``. This must be the first parameter ``f`` receives. If
+    that is not the case, you must write a wrapper around ``f`` so it does the
+    parameter inversion and provide that wrapper instead.
+
+    If ``f`` expects a multi-dimensional array, then this entry should be a
+    :py:class:`numpy.ndarray` with as many dimensions as required for ``f``.
+
+  epsilon
+    The step used for the central-difference estimate (1e-4 by default)
+
+  args (optional)
+    Extra arguments (a tuple) to ``f``
+
+  This function returns the estimated value for :math:`f'(x)` given ``x``.
+
+  .. note::
+
+    Gradient estimation is a powerful tool for testing if a function is
+    correctly computing the derivative of another function, but can be quite
+    slow. It therefore is not a good replacement for writing specific code
+    that can compute the derivative of ``f``.
+  """
+
+  if isinstance(x, numpy.ndarray):
+
+    # perturbs one element at a time and estimates each partial derivative
+    retval = numpy.ndarray(x.shape, dtype=x.dtype)
+    for k in range(x.size):
+      xt_plus = x.copy()
+      xt_plus.flat[k] += epsilon
+      xt_minus = x.copy()
+      xt_minus.flat[k] -= epsilon
+      retval.flat[k] = (f(xt_plus,*args) - f(xt_minus,*args)) / (2*epsilon)
+    return retval
+
+  else: # x is scalar
+    return (f(x+epsilon, *args) - f(x-epsilon, *args)) / (2*epsilon)
+
+def estimate_gradient_for_machine(machine, X, cost, target):
+
+  def func(weights):
+    old = machine.weights
+    machine.weights = machine.roll(weights)
+    retval = cost.f(machine.forward(X), target).mean(axis=0).sum()
+    machine.weights = old
+    return retval
+
+  weights = machine.unroll()
+  est = estimate_gradient(func, weights)
+  machine.weights = machine.roll(weights)
+
+  return machine.roll(est) # format using the machine's organization
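Reviewer note: the two new pieces of this patch fit together as follows — the central-difference helper can cross-check the analytical derivative implemented by the new C++ binding, which is exactly what test_multiplied_tanh_derivative does. A minimal sketch, assuming the extension is built and importable under this package's layout (the import paths below follow the ``xbob.machine`` naming used throughout the patch):

    import math
    import numpy
    from xbob.machine import MultipliedHyperbolicTangentActivation
    from xbob.machine.test_utils import estimate_gradient

    C, M = 2.0, 0.5
    op = MultipliedHyperbolicTangentActivation(C, M)

    for k in numpy.random.rand(10):
      # the binding's analytical derivative: f'(z) = C*M*(1 - tanh(M*z)**2)
      exact = C * M * (1 - math.tanh(M * k)**2)
      assert abs(op.f_prime(k) - exact) < 1e-10
      # the central-difference estimate should agree to roughly 1e-4
      assert abs(op.f_prime(k) - estimate_gradient(op.f, k)) < 1e-4

The same helper also works element-wise on numpy.ndarray inputs, which is what estimate_gradient_for_machine relies on when it unrolls a machine's weights into a flat vector.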