/**
 * @author Andre Anjos <andre.anjos@idiap.ch>
 * @date Fri 31 May 15:08:46 2013
 *
 * @brief Implements the Cross Entropy Loss function
 *
 * Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
 */

#ifndef BOB_LEARN_MLP_CROSSENTROPYLOSS_H
#define BOB_LEARN_MLP_CROSSENTROPYLOSS_H

#include <bob.learn.mlp/cost.h>
#include <bob.learn.activation/Activation.h>

namespace bob { namespace learn { namespace mlp {

  /**
   * Calculates the Cross-Entropy Loss between output and target. The cross
   * entropy loss is defined as follows:
   *
   * \f[
   *    J = - y \cdot \log{(\hat{y})} - (1-y) \log{(1-\hat{y})}
   * \f]
   *
   * where \f$\hat{y}\f$ is the output estimated by your machine and \f$y\f$ is
   * the expected output.
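   *
   * A minimal usage sketch, for illustration only (it assumes
   * bob::learn::activation::LogisticActivation, referenced below, is
   * default-constructible):
   *
   * @code
   * boost::shared_ptr<bob::learn::activation::Activation> act =
   *   boost::make_shared<bob::learn::activation::LogisticActivation>();
   * bob::learn::mlp::CrossEntropyLoss cost(act);
   * // J = -1.0*std::log(0.8) - (1.0-1.0)*std::log(1.0-0.8), about 0.2231
   * double j = cost.f(0.8, 1.0);
   * @endcode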
   */
  class CrossEntropyLoss: public Cost {

    public:

      /**
       * Constructor
       *
       * @param actfun Sets the underlying activation function used for error
       * calculation. A special case is foreseen for using this loss function
       * with a logistic activation. In that case, a mathematical
       * simplification is possible from which error() can benefit, increasing
       * the numerical stability of the training process. The simplification
       * goes as follows:
       *
       * \f[
       *    b = \delta \cdot \varphi'(z)
       * \f]
       *
       * But, for the CrossEntropyLoss:
       *
       * \f[
       *    \delta = \frac{\hat{y} - y}{\hat{y}(1 - \hat{y})}
       * \f]
       *
       * and \f$\varphi'(z) = \hat{y} \cdot (1 - \hat{y})\f$, so:
       *
       * \f[
       *    b = \hat{y} - y
       * \f]
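       *
       * As a numerical check of the simplification, take \f$\hat{y} = 0.8\f$
       * and \f$y = 1\f$: \f$\delta = (0.8 - 1)/(0.8 \cdot 0.2) = -1.25\f$,
       * \f$\varphi'(z) = 0.8 \cdot 0.2 = 0.16\f$ and, indeed,
       * \f$b = -1.25 \cdot 0.16 = -0.2 = \hat{y} - y\f$.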
       */
      CrossEntropyLoss(boost::shared_ptr<bob::learn::activation::Activation> actfun);

      /**
       * Virtualized destructor
       */
      virtual ~CrossEntropyLoss();

      /**
       * Tells if this CrossEntropyLoss is set to operate together with a
       * bob::learn::activation::LogisticActivation.
       */
      bool logistic_activation() const { return m_logistic_activation; }

      /**
       * Computes cost, given the current output of the linear machine or MLP
       * and the expected output.
       *
       * @param output Real output from the linear machine or MLP
       *
       * @param target Target output you are training to achieve
       *
       * @return The cost
       */
      virtual double f (double output, double target) const;

      /**
       * Computes the derivative of the cost w.r.t. output.
       *
       * @param output Real output from the linear machine or MLP
       *
       * @param target Target output you are training to achieve
       *
       * @return The calculated derivative
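       *
       * For the cross-entropy loss this derivative is the \f$\delta\f$ term
       * given in the constructor documentation:
       * \f$(\hat{y} - y) / (\hat{y}(1 - \hat{y}))\f$.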
       */
      virtual double f_prime (double output, double target) const;

      /**
       * Computes the back-propagated errors for a given MLP <b>output</b>
       * layer, given its activation function and activation values, i.e., the
       * error back-propagated through the last layer's neurons up to the
       * synapses connecting the last hidden layer to the output layer.
       *
       * This entry point allows for optimizing the calculation of the
       * back-propagated errors when a mathematical simplification exists for
       * a certain combination of cost function and activation, as is the case
       * for this cross-entropy loss paired with a logistic activation.
       *
       * @param output Real output from the linear machine or MLP
       * @param target Target output you are training to achieve
       *
       * @return The calculated error, backpropagated to before the output
       * neuron.
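       *
       * A short sketch, reusing the `cost` object from the class-level
       * example: when logistic_activation() returns true, error() reduces to
       * the difference between output and target.
       *
       * @code
       * double b = cost.error(0.8, 1.0); // simplifies to 0.8 - 1.0 == -0.2
       * @endcode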
       */
      virtual double error (double output, double target) const;

      /**
       * Returns a stringified representation for this Cost function
       */
      virtual std::string str() const;

    private: //representation

      boost::shared_ptr<bob::learn::activation::Activation> m_actfun; ///< activation function
      bool m_logistic_activation; ///< if 'true', simplify backprop_error()

  };

}}}

#endif /* BOB_LEARN_MLP_CROSSENTROPYLOSS_H */