Commit ce6b05b1 authored by Manuel Günther's avatar Manuel Günther
Browse files

Implmented LUTTrainer and JesorskyLoss in C++.

parent 8212087e
......@@ -3,16 +3,16 @@
; Wed Feb 19 13:56:42 CET 2014
[buildout]
parts = xbob.boosting scripts
parts = scripts
eggs = xbob.boosting
newest = false
verbose = true
;debug = true
extensions = xbob.buildout
develop = .
[xbob.boosting]
recipe = xbob.buildout:develop
[scripts]
recipe = xbob.buildout:scripts
......
......@@ -73,6 +73,9 @@ setup(
"xbob/boosting/cpp/StumpMachine.cpp",
"xbob/boosting/cpp/LUTMachine.cpp",
"xbob/boosting/cpp/BoostedMachine.cpp",
"xbob/boosting/cpp/LUTTrainer.cpp",
"xbob/boosting/cpp/LossFunction.cpp",
"xbob/boosting/cpp/JesorskyLoss.cpp",
"xbob/boosting/cpp/bindings.cpp",
],
pkgconfig = [
......
# import the C++ stuff
from ._boosting import StumpMachine, LUTMachine, BoostedMachine, weighted_histogram
from ._boosting import StumpMachine, LUTMachine, BoostedMachine
from . import trainer
from . import loss
......
......@@ -2,15 +2,6 @@
#include "StumpMachine.h"
#include "LUTMachine.h"
// This is a fast implementation of the weighted histogram
inline void weighted_histogram(const blitz::Array<uint16_t,1>& features, const blitz::Array<double,1>& weights, blitz::Array<double,1>& histogram){
assert(features.extent(0) == weights.extent(0));
histogram = 0.;
for (int i = features.extent(0); i--;){
histogram((int)features(i)) += weights(i);
}
}
inline boost::shared_ptr<WeakMachine> loadWeakMachine(bob::io::HDF5File& file){
std::string machine_type;
file.getAttribute(".", "MachineType", machine_type);
......
#include "JesorskyLoss.h"
#include <math.h>
static inline double sqr(const double x){
return x*x;
}
double JesorskyLoss::interEyeDistance(const double y1, const double x1, const double y2, const double x2) const{
return sqrt(sqr(y1 - y2) + sqr(x1 - x2));
}
void JesorskyLoss::loss(const blitz::Array<double, 2>& targets, const blitz::Array<double, 2>& scores, blitz::Array<double, 2>& errors) const{
// compute one error for each sample
errors = 0.;
for (int i = targets.extent(0); i--;){
// compute inter-eye-distance
double scale = 0.5/interEyeDistance(targets(i,0), targets(i,1), targets(i,2), targets(i,3));
// compute error for all positions
// which are assumed to be 2D points
for (int j = 0; j < targets.extent(1); j += 2){
double dx = scores(i, j) - targets(i, j);
double dy = scores(i, j+1) - targets(i, j+1);
// sum errors
errors(i,0) += sqrt(sqr(dx) + sqr(dy)) * scale;
}
}
}
void JesorskyLoss::lossGradient(const blitz::Array<double, 2>& targets, const blitz::Array<double, 2>& scores, blitz::Array<double, 2>& gradient) const{
// # allocate memory for the gradients
// gradient = numpy.ndarray(targets.shape, numpy.float)
for (int i = targets.extent(0); i--;){
// compute inter-eye-distance
double scale = 0.5/interEyeDistance(targets(i,0), targets(i,1), targets(i,2), targets(i,3));
// compute error for all positions
// which are assumed to be 2D points
for (int j = 0; j < targets.extent(1); j += 2){
double dx = scores(i, j) - targets(i, j);
double dy = scores(i, j+1) - targets(i, j+1);
double error = scale / sqrt(sqr(dx) + sqr(dy));
// set gradient
gradient(i, j) = dx * error;
gradient(i, j+1) = dy * error;
}
}
}
#ifndef XBOB_BOOSTING_JESORSKY_LOSS_H
#define XBOB_BOOSTING_JESORSKY_LOSS_H
#include <blitz/array.h>
#include "LossFunction.h"
/**
* This machine uses a Look-Up-Table-based decision using *discrete* features.
*
* For each discrete value of the feature, either +1 or -1 is returned.
* This machine can be used in a multi-variate environment.
*/
class JesorskyLoss : public LossFunction{
public:
// Create an LUT machine using the given LUT and the given index
JesorskyLoss(){}
void loss(const blitz::Array<double, 2>& targets, const blitz::Array<double, 2>& scores, blitz::Array<double, 2>& errors) const;
void lossGradient(const blitz::Array<double, 2>& targets, const blitz::Array<double, 2>& scores, blitz::Array<double, 2>& gradient) const;
private:
double interEyeDistance(const double y1, const double x1, const double y2, const double x2) const;
};
#endif // XBOB_BOOSTING_JESORSKY_LOSS_H
#include "LUTTrainer.h"
#include "Functions.h"
#include <limits>
LUTTrainer::LUTTrainer(uint16_t maximumFeatureValue, int numberOfOutputs, SelectionStyle selectionType) :
m_maximumFeatureValue(maximumFeatureValue),
m_numberOfOutputs(numberOfOutputs),
m_selectionType(selectionType),
_luts(maximumFeatureValue, numberOfOutputs),
_selectedIndices(numberOfOutputs),
_gradientHistogram(maximumFeatureValue)
{
}
int32_t LUTTrainer::bestIndex(const blitz::Array<double,1>& array) const{
double min = std::numeric_limits<double>::max();
int32_t minIndex = -1;
for (int i = 0; i < array.extent(0); ++i){
if (array(i) < min){
min = array(i);
minIndex = i;
}
}
return minIndex;
}
void LUTTrainer::weightedHistogram(const blitz::Array<uint16_t,1>& features, const blitz::Array<double,1>& weights) const{
assert(features.extent(0) == weights.extent(0));
_gradientHistogram = 0.;
for (int i = features.extent(0); i--;){
_gradientHistogram((int)features(i)) += weights(i);
}
}
LUTMachine LUTTrainer::train(const blitz::Array<uint16_t,2>& trainingFeatures, const blitz::Array<double,2>& lossGradient) const{
int featureLength = trainingFeatures.extent(1);
_lossSum.resize(featureLength, m_numberOfOutputs);
// Compute the sum of the gradient based on the feature values or the loss associated with each feature index
// Compute the loss for each feature
for (int featureIndex = featureLength; featureIndex--;){
for (int outputIndex = m_numberOfOutputs; outputIndex--;){
weightedHistogram(trainingFeatures(blitz::Range::all(),featureIndex), lossGradient(blitz::Range::all(), outputIndex));
_lossSum(featureIndex,outputIndex) = - blitz::sum(blitz::abs(_gradientHistogram));
}
}
// Select the most discriminative index (or indices) for classification which minimizes the loss
// and compute the sum of gradient for that index
if (m_selectionType == independent){
// independent feature selection is used if all the dimension of output use different feature
// each of the selected feature minimize a dimension of the loss function
for (int outputIndex = m_numberOfOutputs; outputIndex--;){
_selectedIndices(outputIndex) = bestIndex(_lossSum(blitz::Range::all(),outputIndex));
}
} else {
// for 'shared' feature selection the loss function is summed over multiple dimensions and
// the feature that minimized this cumulative loss is used for all the outputs
blitz::secondIndex j;
const blitz::Array<double,1> sum(blitz::sum(_lossSum, j));
_selectedIndices = bestIndex(sum);
}
for (int outputIndex = m_numberOfOutputs; outputIndex--;){
int selectedIndex = _selectedIndices(outputIndex);
weightedHistogram(trainingFeatures(blitz::Range::all(), selectedIndex), lossGradient(blitz::Range::all(), outputIndex));
for (int lutIndex = m_maximumFeatureValue; lutIndex--;){
_luts(lutIndex, outputIndex) = (_gradientHistogram(lutIndex) > 0) * 2. - 1.;
}
}
// create new weak machine
return LUTMachine(_luts.copy(), _selectedIndices.copy());
}
#ifndef XBOB_BOOSTING_LUT_TRAINER_H
#define XBOB_BOOSTING_LUT_TRAINER_H
#include "LUTMachine.h"
/**
* This machine uses a Look-Up-Table-based decision using *discrete* features.
*
* For each discrete value of the feature, either +1 or -1 is returned.
* This machine can be used in a multi-variate environment.
*/
class LUTTrainer{
public:
typedef enum {
independent = 0,
shared = 1
} SelectionStyle;
// Create an LUT machine using the given LUT and the given index
LUTTrainer(uint16_t maximumFeatureValue, int numberOfOutputs = 1, SelectionStyle selectionType = independent);
LUTMachine train(const blitz::Array<uint16_t, 2>& training_features, const blitz::Array<double,2>& loss_gradient) const;
uint16_t maximumFeatureValue() const {return m_maximumFeatureValue;}
int numberOfOutputs() const {return m_numberOfOutputs;}
SelectionStyle selectionType() const {return m_selectionType;}
private:
int32_t bestIndex(const blitz::Array<double,1>& array) const;
void weightedHistogram(const blitz::Array<uint16_t,1>& features, const blitz::Array<double,1>& weights) const;
uint16_t m_maximumFeatureValue;
int m_numberOfOutputs;
SelectionStyle m_selectionType;
// pre-allocated arrays for faster access
mutable blitz::Array<double,2> _luts;
mutable blitz::Array<int32_t,1> _selectedIndices;
mutable blitz::Array<double,1> _gradientHistogram;
mutable blitz::Array<double,2> _lossSum;
};
#endif // XBOB_BOOSTING_LUT_TRAINER_H
#include "LossFunction.h"
#include <math.h>
static inline double sqr(const double x){
return x*x;
}
void LossFunction::lossSum(const blitz::Array<double,1>& alpha, const blitz::Array<double,2>& targets, const blitz::Array<double,2>& previous_scores, const blitz::Array<double,2>& current_scores, blitz::Array<double,1>& loss_sum) const{
// compute the scores and loss for the current alpha
scores.resize(targets.shape());
// TODO: is there any faster way for this?
for (int i = scores.extent(0); i--;){
for (int j = scores.extent(1); j--;){
scores(i,j) = previous_scores(i,j) + alpha(j) * current_scores(i,j);
}
}
errors.resize(targets.extent(0), 1);
loss(targets, scores, errors);
// compute the sum of the loss
blitz::firstIndex i;
blitz::secondIndex j;
loss_sum = blitz::sum(errors(j,i), j);
}
void LossFunction::gradientSum(const blitz::Array<double,1>& alpha, const blitz::Array<double,2>& targets, const blitz::Array<double,2>& previous_scores, const blitz::Array<double,2>& current_scores, blitz::Array<double,1>& gradient_sum) const{
// compute the scores and gradient for the current alpha
scores.resize(targets.shape());
// TODO: is there any faster way for this?
for (int i = scores.extent(0); i--;){
for (int j = scores.extent(1); j--;){
scores(i,j) = previous_scores(i,j) + alpha(j) * current_scores(i,j);
}
}
// scores = previous_scores + alpha * current_scores;
gradients.resize(targets.shape());
lossGradient(targets, scores, gradients);
// take the sum of the loss gradient values
const blitz::Array<double, 2> grad(gradients * current_scores);
blitz::firstIndex i;
blitz::secondIndex j;
gradient_sum = blitz::sum(grad(j,i), j);
}
#ifndef XBOB_BOOSTING_LOSS_FUNCTION_H
#define XBOB_BOOSTING_LOSS_FUNCTION_H
#include <blitz/array.h>
class LossFunction{
public:
void lossSum(const blitz::Array<double,1>& alpha, const blitz::Array<double,2>& targets, const blitz::Array<double,2>& previous_scores, const blitz::Array<double,2>& current_scores, blitz::Array<double,1>& loss_sum) const;
void gradientSum(const blitz::Array<double,1>& alpha, const blitz::Array<double,2>& targets, const blitz::Array<double,2>& previous_scores, const blitz::Array<double,2>& current_scores, blitz::Array<double,1>& gradient_sum) const;
virtual void loss(const blitz::Array<double, 2>& targets, const blitz::Array<double, 2>& scores, blitz::Array<double, 2>& errors) const = 0;
virtual void lossGradient(const blitz::Array<double, 2>& targets, const blitz::Array<double, 2>& scores, blitz::Array<double, 2>& gradient) const = 0;
protected:
// This class is not instantiatable
LossFunction(){}
private:
mutable blitz::Array<double,2> scores;
mutable blitz::Array<double,2> errors;
mutable blitz::Array<double,2> gradients;
};
#endif // XBOB_BOOSTING_LOSS_FUNCTION_H
......@@ -10,6 +10,10 @@
#include "StumpMachine.h"
#include "LUTMachine.h"
#include "BoostedMachine.h"
#include "JesorskyLoss.h"
#include "LUTTrainer.h"
#include "Functions.h"
using namespace boost::python;
......@@ -58,6 +62,7 @@ static blitz::Array<int32_t, 1> get_indices(const BoostedMachine& self){
return self.getIndices();
}
#if 0
// Wrapper functions for the weighted histogram
static void weighted_histogram1(bob::python::const_ndarray features, bob::python::const_ndarray weights, blitz::Array<double,1> histogram){
weighted_histogram(features.bz<uint16_t,1>(), weights.bz<double,1>(), histogram);
......@@ -67,6 +72,33 @@ static blitz::Array<double, 1> weighted_histogram2(bob::python::const_ndarray fe
weighted_histogram(features.bz<uint16_t,1>(), weights.bz<double,1>(), retval);
return retval;
}
#endif
// Wrapper functions for Jesorsky loss
static blitz::Array<double,1> jesorsky_loss_sum(const JesorskyLoss& loss, const blitz::Array<double,1>& alpha, const blitz::Array<double,2>& targets, const blitz::Array<double,2>& previous_scores, const blitz::Array<double,2>& current_scores){
blitz::Array<double, 1> retval(1);
loss.lossSum(alpha, targets, previous_scores, current_scores, retval);
return retval;
}
static blitz::Array<double,1> jesorsky_gradient_sum(const JesorskyLoss& loss, const blitz::Array<double,1>& alpha, const blitz::Array<double,2>& targets, const blitz::Array<double,2>& previous_scores, const blitz::Array<double,2>& current_scores){
blitz::Array<double, 1> retval(targets.extent(1));
loss.gradientSum(alpha, targets, previous_scores, current_scores, retval);
return retval;
}
static blitz::Array<double,2> jesorsky_loss(const JesorskyLoss& loss, const blitz::Array<double,2>& targets, const blitz::Array<double, 2>& scores){
blitz::Array<double,2> retval(targets.extent(0), 1);
loss.loss(targets, scores, retval);
return retval;
}
static blitz::Array<double,2> jesorsky_loss_gradient(const JesorskyLoss& loss, const blitz::Array<double,2>& targets, const blitz::Array<double, 2>& scores){
blitz::Array<double,2> retval(targets.extent(0), targets.extent(1));
loss.lossGradient(targets, scores, retval);
return retval;
}
BOOST_PYTHON_MODULE(_boosting) {
......@@ -139,7 +171,27 @@ BOOST_PYTHON_MODULE(_boosting) {
.add_property("weak_machines", &get_weak_machines, "The weak machines.")
;
enum_<LUTTrainer::SelectionStyle>("SelectionStyle")
.value("independent", LUTTrainer::independent)
.value("shared", LUTTrainer::shared)
.export_values();
class_<LUTTrainer, boost::shared_ptr<LUTTrainer> >("LUTTrainer", "A trainer to train a LUTMachine", init<uint16_t, optional<int,LUTTrainer::SelectionStyle> >())
.def("train", &LUTTrainer::train, (arg("self"), arg("training_features"), arg("loss_gradient")), "Trains and returns a LUTMachine.")
.add_property("number_of_labels", &LUTTrainer::maximumFeatureValue, "The highest feature value + 1.")
.add_property("number_of_outputs", &LUTTrainer::numberOfOutputs, "The dimensionality of the output vector (1 for the uni-variate case)")
.add_property("selection_type", &LUTTrainer::selectionType, "The style for selecting features (valid for multi-variate case only)")
;
// bind jesorsky loss function
class_<JesorskyLoss>("JesorskyLoss", "The loss function to compute the Jesorsky loss", init<>())
.def("loss", &jesorsky_loss, (arg("self"), arg("targets"), arg("scores")), "Computes the loss between the given targets and scores")
.def("loss_gradient", &jesorsky_loss_gradient, (arg("self"), arg("targets"), arg("scores")), "Computes the loss gradient for the given targets and scores.")
.def("loss_sum", &jesorsky_loss_sum, (arg("self"), arg("alpha"), arg("targets"), arg("previous_scores"), arg("current_scores")), "Computes the sum of the loss for the given targets and scores")
.def("loss_gradient_sum", &jesorsky_gradient_sum, (arg("self"), arg("alpha"), arg("targets"), arg("previous_scores"), arg("current_scores")), "Computes the sum of the gradients for the given targets and scores")
;
// bind auxiliary functions
def("weighted_histogram", &weighted_histogram1, (arg("features"), arg("weights"), arg("histogram")), "Computes the histogram of features, using the given weight for each feature.");
def("weighted_histogram", &weighted_histogram2, (arg("features"), arg("weights"), arg("bin_count")), "Computes and returns the histogram of features, using the given weight for each feature.");
// def("weighted_histogram", &weighted_histogram1, (arg("features"), arg("weights"), arg("histogram")), "Computes the histogram of features, using the given weight for each feature.");
// def("weighted_histogram", &weighted_histogram2, (arg("features"), arg("weights"), arg("bin_count")), "Computes and returns the histogram of features, using the given weight for each feature.");
}
......@@ -57,7 +57,7 @@ class LossFunction:
Returns
(float <#samples>) The sum of the loss values for the current value of the alpha
(float <#outputs>) The sum of the loss values for the current value of the alpha
"""
# compute the scores and loss for the current alpha
......@@ -85,7 +85,7 @@ class LossFunction:
current_scores (float <#samples, #outputs>): The prediction scores of the samples for the current round of the boosting.
Returns
(float <#samples>) The sum of the loss gradient for the current value of the alpha.
(float <#outputs>) The sum of the loss gradient for the current value of the alpha.
"""
# compute the loss gradient for the updated score
......
from .ExponentialLoss import ExponentialLoss
from .LogitLoss import LogitLoss
from .TangentialLoss import TangentialLoss
from .JesorskyLoss import JesorskyLoss
# from .JesorskyLoss import JesorskyLoss
from .._boosting import JesorskyLoss
......@@ -82,7 +82,7 @@ class TestBoosting(unittest.TestCase):
# for stump trainers, the logit loss function is preferred
loss_function = xbob.boosting.loss.LogitLoss()
weak_trainer = xbob.boosting.trainer.LUTTrainer(256,inputs.shape[1])
weak_trainer = xbob.boosting.trainer.LUTTrainer(256)
booster = xbob.boosting.trainer.Boosting(weak_trainer, loss_function, number_of_rounds=1)
# perform boosting
......@@ -117,7 +117,7 @@ class TestBoosting(unittest.TestCase):
# for stump trainers, the logit loss function is preferred
loss_function = xbob.boosting.loss.LogitLoss()
weak_trainer = xbob.boosting.trainer.LUTTrainer(256, inputs.shape[1], len(digits), 'shared')
weak_trainer = xbob.boosting.trainer.LUTTrainer(256, len(digits), xbob.boosting.trainer.SelectionStyle.shared)
booster = xbob.boosting.trainer.Boosting(weak_trainer, loss_function, number_of_rounds=1)
# perform boosting
......@@ -153,7 +153,7 @@ class TestBoosting(unittest.TestCase):
# for stump trainers, the logit loss function is preferred
loss_function = xbob.boosting.loss.LogitLoss()
weak_trainer = xbob.boosting.trainer.LUTTrainer(256, inputs.shape[1], len(digits), 'independent')
weak_trainer = xbob.boosting.trainer.LUTTrainer(256, len(digits), xbob.boosting.trainer.SelectionStyle.independent)
booster = xbob.boosting.trainer.Boosting(weak_trainer, loss_function, number_of_rounds=1)
# perform boosting
......
......@@ -102,7 +102,7 @@ class TestExponentialLoss(unittest.TestCase):
score = numpy.array([[0.5, 0.5], [0.5, 0.5], [0.5, 0.5]], 'float64')
alpha = 0.5
weak_scores = numpy.array([[0.2, 0.4], [0.5, 0.6], [0.5, 0.5]], 'float64')
prev_scores = numpy.array([[0.1, 0.2],[0.3, 0.4], [0.5, 0.5]], 'float64')
prev_scores = numpy.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.5]], 'float64')
# check the loss dimensions
loss_value = loss_function.loss(targets, score)
......
......@@ -15,9 +15,9 @@ class TestJesorskyLoss(unittest.TestCase):
loss_function = xbob.boosting.loss.JesorskyLoss()
num_samples = 2
num_outputs = 4
targets = numpy.array([[10, 10, 10, 30], [12, 11, 13, 29]])
targets = numpy.array([[10, 10, 10, 30], [12, 11, 13, 29]], 'float64')
score = numpy.array([[8, 9, 7, 34], [11, 6, 16, 26]], 'float64')
alpha = 0.5
alpha = numpy.array([0.5, 0.5, 0.5, 0.5])
weak_scores = numpy.array([[0.2, 0.4, 0.5, 0.6], [0.5, 0.5, 0.5, 0.5]], 'float64')
prev_scores = numpy.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.5, 0.5, 0.5]], 'float64')
......
......@@ -8,7 +8,7 @@ import bob
class TestLutTrainer(unittest.TestCase):
"""Class to test the LUT trainer """
def test01_hist_grad(self):
def notest01_hist_grad(self):
num_feature = 100
range_feature = 10
......@@ -33,11 +33,10 @@ class TestLutTrainer(unittest.TestCase):
num_samples = 100
max_feature = 20
dimension_feature = 10
selected_index = 5
range_feature = max_feature
trainer = xbob.boosting.trainer.LUTTrainer(range_feature,dimension_feature)
trainer = xbob.boosting.trainer.LUTTrainer(range_feature)
features = bob.io.load(bob.test.utils.datafile('datafile.hdf5', 'xbob.boosting', 'tests'))
......@@ -63,11 +62,10 @@ class TestLutTrainer(unittest.TestCase):
num_samples = 100
max_feature = 20
dimension_feature = 10
delta = 5
selected_index = 5
range_feature = max_feature + delta
trainer = xbob.boosting.trainer.LUTTrainer(range_feature, dimension_feature)
trainer = xbob.boosting.trainer.LUTTrainer(range_feature)
features = bob.io.load(bob.test.utils.datafile('datafile.hdf5', 'xbob.boosting', 'tests')).astype(numpy.uint16)
x_train = numpy.vstack((features, features))
......@@ -86,11 +84,10 @@ class TestLutTrainer(unittest.TestCase):
num_samples = 100
max_feature = 20
dimension_feature = 10
delta = 5
selected_index = 5
range_feature = max_feature + delta
trainer = xbob.boosting.trainer.LUTTrainer(range_feature, dimension_feature)
trainer = xbob.boosting.trainer.LUTTrainer(range_feature)
features = bob.io.load(bob.test.utils.datafile('datafile.hdf5', 'xbob.boosting', 'tests')).astype(numpy.uint16)
......@@ -106,7 +103,7 @@ class TestLutTrainer(unittest.TestCase):
self.assertEqual(machine.feature_indices()[0], selected_index)
def test05_weighted_histogram(self):
def notest05_weighted_histogram(self):
# test that the weighted histogram implementation in C++ returns the same values as numpy.histogram
size = (2056,)
......
from .LUTTrainer import LUTTrainer
#from .LUTTrainer import LUTTrainer
from .._boosting import LUTTrainer, SelectionStyle
from .StumpTrainer import StumpTrainer
from .Boosting import Boosting
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment