Commit a8f5ad67 authored by Manuel Günther's avatar Manuel Günther
Browse files

Implemented StumpMachine in C++; unified datatypes between C++ and python; some clean-up.

parent 6789e102
......@@ -70,6 +70,7 @@ setup(
Extension(
'xbob.boosting._boosting',
[
"xbob/boosting/cpp/stumpmachine.cpp",
"xbob/boosting/cpp/lutmachine.cpp",
"xbob/boosting/cpp/boosted_machine.cpp",
"xbob/boosting/cpp/bindings.cpp",
......
# import the C++ stuff
from ._boosting import BoostedMachine, LUTMachine
from ._boosting import StumpMachine, LUTMachine, BoostedMachine
import core
import features
......
......@@ -259,7 +259,7 @@ class BoostMachine():
def _update(self):
""" Initializes internal variables."""
self.selected_indices = set([weak_trainer.selected_indices[i] for weak_trainer in self.weak_trainer for i in range(self.number_of_outputs)])
self.selected_indices = set([weak_trainer.feature_indices()[i] for weak_trainer in self.weak_trainer for i in range(self.number_of_outputs)])
self._weak_results = numpy.ndarray((len(self.weak_trainer),), numpy.float64)
......@@ -374,7 +374,7 @@ class BoostMachine():
} [weak_machine_type]
weak_machine.load(hdf5File)
self.weak_trainer.append(weak_machine)
self.selected_indices |= set([weak_machine.selected_indices[i] for i in range(self.number_of_outputs)])
self.selected_indices |= set([weak_machine.feature_indices()[i] for i in range(self.number_of_outputs)])
hdf5File.cd('..')
self._update()
......
......@@ -14,18 +14,18 @@ import numpy
import math
class StumpMachine():
""" The StumpMachine class consist of the core elements of the Stump weak classfier i.e. the threshold,
""" The StumpMachine class consist of the core elements of the Stump weak classifier i.e. the threshold,
the polarity and the feature index corresponding to the weak classifier. """
def __init__(self, threshold = 0, polarity = 0, selected_indices = 0):
def __init__(self, threshold = 0, polarity = 0, selected_index = 0):
""" Initialize the stump classifier"""
self.threshold = threshold
self.polarity = polarity
self.selected_indices = selected_indices
if isinstance(self.selected_indices, int):
self.selected_indices = numpy.array([self.selected_indices], dtype=numpy.int)
self.selected_index = numpy.int32(selected_index)
def feature_indices(self):
return [self.selected_index]
def get_weak_scores(self,test_features):
......@@ -45,7 +45,7 @@ class StumpMachine():
weak_scores = numpy.ones([numSamp,1])
# Select feature corresponding to the specific index
weak_features = test_features[:,self.selected_indices]
weak_features = test_features[:,self.selected_index]
# classify the features and compute the score
weak_scores[weak_features < self.threshold] = -1
......@@ -61,34 +61,29 @@ class StumpMachine():
Output: a single number (+1/-1)
"""
# classify the features and compute the score
return self.polarity * (-1. if feature[self.selected_indices] < self.threshold else 1.)
return self.polarity * (-1. if feature[self.selected_index] < self.threshold else 1.)
def save(self, hdf5File):
"""Saves the current state of this machine to the given HDF5File."""
hdf5File.set("Indices", self.selected_indices)
hdf5File.set("Index", self.selected_index)
hdf5File.set("Threshold", self.threshold)
hdf5File.set("Polarity", self.polarity)
def load(self, hdf5File):
"""Reads the state of this machine from the given HDF5File."""
self.selected_indices = hdf5File.read("Indices")
self.selected_index = hdf5File.read("Index")
self.threshold = hdf5File.read("Threshold")
self.polarity = hdf5File.read("Polarity")
if isinstance(self.selected_indices, int):
self.selected_indices = numpy.array([self.selected_indices], dtype=numpy.int)
from .. import StumpMachine as CppStumpMachine
class StumpTrainer():
""" The weak trainer class for training stumps as classifiers. The trainer is parametrized
the threshold and the polarity.
"""
def compute_weak_trainer(self, fea, loss_grad):
""" The function to compute weak Stump trainer.
......@@ -122,7 +117,7 @@ class StumpTrainer():
# Find the optimum id and its corresponding trainer
opt_id = gain.argmax()
return StumpMachine(threshold[opt_id], polarity[opt_id], opt_id)
return CppStumpMachine(threshold[opt_id], polarity[opt_id], numpy.int32(opt_id))
......@@ -200,10 +195,13 @@ class LutMachine():
type: Integer
"""
self.luts = numpy.ones((num_entries, num_outputs), dtype = numpy.int)
self.selected_indices = numpy.zeros((num_outputs,), 'int16')
self.luts = numpy.ones((num_entries, num_outputs), numpy.float64)
self.selected_indices = numpy.zeros((num_outputs,), numpy.int32)
def feature_indices(self):
return self.selected_indices
def get_weak_scores(self, features):
""" Function computes classification results according to the LUT machine
......@@ -251,6 +249,7 @@ class LutMachine():
self.selected_indices = numpy.array([self.selected_indices], dtype=numpy.int)
from .. import LUTMachine
class LutTrainer():
......@@ -311,7 +310,7 @@ class LutTrainer():
# num_outputs = loss_grad.shape[1]
fea_grad = numpy.zeros([self.num_entries, self.num_outputs])
luts = numpy.ones((self.num_entries, self.num_outputs), numpy.float64)
selected_indices = numpy.ndarray((self.num_outputs,), numpy.uint64)
selected_indices = numpy.ndarray((self.num_outputs,), numpy.int32)
# Compute the sum of the gradient based on the feature values or the loss associated with each
# feature index
......
#include <bob/io/HDF5File.h>
#include <boost/shared_ptr.hpp>
#include <set>
class WeakMachine{
public:
WeakMachine(){}
virtual double forward1(const blitz::Array<uint16_t, 1>& features) const = 0;
virtual void forward2(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,1> predictions) const = 0;
virtual void forward3(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,2> predictions) const = 0;
virtual double forward1(const blitz::Array<uint16_t, 1>& features) const {throw std::runtime_error("This function is not implemented for the given data type in the current class.");}
virtual double forward1(const blitz::Array<double, 1>& features) const {throw std::runtime_error("This function is not implemented for the given data type in the current class.");}
virtual blitz::Array<uint64_t,1> getIndices() const = 0;
virtual void forward2(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,1> predictions) const {throw std::runtime_error("This function is not implemented for the given data type in the current class.");}
virtual void forward2(const blitz::Array<double, 2>& features, blitz::Array<double,1> predictions) const {throw std::runtime_error("This function is not implemented for the given data type in the current class.");}
virtual void forward3(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,2> predictions) const {throw std::runtime_error("This function is not implemented for the given data type in the current class.");}
virtual void forward3(const blitz::Array<double, 2>& features, blitz::Array<double,2> predictions) const {throw std::runtime_error("This function is not implemented for the given data type in the current class.");}
virtual blitz::Array<int,1> getIndices() const = 0;
virtual void save(bob::io::HDF5File& file) const = 0;
virtual void load(bob::io::HDF5File& file) = 0;
};
// A decision stump: a weak machine that compares a single feature value against a threshold.
// The prediction for a feature value f is  polarity * (f < threshold ? -1 : +1).
class StumpMachine : public WeakMachine{
public:
// Creates a stump from its three parameters; index selects the feature element that is compared.
StumpMachine(double threshold, double polarity, int index);
// Creates a stump by reading its parameters from the given HDF5 file (see load()).
StumpMachine(bob::io::HDF5File& file);
// Prediction for a single feature vector of uint16_t values.
virtual double forward1(const blitz::Array<uint16_t, 1>& features) const;
// Predictions for a set of uint16_t feature vectors (one per row), written into predictions.
virtual void forward2(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,1> predictions) const;
// Prediction for a single feature vector of double values.
virtual double forward1(const blitz::Array<double, 1>& features) const;
// Predictions for a set of double feature vectors (one per row), written into predictions.
virtual void forward2(const blitz::Array<double, 2>& features, blitz::Array<double,1> predictions) const;
// Returns a one-element array holding the feature index used by this stump.
virtual blitz::Array<int,1> getIndices() const;
double getThreshold() const {return m_threshold;}
double getPolarity() const {return m_polarity;}
// Writes "Threshold", "Polarity" and "Index", and tags the group with MachineType "StumpMachine".
virtual void save(bob::io::HDF5File& file) const;
// Reads "Threshold", "Polarity" and "Index" from the given file.
virtual void load(bob::io::HDF5File& file);
private:
// helper function to compute the prediction
double _predict(double f) const;
// the parameters of the stump: decision threshold, polarity (scales the +1/-1 decision)
// and the index of the feature element that is compared against the threshold
double m_threshold;
double m_polarity;
int m_index;
};
class LUTMachine : public WeakMachine{
public:
LUTMachine(const blitz::Array<double,2> look_up_tables, const blitz::Array<uint64_t,1> indices);
// LUTMachine(const blitz::Array<double,1>& look_up_table, uint64_t index);
LUTMachine(const blitz::Array<double,2> look_up_tables, const blitz::Array<int,1> indices);
LUTMachine(bob::io::HDF5File& file);
virtual double forward1(const blitz::Array<uint16_t, 1>& features) const;
virtual void forward2(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,1> predictions) const;
virtual void forward3(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,2> predictions) const;
virtual blitz::Array<uint64_t,1> getIndices() const;
virtual blitz::Array<int,1> getIndices() const;
virtual void save(bob::io::HDF5File& file) const;
virtual void load(bob::io::HDF5File& file);
......@@ -36,17 +68,20 @@ class LUTMachine : public WeakMachine{
private:
// the LUT for the multi-variate case
blitz::Array<double,2> m_look_up_tables;
blitz::Array<uint64_t,1> m_indices;
blitz::Array<int,1> m_indices;
// for speed reasons, we also keep the LUT for the uni-variate case
blitz::Array<double,1> m_look_up_table;
uint64_t m_index;
int m_index;
};
// Factory for weak machines stored in an HDF5 file.
// Reads the "MachineType" attribute of the current group and constructs the matching
// machine from the file. Both spellings "LutMachine" and "LUTMachine" select a LUTMachine.
// Throws std::runtime_error when the stored type name is not one of the supported ones.
inline boost::shared_ptr<WeakMachine> loadWeakMachine(bob::io::HDF5File& file){
  std::string type_name;
  file.getAttribute(".", "MachineType", type_name);

  const bool is_lut = (type_name == "LutMachine") || (type_name == "LUTMachine");
  if (is_lut)
    return boost::shared_ptr<WeakMachine>(new LUTMachine(file));

  if (type_name == "StumpMachine")
    return boost::shared_ptr<WeakMachine>(new StumpMachine(file));

  // nothing matched: report the offending type name to the caller
  throw std::runtime_error("Weak machine type '" + type_name + "' is not known or supported.");
}
......@@ -69,7 +104,7 @@ class BoostedMachine{
// predicts the output and the labels for the given features (multi-variate case)
void forward3(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,2> predictions, blitz::Array<double,2> labels) const;
blitz::Array<uint64_t,1> getIndices() const;
blitz::Array<int,1> getIndices() const;
const blitz::Array<double,2> getWeights() const {return m_weights;}
......
......@@ -7,13 +7,34 @@
using namespace boost::python;
// Thin free-function wrappers around the overloaded StumpMachine::forward1 / forward2 members.
// Each wrapper fixes one feature element type and one array rank, so that the Python bindings
// can register it through a plain function pointer:
//   f11: double features,   single feature vector  -> returns the prediction
//   f12: double features,   set of feature vectors -> fills the predictions array
//   f21: uint16_t features, single feature vector  -> returns the prediction
//   f22: uint16_t features, set of feature vectors -> fills the predictions array
static double f11(StumpMachine& s, const blitz::Array<double,1>& f){return s.forward1(f);}
static void f12(StumpMachine& s, const blitz::Array<double,2>& f, blitz::Array<double,1> p){s.forward2(f,p);}
static double f21(StumpMachine& s, const blitz::Array<uint16_t,1>& f){return s.forward1(f);}
static void f22(StumpMachine& s, const blitz::Array<uint16_t,2>& f, blitz::Array<double,1> p){s.forward2(f,p);}
BOOST_PYTHON_MODULE(_boosting) {
bob::python::setup_python("Bindings for the xbob.boosting machines.");
class_<WeakMachine, boost::shared_ptr<WeakMachine>, boost::noncopyable>("WeakMachine", "Pure virtual base class for weak machines", no_init);
class_<StumpMachine, boost::shared_ptr<StumpMachine>, bases<WeakMachine> >("StumpMachine", "A machine comparing features to a threshold.", no_init)
.def(init<double, double, int >((arg("self"), arg("threshold"), arg("polarity"), arg("index")), "Creates a StumpMachine with the given threshold, polarity and the feature index, for which the machine is valid."))
.def(init<bob::io::HDF5File&>((arg("self"),arg("file")), "Creates a new machine from file."))
.def("__call__", &f11, (arg("self"), arg("features")), "Returns the prediction for the given feature vector.")
.def("__call__", &f12, (arg("self"), arg("features"), arg("predictions")), "Computes the predictions for the given feature set (uni-variate only).")
.def("__call__", &f21, (arg("self"), arg("features")), "Returns the prediction for the given feature vector.")
.def("__call__", &f22, (arg("self"), arg("features"), arg("predictions")), "Computes the predictions for the given feature set (uni-variate only).")
.def("load", &StumpMachine::load, "Reads a Machine from file")
.def("save", &StumpMachine::save, "Writes the machine to file")
.def("feature_indices", &StumpMachine::getIndices, "The indices into the feature vector required by this machine.")
.add_property("threshold", &StumpMachine::getThreshold, "The threshold of this machine.")
.add_property("polarity", &StumpMachine::getPolarity, "The polarity for this machine.")
;
class_<LUTMachine, boost::shared_ptr<LUTMachine>, bases<WeakMachine> >("LUTMachine", "A machine containing a Look-Up-Table.", no_init)
.def(init<const blitz::Array<double,2>&, const blitz::Array<uint64_t,1>&>((arg("self"), arg("look_up_tables"), arg("indices")), "Creates a LUTMachine with the given look-up-table and the feature indices, for which the LUT is valid."))
.def(init<const blitz::Array<double,2>&, const blitz::Array<int,1>&>((arg("self"), arg("look_up_tables"), arg("indices")), "Creates a LUTMachine with the given look-up-table and the feature indices, for which the LUT is valid."))
.def(init<bob::io::HDF5File&>((arg("self"),arg("file")), "Creates a new machine from file."))
.def("__call__", &LUTMachine::forward1, (arg("self"), arg("features")), "Returns the prediction for the given feature vector.")
.def("__call__", &LUTMachine::forward2, (arg("self"), arg("features"), arg("predictions")), "Computes the predictions for the given feature set (uni-variate).")
......@@ -22,7 +43,7 @@ BOOST_PYTHON_MODULE(_boosting) {
.def("save", &LUTMachine::save, "Writes the machine to file")
.add_property("lut", &LUTMachine::getLut, "The look up table of the machine.")
.add_property("selected_indices", &LUTMachine::getIndices, "The indices into the feature vector required by this machine.")
.def("feature_indices", &LUTMachine::getIndices, "The indices into the feature vector required by this machine.")
;
class_<BoostedMachine, boost::shared_ptr<BoostedMachine> >("BoostedMachine", "A machine containing of several weak machines", no_init)
......
#include "Machines.h"
#include <sstream>
#include <set>
BoostedMachine::BoostedMachine() :
m_weak_machines(),
......@@ -68,14 +69,14 @@ void BoostedMachine::forward3(const blitz::Array<uint16_t,2>& features, blitz::A
}
}
blitz::Array<uint64_t,1> BoostedMachine::getIndices() const{
std::set<uint64_t> indices;
blitz::Array<int,1> BoostedMachine::getIndices() const{
std::set<int> indices;
for (unsigned i = 0; i < m_weak_machines.size(); ++i){
const blitz::Array<uint64_t,1>& ind = m_weak_machines[i]->getIndices();
const blitz::Array<int,1>& ind = m_weak_machines[i]->getIndices();
indices.insert(ind.begin(), ind.end());
}
blitz::Array<uint64_t,1> ret(indices.size());
blitz::Array<int,1> ret(indices.size());
std::copy(indices.begin(), indices.end(), ret.begin());
return ret;
}
......@@ -115,5 +116,8 @@ void BoostedMachine::load(bob::io::HDF5File& file){
machine_name = fns.str();
}
if (m_weak_machines.empty()){
throw std::runtime_error("Could not read weak machines.");
}
}
#include <Machines.h>
#include "Machines.h"
#include <bob/core/cast.h>
#include <assert.h>
#include <set>
LUTMachine::LUTMachine(const blitz::Array<double,2> look_up_tables, const blitz::Array<uint64_t,1> indices):
LUTMachine::LUTMachine(const blitz::Array<double,2> look_up_tables, const blitz::Array<int,1> indices):
m_look_up_tables(look_up_tables.shape()),
m_indices(indices.shape()),
m_look_up_table(),
......@@ -56,19 +57,28 @@ void LUTMachine::forward3(const blitz::Array<uint16_t,2>& features, blitz::Array
}
}
blitz::Array<uint64_t,1> LUTMachine::getIndices() const{
std::set<uint64_t> indices;
blitz::Array<int,1> LUTMachine::getIndices() const{
std::set<int> indices;
for (int i = 0; i < m_indices.extent(0); ++i){
indices.insert(m_indices(i));
}
blitz::Array<uint64_t, 1> ret(indices.size());
blitz::Array<int, 1> ret(indices.size());
std::copy(indices.begin(), indices.end(), ret.begin());
return ret;
}
void LUTMachine::load(bob::io::HDF5File& file){
m_look_up_tables.reference(bob::core::array::cast<double>(file.readArray<int64_t,2>("LUT")));
m_indices.reference(bob::core::array::cast<uint64_t>(file.readArray<int64_t,1>("Indices")));
try{
m_look_up_tables.reference(file.readArray<double,2>("LUT"));
}catch (std::exception){
m_look_up_tables.reference(bob::core::array::cast<double>(file.readArray<int64_t,2>("LUT")));
}
try{
m_indices.reference(file.readArray<int,1>("Indices"));
}catch (std::exception){
m_indices.reference(bob::core::array::cast<int>(file.readArray<int64_t,1>("Indices")));
}
m_look_up_table.reference(m_look_up_tables(blitz::Range::all(), 0));
m_index = m_indices(0);
}
......
#include "Machines.h"
#include <bob/core/cast.h>
#include <assert.h>
#include <set>
// Builds a stump directly from its parameters: the decision threshold, the polarity
// that scales the +1/-1 decision, and the index of the feature element to compare.
StumpMachine::StumpMachine(double threshold, double polarity, int index)
  : m_threshold(threshold), m_polarity(polarity), m_index(index)
{}
// Builds a stump from an HDF5 file: members are zero-initialized first and then
// overwritten with the values stored in the file.
StumpMachine::StumpMachine(bob::io::HDF5File& file)
  : m_threshold(0), m_polarity(0), m_index(0)
{
  load(file);
}
// Core decision rule of the stump for a single feature value f:
// values strictly below the threshold vote -1, all others vote +1;
// the vote is then scaled by the polarity.
double StumpMachine::_predict(double f) const{
  const double vote = (f < m_threshold) ? -1. : 1.;
  return m_polarity * vote;
}
// Prediction for one feature vector of doubles: only the element at m_index is inspected.
double StumpMachine::forward1(const blitz::Array<double, 1>& features) const{
  const double selected = features(m_index);
  return _predict(selected);
}
// Predictions for a set of double feature vectors (one sample per row).
// For every row, the element in column m_index is classified and the result is stored
// at the same position of predictions; predictions must provide one entry per row.
void StumpMachine::forward2(const blitz::Array<double, 2>& features, blitz::Array<double,1> predictions) const{
  const int sample_count = features.extent(0);
  for (int row = 0; row < sample_count; ++row){
    predictions(row) = _predict(features(row, m_index));
  }
}
// Prediction for one feature vector of uint16_t values: only the element at m_index is
// inspected; it is converted to double when handed to _predict.
double StumpMachine::forward1(const blitz::Array<uint16_t, 1>& features) const{
  const uint16_t selected = features(m_index);
  return _predict(selected);
}
// Predictions for a set of uint16_t feature vectors (one sample per row).
// For every row, the element in column m_index is classified and the result is stored
// at the same position of predictions; predictions must provide one entry per row.
void StumpMachine::forward2(const blitz::Array<uint16_t, 2>& features, blitz::Array<double,1> predictions) const{
  const int sample_count = features.extent(0);
  for (int row = 0; row < sample_count; ++row){
    predictions(row) = _predict(features(row, m_index));
  }
}
// Returns the feature indices required by this machine; a stump uses exactly one
// feature, so the result is a freshly allocated array of length one.
blitz::Array<int,1> StumpMachine::getIndices() const{
  blitz::Array<int, 1> single_index(1);
  single_index(0) = m_index;
  return single_index;
}
// Restores the state of this stump from the given HDF5 file.
// Reads the scalar datasets "Threshold" and "Polarity" (as double) and "Index" (as int)
// and overwrites the current parameters with them.
// The "MachineType" attribute is not inspected here.
void StumpMachine::load(bob::io::HDF5File& file){
m_threshold = file.read<double>("Threshold");
m_polarity = file.read<double>("Polarity");
m_index = file.read<int>("Index");
}
// Stores the state of this stump in the given HDF5 file.
// Writes the datasets "Threshold", "Polarity" and "Index", i.e., the same names that load() reads.
void StumpMachine::save(bob::io::HDF5File& file) const{
file.set("Threshold", m_threshold);
file.set("Polarity", m_polarity);
file.set("Index", m_index);
// tag the current group with the machine type, so that loadWeakMachine can pick the right class
file.setAttribute(".", "MachineType", "StumpMachine");
}
\ No newline at end of file
......@@ -22,7 +22,7 @@ def get_image_3x3(val):
class TestdlbpFeatures(unittest.TestCase):
"""Perform test for dlbp features"""
""" The neighbourhood is defined as
""" The neighbourhood is defined as
p0 | p1 | p2
p7 | pc | p3
p6 | p5 | p4 """
......@@ -36,25 +36,24 @@ class TestdlbpFeatures(unittest.TestCase):
self.assertTrue(returned_lbp == 255)
img_values = numpy.array([20,1,1,1,10,10,10,10,5])
img_values = numpy.array([20,1,1,1,10,10,10,10,5])
img = get_image_3x3(img_values)
returned_lbp = feature_extractor.dlbp(img)
print returned_lbp
self.assertTrue(returned_lbp == 3)
img_values = numpy.array([1,20,1,1,10,10,10,10,5])
img_values = numpy.array([1,20,1,1,10,10,10,10,5])
img = get_image_3x3(img_values)
returned_lbp = feature_extractor.dlbp(img)
self.assertTrue(returned_lbp == 12)
img_values = numpy.array([1,1,20,1,10,10,10,10,5])
img_values = numpy.array([1,1,20,1,10,10,10,10,5])
img = get_image_3x3(img_values)
returned_lbp = feature_extractor.dlbp(img)
self.assertTrue(returned_lbp == 48)
img_values = numpy.array([1,1,1,20,10,10,10,10,5])
img_values = numpy.array([1,1,1,20,10,10,10,10,5])
img = get_image_3x3(img_values)
returned_lbp = feature_extractor.dlbp(img)
self.assertTrue(returned_lbp == 192)
......@@ -31,18 +31,18 @@ def test_lut_machine():
# test the LUT machine
machine = xbob.boosting.core.trainers.LutMachine(1, 1)
scores = machine.get_weak_scores(numpy.zeros((1,1), dtype=numpy.uint8))
scores = machine.get_weak_scores(numpy.zeros((1,1), dtype=numpy.uint16))
assert scores.shape == (1,1)
nose.tools.eq_(scores[0,0], 1)
score = machine.get_weak_score(numpy.zeros((1,), dtype=numpy.uint8))
score = machine.get_weak_score(numpy.zeros((1,), dtype=numpy.uint16))
nose.tools.eq_(scores, 1)
def test_boosted_machine():
# test the boosted machine, by adding two different machine types (doesn't usually make sense, though...)
stump_machine = xbob.boosting.core.trainers.StumpMachine(0, 1, 0)
stump_machine = xbob.boosting.core.trainers.StumpMachine(0., 1., 0)
lut_machine = xbob.boosting.core.trainers.LutMachine(1, 1)
boost_machine = xbob.boosting.core.boosting.BoostMachine()
......@@ -50,11 +50,11 @@ def test_boosted_machine():
boost_machine.add_weak_trainer(lut_machine, numpy.array([1.]))
# forward some features
scores, labels = boost_machine.classify(numpy.zeros((1,1), dtype=numpy.uint8))
scores, labels = boost_machine.classify(numpy.zeros((1,1), dtype=numpy.uint16))
assert scores.shape == (1,1)
nose.tools.eq_(scores[0,0], 2)
score = boost_machine(numpy.zeros((1,), dtype=numpy.uint8))
score = boost_machine(numpy.zeros((1,), dtype=numpy.uint16))
nose.tools.eq_(score, 2)
# write the machine to file
......@@ -66,18 +66,28 @@ def test_boosted_machine():
assert (new_machine.alpha == 1).all()
# forward some features with the new machine
scores, labels = new_machine.classify(numpy.zeros((1,1), dtype=numpy.uint8))
scores, labels = new_machine.classify(numpy.zeros((1,1), dtype=numpy.uint16))
assert scores.shape == (1,1)
nose.tools.eq_(scores[0,0], 2)
score = new_machine(numpy.zeros((1,), dtype=numpy.uint8))
score = new_machine(numpy.zeros((1,), dtype=numpy.uint16))
nose.tools.eq_(score, 2)
def test_cpp_machine():
# test the stump machine
stump = xbob.boosting.StumpMachine(0., 1., 0)
scores = numpy.ndarray((1,), numpy.float64)
stump(numpy.ones((1,1), dtype=numpy.uint16), scores)
nose.tools.eq_(scores[0], 1)
score = stump(numpy.ones((1,), dtype=numpy.float64))
nose.tools.eq_(scores, 1)
# test the LUT machine
LUT = numpy.ones((1,1), numpy.float)
indices = numpy.zeros((1,), numpy.uint64)
indices = numpy.zeros((1,), numpy.int32)
machine = xbob.boosting.LUTMachine(LUT, indices)
boosted_machine = xbob.boosting.BoostedMachine()
......@@ -92,12 +102,19 @@ def test_cpp_machine():
nose.tools.eq_(scores[0], 1)
nose.tools.eq_(labels[0], 1)
# TODO: enable when StumpMachine is implemented in C++
"""
# try to read the machine from the temp file, which was written with the python version
f = get_temp_file()
new_machine = xbob.boosting.BoostedMachine(bob.io.HDF5File(f))
assert (new_machine.alpha() == 1).all()
"""
os.remove(get_temp_file())
# forward some features with the new strong machine
score = new_machine(numpy.zeros((1,), dtype=numpy.uint16))
nose.tools.eq_(score, 2)
scores = numpy.ndarray((1,), numpy.float64)
labels = numpy.ndarray((1,), numpy.float64)
new_machine(numpy.zeros((1,1), dtype=numpy.uint16), scores, labels)
nose.tools.eq_(scores[0], 2)
......@@ -34,21 +34,21 @@ class TestLutTrainer(unittest.TestCase):
num_samples = 100
max_feature = 20
dimension_feature = 10
selected_index = 5
range_feature = max_feature
trainer = xbob.boosting.core.trainers.LutTrainer(range_feature,'indep', 1)