Commit 3321f768 authored by André Anjos's avatar André Anjos 💬

Move-in C++ code from Bob; Add 3 more trainers

parent 48366db6
@@ -201,7 +201,7 @@ latex_font_size = '10pt'
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
-  ('index', 'xbob_learn_linear.tex', u'Bob Machines',
+  ('index', 'xbob_learn_linear.tex', u'Bob Linear Machines and Trainers',
   u'Biometrics Group, Idiap Research Institute', 'manual'),
 ]

@@ -241,7 +241,7 @@ rst_epilog = """
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
-    ('index', 'xbob_learn_linear', u'Bob Machines Documentation', [u'Idiap Research Institute'], 1)
+    ('index', 'xbob_learn_linear', u'Bob Linear Machines and Trainers', [u'Idiap Research Institute'], 1)
 ]

 # Default processing flags for sphinx
@@ -2,17 +2,25 @@
 .. Andre Anjos <andre.anjos@idiap.ch>
 .. Fri 13 Dec 2013 12:50:06 CET
 ..
-.. Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
+.. Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland

-==============
- Bob Machines
-==============
+==================================
+ Bob Linear Machines and Trainers
+==================================

 .. todolist::

-This module contains base functionality from Bob bound to Python, available in
-the C++ counter-part ``bob::machine``. It includes machines from our Machine
-Learning core.
+This package includes the definition of a linear machine, which can either
+linearly project input data onto maximally spread representations or provide
+linear separation planes for multi-class data samples. The package includes
+the machine definition *per se* and a selection of trainers for specialized
+purposes:
+
+* Principal Component Analysis (PCA)
+* Fisher's Linear Discriminant Analysis (LDA)
+* (Conjugate Gradient) Logistic Regression
+* Whitening filter
+* Within-Class Covariance Normalization (WCCN)

 Documentation
 -------------
@@ -19,7 +19,7 @@ include_dirs = [
   xbob.learn.activation.get_include()
 ]

-packages = ['bob-machine >= 1.2.2', 'bob-trainer >= 1.2.2']
+packages = ['bob-machine >= 2.0.0a2', 'bob-io >= 2.0.0a2']

 version = '2.0.0a0'
 setup(
@@ -64,6 +64,12 @@ setup(
       "xbob/learn/linear/pca.cpp",
       "xbob/learn/linear/lda.cpp",
       "xbob/learn/linear/main.cpp",
+      "xbob/learn/linear/cpp/machine.cpp",
+      "xbob/learn/linear/cpp/pca.cpp",
+      "xbob/learn/linear/cpp/lda.cpp",
+      "xbob/learn/linear/cpp/logreg.cpp",
+      "xbob/learn/linear/cpp/wccn.cpp",
+      "xbob/learn/linear/cpp/whitening.cpp",
 ],
 packages = packages,
 include_dirs = include_dirs,
New file: xbob/learn/linear/cpp/lda.cpp

/**
* @author Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
* @date Sat Jun 4 21:38:59 2011 +0200
*
* @brief Implements multi-class Fisher/LDA linear machine training using a
* generalized eigenvalue decomposition. For more information on linear
* machines and associated methods, please consult Bishop, "Pattern
* Recognition and Machine Learning", Chapter 4.
*
* Copyright (C) Idiap Research Institute, Martigny, Switzerland
*/
#include <algorithm>
#include <stdexcept>
#include <vector>
#include <boost/format.hpp>
#include <bob/math/pinv.h>
#include <bob/math/eig.h>
#include <bob/math/linear.h>
#include <bob/math/stats.h>
#include <xbob.learn.linear/lda.h>
namespace bob { namespace learn { namespace linear {
FisherLDATrainer::FisherLDATrainer
(bool use_pinv, bool strip_to_rank)
: m_use_pinv(use_pinv),
m_strip_to_rank(strip_to_rank)
{
}
FisherLDATrainer::FisherLDATrainer
(const FisherLDATrainer& other)
: m_use_pinv(other.m_use_pinv),
m_strip_to_rank(other.m_strip_to_rank)
{
}
FisherLDATrainer::~FisherLDATrainer()
{
}
FisherLDATrainer& FisherLDATrainer::operator=
(const FisherLDATrainer& other)
{
if (this != &other) // avoid auto assignment
{
m_use_pinv = other.m_use_pinv;
m_strip_to_rank = other.m_strip_to_rank;
}
return *this;
}
bool FisherLDATrainer::operator==
(const FisherLDATrainer& other) const
{
return m_use_pinv == other.m_use_pinv && \
m_strip_to_rank == other.m_strip_to_rank;
}
bool FisherLDATrainer::operator!=
(const FisherLDATrainer& other) const
{
return !(this->operator==(other));
}
bool FisherLDATrainer::is_similar_to
(const FisherLDATrainer& other, const double r_epsilon,
const double a_epsilon) const
{
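// all configuration attributes are booleans, so the epsilon arguments are
// irrelevant here and deliberately ignored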
return this->operator==(other);
}
/**
* Returns the indexes for sorting a given blitz::Array<double,1>
*/
struct compare_1d_blitz {
const blitz::Array<double,1>& v_;
compare_1d_blitz(const blitz::Array<double,1>& v): v_(v) { }
bool operator() (size_t i, size_t j) { return v_(i) < v_(j); }
};
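// Note: a plain comparator functor (rather than a C++11 lambda) is used here,
// presumably so the code still builds with C++03-only compilers.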
static std::vector<size_t> sort_indexes(const blitz::Array<double,1>& v) {
// initialize original index locations
std::vector<size_t> idx(v.size());
for (size_t i = 0; i != idx.size(); ++i) idx[i] = i;
// sort indexes based on comparing values in v
std::sort(idx.begin(), idx.end(), compare_1d_blitz(v));
return idx;
}
void FisherLDATrainer::train
(Machine& machine, blitz::Array<double,1>& eigen_values,
const std::vector<blitz::Array<double, 2> >& data) const
{
// if #classes < 2, then throw
if (data.size() < 2) {
boost::format m("The number of arrays in the input data == %d whereas for LDA you should provide at least 2");
m % data.size();
throw std::runtime_error(m.str());
}
// checks for arrayset data type and shape once
int n_features = data[0].extent(1);
for (size_t cl=0; cl<data.size(); ++cl) {
if (data[cl].extent(1) != n_features) {
boost::format m("The number of features/columns (%d) in array at position %d of your input differs from that of array at position 0 (%d)");
m % data[cl].extent(1) % n_features;
throw std::runtime_error(m.str());
}
}
int osize = output_size(data);
// Checks that the dimensions are matching
if (machine.inputSize() != (size_t)data[0].extent(1)) {
boost::format m("Number of features at input data set (%d columns) does not match machine input size (%d)");
m % data[0].extent(1) % machine.inputSize();
throw std::runtime_error(m.str());
}
if (machine.outputSize() != (size_t)osize) {
boost::format m("Number of outputs of the given machine (%d) does not match the expected number of outputs calculated by this trainer = %d");
m % machine.outputSize() % osize;
throw std::runtime_error(m.str());
}
if (eigen_values.extent(0) != osize) {
boost::format m("Number of eigenvalues on the given 1D array (%d) does not match the expected number of outputs calculated by this trainer = %d");
m % eigen_values.extent(0) % osize;
throw std::runtime_error(m.str());
}
blitz::Array<double,1> preMean(n_features);
blitz::Array<double,2> Sw(n_features, n_features);
blitz::Array<double,2> Sb(n_features, n_features);
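// Sw is filled in with the within-class scatter matrix, Sb with the
// between-class scatter matrix and preMean with the overall data mean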
bob::math::scatters_(data, Sw, Sb, preMean);
// computes the generalized eigenvalue decomposition
// to find the eigenvectors/eigenvalues of Sw^(-1) * Sb
blitz::Array<double,2> V(Sw.shape());
blitz::Array<double,1> eigen_values_(n_features);
if (m_use_pinv) {
//note: V and Sw are misused as temporary placeholders for data
bob::math::pinv_(Sw, V); //V now contains Sw^-1
bob::math::prod_(V, Sb, Sw); //Sw now contains Sw^-1*Sb
blitz::Array<std::complex<double>,1> Dtemp(eigen_values_.shape());
blitz::Array<std::complex<double>,2> Vtemp(V.shape());
bob::math::eig_(Sw, Vtemp, Dtemp); //V now contains eigen-vectors
//sorting: we know this problem only has real eigenvalues
blitz::Range a = blitz::Range::all();
blitz::Array<double,1> Dunordered(blitz::real(Dtemp));
std::vector<size_t> order = sort_indexes(Dunordered);
for (int i=0; i<n_features; ++i) {
eigen_values_(i) = Dunordered(order[i]);
V(a,i) = blitz::real(Vtemp(a,order[i]));
}
}
else {
bob::math::eigSym_(Sb, Sw, V, eigen_values_);
}
// Convert ascending order to descending order
eigen_values_.reverseSelf(0);
V.reverseSelf(1);
// limit the dimensions of the resulting projection matrix and eigen values
eigen_values = eigen_values_(blitz::Range(0,osize-1));
V.resizeAndPreserve(V.extent(0), osize);
// normalizes the eigen vectors so they have unit length
blitz::Range a = blitz::Range::all();
for (int column=0; column<V.extent(1); ++column) {
math::normalizeSelf(V(a,column));
}
// updates the machine
machine.setWeights(V);
machine.setInputSubtraction(preMean);
// also set input_div and biases to neutral values...
machine.setInputDivision(1.0);
machine.setBiases(0.0);
}
void FisherLDATrainer::train(Machine& machine,
const std::vector<blitz::Array<double,2> >& data) const {
blitz::Array<double,1> throw_away(output_size(data));
train(machine, throw_away, data);
}
size_t FisherLDATrainer::output_size(const std::vector<blitz::Array<double,2> >& data) const {
return m_strip_to_rank ? std::min(data.size()-1, (size_t)data[0].extent(1)) : data[0].extent(1);
}
}}}
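A minimal usage sketch for the trainer above (illustration only, not part of
the commit: the synthetic data, the sizes and the `main` scaffold are
hypothetical, assuming the headers shipped in this commit and Blitz++ are on
the include path):

#include <cstdlib>
#include <vector>
#include <blitz/array.h>
#include <xbob.learn.linear/machine.h>
#include <xbob.learn.linear/lda.h>

int main() {
  using namespace bob::learn::linear;

  // two (hypothetical) classes, 5 samples each, 4 features per sample
  std::vector<blitz::Array<double,2> > data;
  data.push_back(blitz::Array<double,2>(5, 4));
  data.push_back(blitz::Array<double,2>(5, 4));
  for (int c = 0; c < 2; ++c)
    for (int i = 0; i < 5; ++i)
      for (int j = 0; j < 4; ++j) // random points, class 1 shifted by +2
        data[c](i, j) = 2.0*c + std::rand()/(double)RAND_MAX;

  FisherLDATrainer trainer(false, true); // no pseudo-inverse, strip to rank

  // with 2 classes and strip_to_rank=true, output_size() yields 1
  Machine machine(4, trainer.output_size(data));
  blitz::Array<double,1> eigen_values(trainer.output_size(data));
  trainer.train(machine, eigen_values, data);

  // project a probe onto the learned discriminant direction
  blitz::Array<double,1> probe(4), projected(1);
  probe = 1.0;
  machine.forward(probe, projected);
  return 0;
}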
New file: xbob/learn/linear/cpp/logreg.cpp

/**
* @date Sat Sep 1 19:26:00 2012 +0100
* @author Laurent El Shafey <laurent.el-shafey@idiap.ch>
*
* Copyright (C) Idiap Research Institute, Martigny, Switzerland
*/
#include <cmath>
#include <limits>
#include <stdexcept>
#include <boost/format.hpp>
#include <bob/core/logging.h>
#include <bob/math/linear.h>
#include <xbob.learn.linear/logreg.h>
namespace bob { namespace learn { namespace linear {
CGLogRegTrainer::CGLogRegTrainer(const double prior,
const double convergence_threshold, const size_t max_iterations,
const double lambda, const bool mean_std_norm):
m_prior(prior),
m_convergence_threshold(convergence_threshold),
m_max_iterations(max_iterations),
m_lambda(lambda),
m_mean_std_norm(mean_std_norm)
{
if(prior<=0. || prior>=1.)
{
boost::format m("Prior (%f) not in the range ]0,1[.");
m % prior;
throw std::runtime_error(m.str());
}
}
CGLogRegTrainer::CGLogRegTrainer(const CGLogRegTrainer& other):
m_prior(other.m_prior),
m_convergence_threshold(other.m_convergence_threshold),
m_max_iterations(other.m_max_iterations),
m_lambda(other.m_lambda),
m_mean_std_norm(other.m_mean_std_norm)
{
}
CGLogRegTrainer::~CGLogRegTrainer() {}
CGLogRegTrainer& CGLogRegTrainer::operator=
(const CGLogRegTrainer& other)
{
if(this != &other)
{
m_prior = other.m_prior;
m_convergence_threshold = other.m_convergence_threshold;
m_max_iterations = other.m_max_iterations;
m_lambda = other.m_lambda;
m_mean_std_norm = other.m_mean_std_norm;
}
return *this;
}
bool CGLogRegTrainer::operator==(const CGLogRegTrainer& b) const {
return (this->m_prior == b.m_prior &&
this->m_convergence_threshold == b.m_convergence_threshold &&
this->m_max_iterations == b.m_max_iterations &&
this->m_lambda == b.m_lambda &&
this->m_mean_std_norm == b.m_mean_std_norm);
}
bool CGLogRegTrainer::operator!=(const CGLogRegTrainer& b) const {
return !(this->operator==(b));
}
void CGLogRegTrainer::train(Machine& machine, const blitz::Array<double,2>& negatives, const blitz::Array<double,2>& positives) const {
// Checks for arraysets data type and shape once
bob::core::array::assertSameDimensionLength(negatives.extent(1), positives.extent(1));
// Data is checked now and conforms, just proceed w/o any further checks.
size_t n_samples1 = positives.extent(0);
size_t n_samples2 = negatives.extent(0);
size_t n_samples = n_samples1 + n_samples2;
size_t n_features = positives.extent(1);
// Defines useful ranges
blitz::Range rall = blitz::Range::all();
blitz::Range rd = blitz::Range(0,n_features-1);
blitz::Range r1 = blitz::Range(0,n_samples1-1);
blitz::Range r2 = blitz::Range(n_samples1,n_samples-1);
// indices for blitz iterations
blitz::firstIndex i;
blitz::secondIndex j;
blitz::Array<double,1> mean(n_features);
blitz::Array<double,1> std_dev(n_features);
// mean and variance of the training data
if (m_mean_std_norm){
// collect all samples in one matrix
blitz::Array<double,2> all_data(n_samples,n_features);
for(size_t i=0; i<n_samples1; ++i)
all_data(i,rall) = positives(i,rall);
for(size_t i=0; i<n_samples2; ++i)
all_data(i+n_samples1, rall) = negatives(i,rall);
// compute mean and std-dev from samples
mean = blitz::mean(all_data(j,i), j);
blitz::Array<double,2> squared(blitz::pow2(all_data));
std_dev = blitz::sqrt((blitz::sum(squared(j,i), j) - n_samples*blitz::pow2(mean)) / n_samples);
} else {
mean = 0.;
std_dev = 1.;
}
// Creates a large blitz::Array containing the samples
// x = |positives - negatives|, of size (n_features+1,n_samples1+n_samples2)
// |1. -1. |
blitz::Array<double,2> x(n_features+1, n_samples);
x(n_features,r1) = 1.;
x(n_features,r2) = -1.;
for(size_t i=0; i<n_samples1; ++i)
x(rd,i) = (positives(i,rall) - mean(rall)) / std_dev(rall);
for(size_t i=0; i<n_samples2; ++i)
x(rd,i+n_samples1) = -(negatives(i,rall) - mean(rall)) / std_dev(rall);
// Ratio between the two classes and weights vector
double prop = (double)n_samples1 / (double)n_samples;
blitz::Array<double,1> weights(n_samples);
weights(r1) = m_prior / prop;
weights(r2) = (1.-m_prior) / (1.-prop);
// Initializes offset vector
blitz::Array<double,1> offset(n_samples);
const double logit = log(m_prior/(1.-m_prior));
offset(r1) = logit;
offset(r2) = -logit;
// Initializes gradient and w vectors
blitz::Array<double,1> g_old(n_features+1);
blitz::Array<double,1> w_old(n_features+1);
blitz::Array<double,1> g(n_features+1);
blitz::Array<double,1> w(n_features+1);
g_old = 0.;
w_old = 0.;
g = 0.;
w = 0.;
// Initialize working arrays
blitz::Array<double,1> s1(n_samples);
blitz::Array<double,1> u(n_features+1);
blitz::Array<double,1> tmp_n(n_samples);
blitz::Array<double,1> tmp_d(n_features+1);
// Iterates...
static const double ten_epsilon = 10*std::numeric_limits<double>::epsilon();
for(size_t iter=0; ; ++iter)
{
// 1. Computes the non-weighted, per-sample logistic term
// s1_i = 1/(1 + exp(y_i (w^T x_i + logit)))
// where - the x blitz::Array contains the y_i x_i values
// - the offset blitz::Array contains the y_i logit values
s1 = 1. / (1. + blitz::exp(blitz::sum(w(j)*x(j,i), j) + offset));
// 2. Likelihood weighted by the prior/proportion
tmp_n = s1 * weights;
// 3. Gradient g of this weighted likelihood wrt. the weight vector w
bob::math::prod(x, tmp_n, g);
g -= m_lambda * w; // Regularization
// 4. Conjugate gradient step
if(iter == 0)
u = g;
else
{
tmp_d = (g-g_old);
double den = blitz::sum(u * tmp_d);
if(den == 0)
u = 0.;
else
{
// Hestenes-Stiefel formula: Heuristic to set the scale factor beta
// (chosen as it works well in practice)
// beta = g^T (g - g_old) / (u_old^T (g - g_old))
double beta = blitz::sum(tmp_d * g) / den;
u = g - beta * u;
}
}
// 5. Line search along the direction u
// a. Compute ux
bob::math::prod(u,x,tmp_n);
// b. Compute u^T H u
// = sum_{i} weights(i) sigmoid(w^T x_i) [1-sigmoid(w^T x_i)] (u^T x_i) + lambda u^T u
double uhu = blitz::sum(blitz::pow2(tmp_n) * weights * s1 * (1.-s1)) + m_lambda*blitz::sum(blitz::pow2(u));
// Terminates if uhu is close to zero
if(fabs(uhu) < ten_epsilon)
{
bob::core::info << "# CGLogReg Training terminated: convergence after " << iter << " iterations (u^T H u == 0)." << std::endl;
break;
}
// c. Update w = w_old + (g^T u)/(u^T H u) u (ascent step along u)
w = w + blitz::sum(u*g) / uhu * u;
// Terminates if convergence has been reached
if(blitz::max(blitz::fabs(w-w_old)) <= m_convergence_threshold)
{
bob::core::info << "# CGLogReg Training terminated: convergence after " << iter << " iterations." << std::endl;
break;
}
// Terminates if maximum number of iterations has been reached
if(m_max_iterations > 0 && iter+1 >= m_max_iterations)
{
bob::core::info << "# CGLogReg terminated: maximum number of iterations (" << m_max_iterations << ") reached." << std::endl;
break;
}
// Backup previous values
g_old = g;
w_old = w;
}
// Updates the LinearMachine
machine.resize(n_features, 1);
machine.setInputSubtraction(mean);
machine.setInputDivision(std_dev);
blitz::Array<double,2>& w_ = machine.updateWeights();
w_(rall,0) = w(rd); // weights: the first D values
machine.setBiases(w(n_features)); // bias: the (D+1)-th value
}
}}}
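Similarly, a hypothetical driver for the conjugate-gradient logistic
regression trainer (again a sketch only, not part of the commit; the data and
dimensions are invented):

#include <cstdlib>
#include <blitz/array.h>
#include <xbob.learn.linear/machine.h>
#include <xbob.learn.linear/logreg.h>

int main() {
  using namespace bob::learn::linear;

  // hypothetical two-class problem: 10 negatives, 8 positives, 3 features
  blitz::Array<double,2> negatives(10, 3), positives(8, 3);
  for (int i = 0; i < negatives.extent(0); ++i)
    for (int j = 0; j < negatives.extent(1); ++j)
      negatives(i, j) = std::rand()/(double)RAND_MAX;
  for (int i = 0; i < positives.extent(0); ++i)
    for (int j = 0; j < positives.extent(1); ++j)
      positives(i, j) = 1.0 + std::rand()/(double)RAND_MAX;

  // prior of 0.5, tight convergence threshold, at most 10000 iterations,
  // no regularization, with mean/std-dev input normalization
  CGLogRegTrainer trainer(0.5, 1e-10, 10000, 0.0, true);

  Machine machine; // resized by train() to 3 inputs and 1 output
  trainer.train(machine, negatives, positives);

  // score a probe sample: identity activation, so this is w^T x + bias
  blitz::Array<double,1> probe(3), score(1);
  probe = 0.5;
  machine.forward(probe, score);
  return 0;
}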
New file: xbob/learn/linear/cpp/machine.cpp

/**
* @date Tue May 31 09:22:33 2011 +0200
* @author Andre Anjos <andre.anjos@idiap.ch>
*
* @brief Implements a linear Machine: input normalization followed by a
* linear projection, a bias and an activation function
*
* Copyright (C) Idiap Research Institute, Martigny, Switzerland
*/
#include <cmath>
#include <stdexcept>
#include <boost/make_shared.hpp>
#include <boost/format.hpp>
#include <bob/core/array_copy.h>
#include <bob/math/linear.h>
#include <xbob.learn.linear/machine.h>
namespace bob { namespace learn { namespace linear {
Machine::Machine(const blitz::Array<double,2>& weight)
: m_input_sub(weight.extent(0)),
m_input_div(weight.extent(0)),
m_bias(weight.extent(1)),
m_activation(boost::make_shared<bob::machine::IdentityActivation>()),
m_buffer(weight.extent(0))
{
m_input_sub = 0.0;
m_input_div = 1.0;
m_bias = 0.0;
m_weight.reference(bob::core::array::ccopy(weight));
}
Machine::Machine():
m_input_sub(0),
m_input_div(0),
m_weight(0, 0),
m_bias(0),
m_activation(boost::make_shared<bob::machine::IdentityActivation>()),
m_buffer(0)
{
}
Machine::Machine(size_t n_input, size_t n_output):
m_input_sub(n_input),
m_input_div(n_input),
m_weight(n_input, n_output),
m_bias(n_output),
m_activation(boost::make_shared<bob::machine::IdentityActivation>()),
m_buffer(n_input)
{
m_input_sub = 0.0;
m_input_div = 1.0;
m_weight = 0.0;
m_bias = 0.0;
}
Machine::Machine(const Machine& other):
m_input_sub(bob::core::array::ccopy(other.m_input_sub)),
m_input_div(bob::core::array::ccopy(other.m_input_div)),
m_weight(bob::core::array::ccopy(other.m_weight)),
m_bias(bob::core::array::ccopy(other.m_bias)),
m_activation(other.m_activation),
m_buffer(m_input_sub.shape())
{
}
Machine::Machine (bob::io::HDF5File& config) {
load(config);
}
Machine::~Machine() {}
Machine& Machine::operator=
(const Machine& other) {
if(this != &other)
{
m_input_sub.reference(bob::core::array::ccopy(other.m_input_sub));
m_input_div.reference(bob::core::array::ccopy(other.m_input_div));
m_weight.reference(bob::core::array::ccopy(other.m_weight));
m_bias.reference(bob::core::array::ccopy(other.m_bias));
m_activation = other.m_activation;
m_buffer.resize(m_input_sub.shape());
}
return *this;
}
bool Machine::operator==(const Machine& b) const {
return (bob::core::array::isEqual(m_input_sub, b.m_input_sub) &&
bob::core::array::isEqual(m_input_div, b.m_input_div) &&
bob::core::array::isEqual(m_weight, b.m_weight) &&
bob::core::array::isEqual(m_bias, b.m_bias) &&
m_activation->str() == b.m_activation->str());
}
bool Machine::operator!=(const Machine& b) const {
return !(this->operator==(b));
}
bool Machine::is_similar_to(const Machine& b, const double r_epsilon, const double a_epsilon) const {
return (bob::core::array::isClose(m_input_sub, b.m_input_sub, r_epsilon, a_epsilon) &&
bob::core::array::isClose(m_input_div, b.m_input_div, r_epsilon, a_epsilon) &&
bob::core::array::isClose(m_weight, b.m_weight, r_epsilon, a_epsilon) &&
bob::core::array::isClose(m_bias, b.m_bias, r_epsilon, a_epsilon) &&
m_activation->str() == b.m_activation->str());
}
void Machine::load (bob::io::HDF5File& config) {
//reads all data directly into the member variables
m_input_sub.reference(config.readArray<double,1>("input_sub"));
m_input_div.reference(config.readArray<double,1>("input_div"));
m_weight.reference(config.readArray<double,2>("weights"));
m_bias.reference(config.readArray<double,1>("biases"));
m_buffer.resize(m_input_sub.extent(0));
//switch between file format versions - also supports the old, unversioned layout
if (config.hasAttribute(".", "version")) { //new version
config.cd("activation");
m_activation = bob::machine::load_activation(config);
config.cd("..");
}
else { //old version
uint32_t act = config.read<uint32_t>("activation");
m_activation = bob::machine::make_deprecated_activation(act);
}
}
void Machine::resize (size_t input, size_t output) {
m_input_sub.resizeAndPreserve(input);
m_input_div.resizeAndPreserve(input);
m_buffer.resizeAndPreserve(input);
m_weight.resizeAndPreserve(input, output);
m_bias.resizeAndPreserve(output);
}
void Machine::save (bob::io::HDF5File& config) const {
config.setAttribute(".", "version", 1);
config.setArray("input_sub", m_input_sub);
config.setArray("input_div", m_input_div);
config.setArray("weights", m_weight);
config.setArray("biases", m_bias);
config.createGroup("activation");
config.cd("activation");
m_activation->save(config);
config.cd("..");
}
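/**
 * The forward pass below computes, in one line of math:
 *
 *   output = act(W^T ((input - input_sub) / input_div) + bias)
 *
 * with an element-wise division, and act() being the configured activation
 * function (the identity, unless otherwise set).
 */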
void Machine::forward_ (const blitz::Array<double,1>& input, blitz::Array<double,1>& output) const {
m_buffer = (input - m_input_sub) / m_input_div;
bob::math::prod_(m_buffer, m_weight, output);
for (int i=0; i<m_weight.extent(1); ++i)
output(i) = m_activation->f(output(i) + m_bias(i));
}
void Machine::forward (const blitz::Array<double,1>& input, blitz::Array<double,1>& output) const {
if (m_weight.extent(0) != input.extent(0)) { //checks input dimension
boost::format m("mismatch on the input dimension: expected a vector of size %d, but you input one with size = %d instead");
m % m_weight.extent(0) % input.extent(0);
throw std::runtime_error(m.str());
}
if (m_weight.extent(1) != output.extent(0)) { //checks output dimension
boost::format m("mismatch on the output dimension: expected a vector of size %d, but you input one with size = %d instead");
m % m_weight.extent(1) % output.extent(0);
throw std::runtime_error(m.str());
}
forward_(input, output);
}
void Machine::setWeights (const blitz::Array<double,2>& weight) {
if (weight.extent(0) != m_input_sub.extent(0)) { //checks 1st dimension
boost::format m("mismatch on the weight shape (number of rows): expected a weight matrix with %d row(s), but you input one with %d row(s) instead");
m % m_input_sub.extent(0) % weight.extent(0);
throw std::runtime_error(m.str());
}
if (weight.extent(1) != m_bias.extent(0)) { //checks 2nd dimension
boost::format m("mismatch on the weight shape (number of columns): expected a weight matrix with %d column(s), but you input one with %d column(s) instead");