Commit 00cf3a29 authored by Manuel Günther's avatar Manuel Günther
Browse files

Moved C++ code into its own Library; removed dependency on Bob

parent ecf64885
......@@ -14,16 +14,11 @@ matrix:
- NUMPYSPEC===1.8.0
before_install:
- sudo add-apt-repository -y ppa:biometrics/bob
- sudo add-apt-repository -y ppa:biometrics/bob.preview
- sudo apt-get update -qq
- sudo apt-get install -qq --force-yes bob-dev
- sudo apt-get install -qq libsvm-dev
- if [ -n "${NUMPYSPEC}" ]; then sudo apt-get install -qq libatlas-dev libatlas-base-dev
liblapack-dev gfortran; fi
- sudo apt-get install -qq --force-yes libboost-all-dev libblitz1-dev libhdf5-serial-dev libsvm-dev
- if [ -n "${NUMPYSPEC}" ]; then sudo apt-get install -qq libatlas-dev libatlas-base-dev liblapack-dev gfortran; fi
- if [ -n "${NUMPYSPEC}" ]; then pip install --upgrade pip setuptools; fi
- if [ -n "${NUMPYSPEC}" ]; then pip install --find-links http://wheels.astropy.org/
--find-links http://wheels2.astropy.org/ --use-wheel numpy$NUMPYSPEC sphinx nose;
fi
- if [ -n "${NUMPYSPEC}" ]; then pip install --find-links http://wheels.astropy.org/ --find-links http://wheels2.astropy.org/ --use-wheel numpy$NUMPYSPEC sphinx nose; fi
- pip install cpp-coveralls
install:
- python bootstrap.py
......
/**
* @author Andre Anjos <andre.anjos@idiap.ch>
* @date Tue 25 Mar 2014 14:01:13 CET
*
* @brief Implementation of the SVM machine using libsvm
*
* Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
*/
#include <bob.learn.libsvm/file.h>
#include <boost/format.hpp>
#include <boost/algorithm/string.hpp>
// Predicate used with std::count_if: true iff the character is the ':'
// separator of the libsvm "index:value" file format.
static bool is_colon(char c) { return c == ':'; }
/**
 * Opens a libsvm-format data file and scans it once to determine the total
 * number of samples (m_n_samples) and the largest 1-based feature index
 * seen anywhere (m_shape), then rewinds the stream so the data can be
 * read for real afterwards.
 *
 * @param filename path to the libsvm-format text file
 * @throws std::runtime_error if the file cannot be opened
 */
bob::learn::libsvm::File::File (const std::string& filename):
m_filename(filename),
m_file(m_filename.c_str()),
m_shape(0),
m_n_samples(0)
{
if (!m_file) {
boost::format s("cannot open file '%s'");
s % filename;
throw std::runtime_error(s.str());
}
//scan the whole file, gets the shape and total size
while (m_file.good()) {
//gets the next non-empty line
std::string line;
while (!line.size()) {
if (!m_file.good()) break;
std::getline(m_file, line);
boost::trim(line);
}
if (!m_file.good()) break;
int label;
size_t pos;
char separator;
double value;
// every "index:value" pair carries exactly one colon, so the colon count
// is the number of sparse entries on this line
size_t n_values = std::count_if(line.begin(), line.end(), is_colon);
std::istringstream iss(line);
iss >> label;
for (size_t k=0; k<n_values; ++k) {
iss >> pos >> separator >> value;
// track the largest feature index: that is the sample dimensionality
if (m_shape < pos) m_shape = pos;
}
++m_n_samples;
}
//reset the file to then begin to read it properly
m_file.clear();
m_file.seekg(0, std::ios_base::beg);
}
// Destructor: the std::ifstream member closes itself (RAII); nothing else to do.
bob::learn::libsvm::File::~File() {
}
// Rewinds the reader to the beginning of the data by closing and
// re-opening the underlying file stream.
void bob::learn::libsvm::File::reset() {
m_file.close();
m_file.open(m_filename.c_str());
}
/**
 * Checked read: verifies the output array has exactly m_shape positions
 * before delegating to the unchecked read_().
 *
 * @param label  receives the sample's integer label
 * @param values receives the dense feature vector
 * @return false when no further sample could be read (end of file)
 * @throws std::runtime_error on a shape mismatch
 */
bool bob::learn::libsvm::File::read(int& label, blitz::Array<double,1>& values) {
if ((size_t)values.extent(0) != m_shape) {
boost::format s("file '%s' contains %d entries per sample, but you gave me an array with only %d positions");
s % m_filename % m_shape % values.extent(0);
throw std::runtime_error(s.str());
}
//read the data.
return read_(label, values);
}
bool bob::learn::libsvm::File::read_(int& label, blitz::Array<double,1>& values) {
//if the file is at the end, just raise, you should have checked
if (!m_file.good()) return false;
//gets the next non-empty line
std::string line;
while (!line.size()) {
if (!m_file.good()) return false;
std::getline(m_file, line);
boost::trim(line);
}
std::istringstream iss(line);
iss >> label;
int pos;
char separator;
double value;
values = 0; ///zero values all over as the data is sparse on the files
for (size_t k=0; k<m_shape; ++k) {
iss >> pos >> separator >> value;
values(pos-1) = value;
}
return true;
}
/**
* @author Andre Anjos <andre.anjos@idiap.ch>
* @date Sat Dec 17 14:41:56 2011 +0100
*
* @brief Implementation of the SVM machine using libsvm
*
* Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
*/
#include <bob.learn.libsvm/machine.h>
#include <sys/stat.h>
#include <boost/format.hpp>
#include <boost/filesystem.hpp>
#include <bob.core/check.h>
#include <bob.core/logging.h>
/**
 * Returns the directory for temporary files: the value of the TMPDIR
 * environment variable when set, otherwise "/tmp".
 */
std::string bob::learn::libsvm::_tmpdir() {
const char* envdir = getenv("TMPDIR");
return envdir ? std::string(envdir) : std::string("/tmp");
}
/**
 * Generates a unique temporary file name (the file itself is NOT left on
 * disk) with the given extension appended, rooted at _tmpdir().
 *
 * @param extension suffix appended to the generated name (e.g. ".svm")
 * @return the full path of a name that was unique at generation time
 *
 * NOTE(review): on POSIX the mkstemp'd file is closed and removed before
 * the extension is appended, so there is a window in which another
 * process could grab the same name — presumably acceptable here; confirm.
 */
std::string bob::learn::libsvm::_tmpfile(const std::string& extension) {
boost::filesystem::path tpl = bob::learn::libsvm::_tmpdir();
tpl /= std::string("bob_tmpfile_XXXXXX");
boost::shared_array<char> char_tpl(new char[tpl.string().size()+1]);
strcpy(char_tpl.get(), tpl.string().c_str());
#ifdef _WIN32
mktemp(char_tpl.get());
#else
int fd = mkstemp(char_tpl.get());
close(fd);
boost::filesystem::remove(char_tpl.get());
#endif
std::string res = char_tpl.get();
res += extension;
return res;
}
/**
 * A wrapper, to standardize model destruction across libsvm versions:
 * the API call was renamed from svm_destroy_model() to
 * svm_free_and_destroy_model() (taking a pointer-to-pointer) in
 * libsvm 3.0.
 */
static void svm_model_free(svm_model*& m) {
#if LIBSVM_VERSION >= 300
svm_free_and_destroy_model(&m);
#else
svm_destroy_model(m);
#endif
}
/**
 * Serializes ("pickles") an svm_model into a byte array by saving it to a
 * temporary file through libsvm's own writer and reading the bytes back.
 * The temporary file is always removed.
 *
 * @param model the model to serialize (must be non-null)
 * @return the raw bytes of the libsvm model file
 * @throws std::runtime_error if saving or re-reading the file fails
 */
blitz::Array<uint8_t,1> bob::learn::libsvm::svm_pickle
(const boost::shared_ptr<svm_model> model)
{
std::string tmp_filename = bob::learn::libsvm::_tmpfile(".svm");
//save it to a temporary file
if (svm_save_model(tmp_filename.c_str(), model.get())) {
boost::format s("cannot save SVM to file `%s' while copying model");
s % tmp_filename;
throw std::runtime_error(s.str());
}
//gets total size of file; the stat() return value was previously
//ignored, which would have used an uninitialized st_size on failure
struct stat filestatus;
if (stat(tmp_filename.c_str(), &filestatus) != 0) {
boost::filesystem::remove(tmp_filename);
boost::format s("cannot stat temporary SVM file `%s'");
s % tmp_filename;
throw std::runtime_error(s.str());
}
//reload the data from the file in binary format
std::ifstream binfile(tmp_filename.c_str(), std::ios::binary);
blitz::Array<uint8_t,1> buffer(filestatus.st_size);
binfile.read(reinterpret_cast<char*>(buffer.data()), filestatus.st_size);
if (!binfile) {
boost::filesystem::remove(tmp_filename);
boost::format s("cannot read back temporary SVM file `%s'");
s % tmp_filename;
throw std::runtime_error(s.str());
}
//unlink the temporary file
boost::filesystem::remove(tmp_filename);
//finally, return the pickled data
return buffer;
}
/**
 * Loads an svm_model from file and wraps it in a shared_ptr whose deleter
 * releases it through svm_model_free(). Returns a null pointer when
 * libsvm cannot load the file.
 */
static boost::shared_ptr<svm_model> make_model(const char* filename) {
// NOTE: std::ptr_fun (deprecated in C++11, removed in C++17) was replaced
// by an equivalent lambda deleter.
boost::shared_ptr<svm_model> retval(svm_load_model(filename),
[](svm_model* m) { svm_model_free(m); });
#if LIBSVM_VERSION > 315
if (retval) retval->sv_indices = 0; ///< force initialization: see ticket #109
#endif
return retval;
}
/**
 * Reverts the pickling process: dumps the byte buffer into a temporary
 * file and reloads it through libsvm's own model reader. The temporary
 * file is removed on success.
 *
 * @param buffer raw bytes previously produced by svm_pickle()
 * @return the reconstructed model
 * @throws std::runtime_error if the model cannot be reloaded
 */
boost::shared_ptr<svm_model> bob::learn::libsvm::svm_unpickle
(const blitz::Array<uint8_t,1>& buffer) {
std::string tmp_filename = bob::learn::libsvm::_tmpfile(".svm");
std::ofstream binfile(tmp_filename.c_str(), std::ios::binary);
binfile.write(reinterpret_cast<const char*>(buffer.data()), buffer.size());
binfile.close();
//reload the file using the appropriate libsvm loading method
boost::shared_ptr<svm_model> retval = make_model(tmp_filename.c_str());
if (!retval) {
boost::format s("cannot open model file '%s'");
s % tmp_filename;
throw std::runtime_error(s.str());
}
//unlinks the temporary file
boost::filesystem::remove(tmp_filename);
//finally, return the reconstructed (unpickled) model
return retval;
}
/**
 * Re-derives the expected input size from the support vectors of the
 * current model and (re-)allocates the input node cache and the scaling
 * vectors (subtract=0, divide=1, i.e. no scaling by default).
 */
void bob::learn::libsvm::Machine::reset() {
//gets the expected size for the input from the SVM
m_input_size = 0;
for (int k=0; k<m_model->l; ++k) {
svm_node* end = m_model->SV[k];
// each SV is a -1-terminated list of (index,value) nodes; the input
// size is the largest 1-based index over all support vectors
while (end->index != -1) {
if (end->index > (int)m_input_size) m_input_size = end->index;
++end;
}
}
//create and reset cache; +1 leaves room for the -1 terminator node
m_input_cache.reset(new svm_node[1 + m_input_size]);
m_input_sub.resize(inputSize());
m_input_sub = 0.0;
m_input_div.resize(inputSize());
m_input_div = 1.0;
}
/**
 * Builds a Machine by loading a libsvm model file from disk.
 *
 * @param model_file path to a file saved with svm_save_model()
 * @throws std::runtime_error if the model cannot be loaded
 */
bob::learn::libsvm::Machine::Machine(const std::string& model_file):
m_model(make_model(model_file.c_str()))
{
if (!m_model) {
boost::format s("cannot open model file '%s'");
s % model_file;
throw std::runtime_error(s.str());
}
reset();
}
/**
 * Builds a Machine from an HDF5 configuration containing a pickled
 * "svm_model" byte array plus the input scaling vectors. Emits a warning
 * when the model was created with a different libsvm major version.
 *
 * @param config open HDF5 file positioned at the machine's group
 */
bob::learn::libsvm::Machine::Machine(bob::io::base::HDF5File& config):
m_model()
{
uint64_t version = 0;
config.getAttribute(".", "version", version);
// only the major version (hundreds digit) matters for format changes
if ( (LIBSVM_VERSION/100) > (version/100) ) {
//if the major version changes... be aware!
boost::format m("SVM being loaded from `%s:%s' (created with libsvm-%d) with libsvm-%d. You may want to read the libsvm FAQ at http://www.csie.ntu.edu.tw/~cjlin/libsvm/log to check if there were format changes between these versions. If not, you can safely ignore this warning and even tell us to remove it via our bug tracker: https://github.com/idiap/bob/issues");
m % config.filename() % config.cwd() % version % LIBSVM_VERSION;
bob::core::warn << m.str() << std::endl;
}
m_model = bob::learn::libsvm::svm_unpickle(config.readArray<uint8_t,1>("svm_model"));
reset(); ///< note: has to be done before reading scaling parameters
config.readArray("input_subtract", m_input_sub);
config.readArray("input_divide", m_input_div);
}
/**
 * Builds a Machine around an already-loaded libsvm model.
 *
 * @param model shared ownership of the model; must be non-null
 * @throws std::runtime_error if the model pointer is null
 */
bob::learn::libsvm::Machine::Machine(boost::shared_ptr<svm_model> model)
: m_model(model)
{
if (!m_model) {
throw std::runtime_error("null SVM model cannot be processed");
}
reset();
}
// Destructor: members (shared_ptr model, shared_array cache) clean up
// themselves.
bob::learn::libsvm::Machine::~Machine() { }
// True when the underlying model was trained with probability estimates.
bool bob::learn::libsvm::Machine::supportsProbability() const {
return svm_check_probability_model(m_model.get());
}
// Expected number of input features, derived from the support vectors in
// reset().
size_t bob::learn::libsvm::Machine::inputSize() const {
return m_input_size;
}
// Number of outputs: 1 for a binary problem, otherwise the class count.
size_t bob::learn::libsvm::Machine::outputSize() const {
size_t retval = svm_get_nr_class(m_model.get());
return (retval == 2)? 1 : retval;
}
// Raw class count as reported by libsvm (2 for binary problems).
size_t bob::learn::libsvm::Machine::numberOfClasses() const {
return svm_get_nr_class(m_model.get());
}
/**
 * Returns the user-assigned label of the i-th class.
 *
 * @param i zero-based class index, must be < numberOfClasses()
 * @throws std::runtime_error when i is out of range
 */
int bob::learn::libsvm::Machine::classLabel(size_t i) const {
if (i >= (size_t)svm_get_nr_class(m_model.get())) {
boost::format s("request for label of class %d in SVM with %d classes is not legal");
s % (int)i % svm_get_nr_class(m_model.get());
throw std::runtime_error(s.str());
}
return m_model->label[i];
}
// SVM type (C-SVC, nu-SVC, ...) converted to the project's enum.
bob::learn::libsvm::machine_t bob::learn::libsvm::Machine::machineType() const {
return (machine_t)svm_get_svm_type(m_model.get());
}
// Kernel type (linear, polynomial, RBF, ...) converted to the project's enum.
bob::learn::libsvm::kernel_t bob::learn::libsvm::Machine::kernelType() const {
return (kernel_t)m_model->param.kernel_type;
}
// Degree used by the polynomial kernel (ignored by other kernels).
int bob::learn::libsvm::Machine::polynomialDegree() const {
return m_model->param.degree;
}
// Gamma parameter of the kernel (polynomial/RBF/sigmoid).
double bob::learn::libsvm::Machine::gamma() const {
return m_model->param.gamma;
}
// coef0 parameter of the kernel (polynomial/sigmoid).
double bob::learn::libsvm::Machine::coefficient0() const {
return m_model->param.coef0;
}
/**
 * Sets the per-feature subtraction vector applied before prediction.
 *
 * @param v must have at least inputSize() positions; a private copy is kept
 * @throws std::runtime_error when v is too short
 */
void bob::learn::libsvm::Machine::setInputSubtraction(const blitz::Array<double,1>& v) {
if (inputSize() > (size_t)v.extent(0)) {
boost::format m("mismatch on the input subtraction dimension: expected a vector with **at least** %d positions, but you input %d");
m % inputSize() % v.extent(0);
throw std::runtime_error(m.str());
}
m_input_sub.reference(bob::core::array::ccopy(v));
}
/**
 * Sets the per-feature division vector applied before prediction.
 *
 * @param v must have at least inputSize() positions; a private copy is kept
 * @throws std::runtime_error when v is too short
 */
void bob::learn::libsvm::Machine::setInputDivision(const blitz::Array<double,1>& v) {
if (inputSize() > (size_t)v.extent(0)) {
boost::format m("mismatch on the input division dimension: expected a vector with **at least** %d positions, but you input %d");
m % inputSize() % v.extent(0);
throw std::runtime_error(m.str());
}
m_input_div.reference(bob::core::array::ccopy(v));
}
/**
 * Normalizes the user input ((input - sub) / div) and writes the non-zero
 * results into the pre-allocated libsvm node cache, using the 1-based
 * indices libsvm expects. The entry list is closed with an index of -1.
 */
static inline void copy(const blitz::Array<double,1>& input,
size_t cache_size, boost::shared_array<svm_node>& cache,
const blitz::Array<double,1>& sub, const blitz::Array<double,1>& div) {
size_t used = 0; ///< next free slot in the cache
for (size_t i = 0; i < cache_size; ++i) {
const double scaled = (input(i) - sub(i)) / div(i);
if (scaled == 0.0) continue; ///< sparse representation: zeros are implicit
cache[used].index = i + 1;
cache[used].value = scaled;
++used;
}
cache[used].index = -1; ///< sentinel: libsvm stops at index == -1
}
/**
 * Unchecked prediction: normalizes the input into the node cache and asks
 * libsvm for the predicted class label. Input size is NOT validated here
 * (see predictClass()).
 */
int bob::learn::libsvm::Machine::predictClass_
(const blitz::Array<double,1>& input) const {
copy(input, m_input_size, m_input_cache, m_input_sub, m_input_div);
int retval = round(svm_predict(m_model.get(), m_input_cache.get()));
return retval;
}
/**
 * Checked prediction: validates the input length, then delegates to
 * predictClass_().
 *
 * @param input feature vector with at least inputSize() components
 * @return the predicted class label
 * @throws std::runtime_error when the input is too short
 */
int bob::learn::libsvm::Machine::predictClass
(const blitz::Array<double,1>& input) const {
if ((size_t)input.extent(0) < inputSize()) {
boost::format s("input for this SVM should have **at least** %d components, but you provided an array with %d elements instead");
s % inputSize() % input.extent(0);
throw std::runtime_error(s.str());
}
return predictClass_(input);
}
/**
 * Unchecked prediction with decision values: fills `scores` with the
 * pairwise decision values and returns the predicted label. Before
 * libsvm 2.91 svm_predict_values() returned void, so the label is
 * obtained by a second svm_predict() call in that case.
 */
int bob::learn::libsvm::Machine::predictClassAndScores_
(const blitz::Array<double,1>& input,
blitz::Array<double,1>& scores) const {
copy(input, m_input_size, m_input_cache, m_input_sub, m_input_div);
#if LIBSVM_VERSION > 290
int retval = round(svm_predict_values(m_model.get(), m_input_cache.get(), scores.data()));
#else
svm_predict_values(m_model.get(), m_input_cache.get(), scores.data());
int retval = round(svm_predict(m_model.get(), m_input_cache.get()));
#endif
return retval;
}
/**
 * Checked prediction with decision values: validates input length, score
 * array contiguity and score array size (N*(N-1)/2 pairwise values, or 1
 * for binary problems) before delegating to predictClassAndScores_().
 *
 * @param input  feature vector with at least inputSize() components
 * @param scores C-contiguous output for the pairwise decision values
 * @return the predicted class label
 * @throws std::runtime_error on any validation failure
 */
int bob::learn::libsvm::Machine::predictClassAndScores
(const blitz::Array<double,1>& input,
blitz::Array<double,1>& scores) const {
if ((size_t)input.extent(0) < inputSize()) {
boost::format s("input for this SVM should have **at least** %d components, but you provided an array with %d elements instead");
s % inputSize() % input.extent(0);
throw std::runtime_error(s.str());
}
if (!bob::core::array::isCContiguous(scores)) {
throw std::runtime_error("scores output array should be C-style contiguous and what you provided is not");
}
// number of pairwise decision values: 1 when binary, else N*(N-1)/2
size_t N = outputSize();
size_t size = N < 2 ? 1 : (N*(N-1))/2;
if ((size_t)scores.extent(0) != size) {
boost::format s("output scores for this SVM (%d classes) should have %d components, but you provided an array with %d elements instead");
s % svm_get_nr_class(m_model.get()) % size % scores.extent(0);
throw std::runtime_error(s.str());
}
return predictClassAndScores_(input, scores);
}
/**
 * Unchecked prediction with probabilities: fills `probabilities` with the
 * per-class probability estimates and returns the predicted label. No
 * validation is done here (see predictClassAndProbabilities()).
 */
int bob::learn::libsvm::Machine::predictClassAndProbabilities_
(const blitz::Array<double,1>& input,
blitz::Array<double,1>& probabilities) const {
copy(input, m_input_size, m_input_cache, m_input_sub, m_input_div);
int retval = round(svm_predict_probability(m_model.get(), m_input_cache.get(), probabilities.data()));
return retval;
}
/**
 * Checked prediction with probabilities: validates input length, that the
 * model supports probability estimates, and the probability array's
 * contiguity and size, before delegating to
 * predictClassAndProbabilities_().
 *
 * @param input         feature vector with at least inputSize() components
 * @param probabilities C-contiguous output of outputSize() components
 * @return the predicted class label
 * @throws std::runtime_error on any validation failure
 */
int bob::learn::libsvm::Machine::predictClassAndProbabilities
(const blitz::Array<double,1>& input,
blitz::Array<double,1>& probabilities) const {
if ((size_t)input.extent(0) < inputSize()) {
boost::format s("input for this SVM should have **at least** %d components, but you provided an array with %d elements instead");
s % inputSize() % input.extent(0);
throw std::runtime_error(s.str());
}
if (!supportsProbability()) {
throw std::runtime_error("this SVM does not support probabilities");
}
if (!bob::core::array::isCContiguous(probabilities)) {
throw std::runtime_error("probabilities output array should be C-style contiguous and what you provided is not");
}
if ((size_t)probabilities.extent(0) != outputSize()) {
boost::format s("output probabilities for this SVM should have %d components, but you provided an array with %d elements instead");
s % outputSize() % probabilities.extent(0);
throw std::runtime_error(s.str());
}
return predictClassAndProbabilities_(input, probabilities);
}
/**
 * Saves the model to disk in libsvm's native text format. Note: the
 * scaling vectors are NOT stored in this format (use the HDF5 overload
 * for that).
 *
 * @throws std::runtime_error when svm_save_model() fails
 */
void bob::learn::libsvm::Machine::save(const std::string& filename) const {
if (svm_save_model(filename.c_str(), m_model.get())) {
boost::format s("cannot save SVM model to file '%s'");
s % filename;
throw std::runtime_error(s.str());
}
}
/**
 * Saves the full machine state to HDF5: the pickled libsvm model, the
 * input scaling vectors, and the libsvm version used (checked on reload).
 */
void bob::learn::libsvm::Machine::save(bob::io::base::HDF5File& config) const {
config.setArray("svm_model", bob::learn::libsvm::svm_pickle(m_model));
config.setArray("input_subtract", m_input_sub);
config.setArray("input_divide", m_input_div);
uint64_t version = LIBSVM_VERSION;
config.setAttribute(".", "version", version);
}
/**
* @author Andre Anjos <andre.anjos@idiap.ch>
* @date Sun 4 Mar 10:02:45 2012 CET
*
* @brief Implementation of the SVM training methods
*
* Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
*/
#include <bob.learn.libsvm/trainer.h>
#include <boost/format.hpp>
#include <boost/make_shared.hpp>
#include <bob.core/logging.h>
#ifdef BOB_DEBUG
//remove newline
#include <boost/algorithm/string/trim.hpp>
// Returns a copy of `s` with leading and trailing whitespace removed
// (used to drop the newline libsvm appends to its messages).
static std::string strip(const char* s) {
std::string trimmed(s);
boost::algorithm::trim(trimmed);
return trimmed;
}
#endif
// Routes libsvm's internal print messages to the project's debug channel.
// NOTE(review): strip() is only compiled when BOB_DEBUG is defined; this
// presumably works because TDEBUG1 expands to nothing otherwise — confirm
// against the macro's definition in bob.core/logging.h.
static void debug_libsvm(const char* s) {
TDEBUG1("[libsvm-" << LIBSVM_VERSION << "] " << strip(s));
}
/**
 * Builds a trainer with the given machine/kernel types and solver
 * options. The remaining svm_parameter fields are filled with fixed
 * defaults here (degree=3, gamma=0, coef0=0, C=1, nu=0.5, p=0.1) and the
 * class-weight fields are zeroed, to be derived from the data later.
 *
 * @param machine_type SVM formulation (C-SVC, nu-SVC, ...)
 * @param kernel_type  kernel function to use
 * @param cache_size   libsvm kernel cache size, in MB
 * @param eps          stopping tolerance for the solver
 * @param shrinking    whether to use the shrinking heuristics
 * @param probability  whether to train probability estimates
 */
bob::learn::libsvm::Trainer::Trainer(
bob::learn::libsvm::machine_t machine_type,
bob::learn::libsvm::kernel_t kernel_type,
double cache_size,
double eps,
bool shrinking,
bool probability
)
{
m_param.svm_type = machine_type;
m_param.kernel_type = kernel_type;
m_param.degree = 3;
m_param.gamma = 0.;
m_param.coef0 = 0.;
m_param.cache_size = cache_size;
m_param.eps = eps;
m_param.C = 1;
m_param.nu = 0.5;
m_param.p = 0.1;
m_param.shrinking = shrinking;
m_param.probability = probability;
//extracted from the data
m_param.nr_weight = 0;
m_param.weight_label = 0;
m_param.weight = 0;
}
// Destructor: m_param owns no heap data at this point; nothing to free.
bob::learn::libsvm::Trainer::~Trainer() { }
/**
* Erases an SVM problem:
*
* struct svm_problem {
* int l; //number of entries
* double* y; //labels
* svm_node** x; //each set terminated with a -1 index entry
* };
*
* At svm-train the nodes for each entry are allocated globally, what is
* probably more efficient from the allocation perspective. It still requires
* libsvm to scan the data twice to understand how many nodes need to be
* allocated globally.
*/
/**
 * Releases an svm_problem built by this trainer: the labels array, the
 * single contiguous node array (anchored at x[0]) and the entry-pointer
 * array, then the problem struct itself.
 */
static void delete_problem(svm_problem* p) {
if (!p) return; ///< tolerate null, like free()/delete do
delete[] p->y; //all labels
// All nodes live in one contiguous allocation whose start is stored in
// x[0]. Guard against an empty problem: when l == 0 the x array has zero
// entries and reading x[0] would be out of bounds.
if (p->l > 0) delete[] p->x[0];
delete[] p->x; //entry pointers
delete p;
}
/**
* Allocates an svm_problem matrix
*/
/**
 * Allocates an svm_problem able to hold `entries` samples. Labels are
 * left uninitialized and every entry pointer starts out null; the caller
 * fills both in afterwards.
 */
static svm_problem* new_problem(size_t entries) {
svm_problem* problem = new svm_problem;
problem->l = (int)entries;
problem->y = new double[entries];
problem->x = new svm_node*[entries];
for (size_t i = 0; i < entries; ++i) problem->x[i] = 0;
return problem;
}
/**
* Converts the input arrayset data into an svm_problem matrix, used by libsvm
* training routines. Updates "gamma" at the svm_parameter's.
*/
static boost::shared_ptr<