Commit 00cf3a29 authored by Manuel Günther's avatar Manuel Günther

Moved C++ code into its own Library; removed dependency on Bob

parent ecf64885
@@ -14,16 +14,11 @@ matrix:
- NUMPYSPEC===1.8.0
before_install:
- sudo add-apt-repository -y ppa:biometrics/bob
- sudo add-apt-repository -y ppa:biometrics/bob.preview
- sudo apt-get update -qq
- sudo apt-get install -qq --force-yes bob-dev
- sudo apt-get install -qq libsvm-dev
- if [ -n "${NUMPYSPEC}" ]; then sudo apt-get install -qq libatlas-dev libatlas-base-dev
liblapack-dev gfortran; fi
- sudo apt-get install -qq --force-yes libboost-all-dev libblitz1-dev libhdf5-serial-dev libsvm-dev
- if [ -n "${NUMPYSPEC}" ]; then sudo apt-get install -qq libatlas-dev libatlas-base-dev liblapack-dev gfortran; fi
- if [ -n "${NUMPYSPEC}" ]; then pip install --upgrade pip setuptools; fi
- if [ -n "${NUMPYSPEC}" ]; then pip install --find-links http://wheels.astropy.org/
--find-links http://wheels2.astropy.org/ --use-wheel numpy$NUMPYSPEC sphinx nose;
fi
- if [ -n "${NUMPYSPEC}" ]; then pip install --find-links http://wheels.astropy.org/ --find-links http://wheels2.astropy.org/ --use-wheel numpy$NUMPYSPEC sphinx nose; fi
- pip install cpp-coveralls
install:
- python bootstrap.py
......
/**
* @author Andre Anjos <andre.anjos@idiap.ch>
* @date Tue 25 Mar 2014 14:01:13 CET
*
* @brief Implementation of the SVM machine using libsvm
*
* Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
*/
#include <bob.learn.libsvm/file.h>
#include <boost/format.hpp>
#include <boost/algorithm/string.hpp>
static bool is_colon(char i) { return i == ':'; }
bob::learn::libsvm::File::File (const std::string& filename):
m_filename(filename),
m_file(m_filename.c_str()),
m_shape(0),
m_n_samples(0)
{
if (!m_file) {
boost::format s("cannot open file '%s'");
s % filename;
throw std::runtime_error(s.str());
}
  //scan the whole file to get the shape and the total number of samples
while (m_file.good()) {
//gets the next non-empty line
std::string line;
while (!line.size()) {
if (!m_file.good()) break;
std::getline(m_file, line);
boost::trim(line);
}
if (!m_file.good()) break;
int label;
size_t pos;
char separator;
double value;
size_t n_values = std::count_if(line.begin(), line.end(), is_colon);
std::istringstream iss(line);
iss >> label;
for (size_t k=0; k<n_values; ++k) {
iss >> pos >> separator >> value;
if (m_shape < pos) m_shape = pos;
}
++m_n_samples;
}
  //reset the file so it can be read again from the beginning
m_file.clear();
m_file.seekg(0, std::ios_base::beg);
}
bob::learn::libsvm::File::~File() {
}
void bob::learn::libsvm::File::reset() {
m_file.close();
m_file.open(m_filename.c_str());
}
bool bob::learn::libsvm::File::read(int& label, blitz::Array<double,1>& values) {
if ((size_t)values.extent(0) != m_shape) {
boost::format s("file '%s' contains %d entries per sample, but you gave me an array with only %d positions");
s % m_filename % m_shape % values.extent(0);
throw std::runtime_error(s.str());
}
//read the data.
return read_(label, values);
}
bool bob::learn::libsvm::File::read_(int& label, blitz::Array<double,1>& values) {
  //if the file is at the end, just return false; the caller should have checked
if (!m_file.good()) return false;
//gets the next non-empty line
std::string line;
while (!line.size()) {
if (!m_file.good()) return false;
std::getline(m_file, line);
boost::trim(line);
}
std::istringstream iss(line);
iss >> label;
int pos;
char separator;
double value;
  values = 0; //zero all values first, since the data in the file is sparse
  //iterate only over the entries actually present on this (sparse) line
  size_t n_values = std::count_if(line.begin(), line.end(), is_colon);
  for (size_t k=0; k<n_values; ++k) {
iss >> pos >> separator >> value;
values(pos-1) = value;
}
return true;
}
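For illustration only (not part of this commit), here is a minimal sketch of how the File reader above could be used to iterate over a libsvm-formatted text file, i.e. lines such as "3 1:0.5 4:2.0" (a label followed by sparse index:value pairs). It assumes the public header exposes a shape() accessor for m_shape, which this diff does not show, and "train.data" is a placeholder filename.

// Hypothetical usage sketch -- File::shape() is assumed to return m_shape.
#include <bob.learn.libsvm/file.h>
#include <blitz/array.h>
#include <iostream>

int main() {
  bob::learn::libsvm::File f("train.data");    // scans the file once on construction
  blitz::Array<double,1> features(f.shape());  // dense buffer sized to the largest index
  int label;
  while (f.read(label, features)) {            // returns false at end-of-file
    std::cout << "label=" << label << " first feature=" << features(0) << std::endl;
  }
  return 0;
}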
/**
* @author Andre Anjos <andre.anjos@idiap.ch>
* @date Sun 4 Mar 10:02:45 2012 CET
*
* @brief Implementation of the SVM training methods
*
* Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
*/
#include <bob.learn.libsvm/trainer.h>
#include <boost/format.hpp>
#include <boost/make_shared.hpp>
#include <bob.core/logging.h>
#ifdef BOB_DEBUG
//strips surrounding whitespace (including the trailing newline)
#include <boost/algorithm/string/trim.hpp>
static std::string strip(const char* s) {
std::string t(s);
boost::algorithm::trim(t);
return t;
}
#endif
static void debug_libsvm(const char* s) {
TDEBUG1("[libsvm-" << LIBSVM_VERSION << "] " << strip(s));
}
bob::learn::libsvm::Trainer::Trainer(
bob::learn::libsvm::machine_t machine_type,
bob::learn::libsvm::kernel_t kernel_type,
double cache_size,
double eps,
bool shrinking,
bool probability
)
{
m_param.svm_type = machine_type;
m_param.kernel_type = kernel_type;
m_param.degree = 3;
m_param.gamma = 0.;
m_param.coef0 = 0.;
m_param.cache_size = cache_size;
m_param.eps = eps;
m_param.C = 1;
m_param.nu = 0.5;
m_param.p = 0.1;
m_param.shrinking = shrinking;
m_param.probability = probability;
//extracted from the data
m_param.nr_weight = 0;
m_param.weight_label = 0;
m_param.weight = 0;
}
bob::learn::libsvm::Trainer::~Trainer() { }
/**
* Erases an SVM problem:
*
* struct svm_problem {
* int l; //number of entries
* double* y; //labels
* svm_node** x; //each set terminated with a -1 index entry
* };
*
 * In svm-train, the nodes for all entries are allocated in a single global
 * block, which is probably more efficient from the allocation perspective.
 * It still requires scanning the data twice to figure out how many nodes
 * need to be allocated.
*/
static void delete_problem(svm_problem* p) {
delete[] p->y; //all labels
delete[] p->x[0]; //all entries
delete[] p->x; //entry pointers
delete p;
}
/**
* Allocates an svm_problem matrix
*/
static svm_problem* new_problem(size_t entries) {
svm_problem* retval = new svm_problem;
retval->l = (int)entries;
retval->y = new double[entries];
typedef svm_node* svm_node_ptr;
retval->x = new svm_node_ptr[entries];
for (size_t k=0; k<entries; ++k) retval->x[k] = 0;
return retval;
}
/**
* Converts the input arrayset data into an svm_problem matrix, used by libsvm
 * training routines. Updates "gamma" in the given svm_parameter structure.
*/
static boost::shared_ptr<svm_problem> data2problem
(const std::vector<blitz::Array<double, 2> >& data,
const blitz::Array<double,1>& sub, const blitz::Array<double,1>& div,
svm_parameter& param) {
//counts the number of samples required
size_t entries = 0;
for (size_t k=0; k<data.size(); ++k)
entries += data[k].extent(blitz::firstDim);
  //allocates the container that will represent the problem; at this stage we
  //allocate one entry per vector, but not yet the space where the features
  //will be stored. That comes next.
boost::shared_ptr<svm_problem> problem(new_problem(entries),
std::ptr_fun(delete_problem));
//choose labels.
  if ((data.size() <= 1) || (data.size() > 16)) {
boost::format m("Only supports SVMs for binary or multi-class classification problems (up to 16 classes). You passed me a list of %d arraysets.");
m % data.size();
throw std::runtime_error(m.str());
}
std::vector<double> labels;
labels.reserve(data.size());
if (data.size() == 2) {
//keep libsvm ordering
labels.push_back(+1.);
labels.push_back(-1.);
}
else { //data.size() == 3, 4, ..., 16
for (size_t k=0; k<data.size(); ++k) labels.push_back(k+1);
}
  //first, just count how many nodes we need; we have no choice but a 2-pass
  //instantiation here, since libsvm optimizes data access by requiring all
  //nodes to be allocated in a single block.
size_t nodes = 0; //total number of nodes to be allocated
blitz::Range all=blitz::Range::all();
int n_features = data[0].extent(blitz::secondDim);
blitz::Array<double,1> d(n_features); //for temporary feature manipulation
for (size_t k=0; k<data.size(); ++k) {
for (int i=0; i<data[k].extent(blitz::firstDim); ++i) {
d = (data[k](i,all)-sub)/div; //eval and copy in 1 instruction
for (int p=0; p<d.extent(blitz::firstDim); ++p) {
if (d(p)) {
++nodes;
}
}
++nodes; //one extra for the termination node "index == -1"
}
}
  //allocates all the nodes in one contiguous block, a la libsvm
svm_node* all_nodes = new svm_node[nodes];
//iterates over each class data and fills the svm_node's
int max_index = 0; //data width
size_t sample = 0; //sample counter
size_t node = 0; //node counter
for (size_t k=0; k<data.size(); ++k) {
for (int i=0; i<data[k].extent(blitz::firstDim); ++i) {
problem->x[sample] = &all_nodes[node]; //setup current sample base pointer
d = (data[k](i,all)-sub)/div; //eval and copy in 1 instruction
for (blitz::sizeType p=0; p<d.size(); ++p) {
if (d(p)) {
int index = p+1; //starts indexing at 1
all_nodes[node].index = index;
all_nodes[node].value = d(p);
if ( index > max_index ) max_index = index;
++node; //index within the current sample
}
}
//marks end of sequence
all_nodes[node].index = -1;
all_nodes[node].value = 0;
problem->y[sample] = labels[k];
++node;
++sample;
}
}
//extracted from svm-train.c
if (param.gamma == 0. && max_index > 0) {
param.gamma = 1.0/max_index;
}
//do not support pre-computed kernels...
if (param.kernel_type == PRECOMPUTED) {
    throw std::runtime_error("We currently do not support PRECOMPUTED kernels in these bindings to libsvm");
}
return problem;
}
/**
 * A wrapper to standardize freeing the svm_model across libsvm versions
*/
static void svm_model_free(svm_model*& m) {
#if LIBSVM_VERSION >= 300
svm_free_and_destroy_model(&m);
#else
svm_destroy_model(m);
#endif
}
bob::learn::libsvm::Machine* bob::learn::libsvm::Trainer::train
(const std::vector<blitz::Array<double, 2> >& data,
const blitz::Array<double,1>& input_subtraction,
const blitz::Array<double,1>& input_division) const {
//sanity check of input arraysets
int n_features = data[0].extent(blitz::secondDim);
for (size_t cl=0; cl<data.size(); ++cl) {
if (data[cl].extent(blitz::secondDim) != n_features) {
boost::format m("number of features (columns) of array for class %u (%d) does not match that of array for class 0 (%d)");
m % cl % data[cl].extent(blitz::secondDim) % n_features;
throw std::runtime_error(m.str());
}
}
//converts the input arraysets into something libsvm can digest
double save_gamma = m_param.gamma; ///< the next method may update it!
boost::shared_ptr<svm_problem> problem =
data2problem(data, input_subtraction, input_division,
const_cast<svm_parameter&>(m_param) ///< temporary cast
);
//checks parametrization to make sure all is alright.
const char* error_msg = svm_check_parameter(problem.get(), &m_param);
if (error_msg) {
const_cast<double&>(m_param.gamma) = save_gamma;
boost::format m("libsvm-%d reports: %s");
m % libsvm_version % error_msg;
    throw std::runtime_error(m.str());
}
//do the training, returns the new machine
#if LIBSVM_VERSION >= 291
svm_set_print_string_function(debug_libsvm);
#else
boost::format m("libsvm-%d does not support debugging stream setting");
m % libsvm_version;
debug_libsvm(m.str().c_str());
#endif
boost::shared_ptr<svm_model> model(svm_train(problem.get(), &m_param),
std::ptr_fun(svm_model_free));
const_cast<double&>(m_param.gamma) = save_gamma;
//save newly created machine to file, reload from there to get rid of memory
//dependencies due to the poorly implemented memory model in libsvm
boost::shared_ptr<svm_model> new_model =
bob::learn::libsvm::svm_unpickle(bob::learn::libsvm::svm_pickle(model));
auto retval = new bob::learn::libsvm::Machine(new_model);
//sets up the scaling parameters given as input
retval->setInputSubtraction(input_subtraction);
retval->setInputDivision(input_division);
return retval;
}
bob::learn::libsvm::Machine* bob::learn::libsvm::Trainer::train
(const std::vector<blitz::Array<double,2> >& data) const {
int n_features = data[0].extent(blitz::secondDim);
blitz::Array<double,1> sub(n_features);
sub = 0.;
blitz::Array<double,1> div(n_features);
div = 1.;
return train(data, sub, div);
}
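For illustration only (again, not part of this commit), a minimal sketch of driving the Trainer above for a two-class problem. The enumeration values C_SVC and RBF are assumed to exist in the bob::learn::libsvm namespace, mirroring stock libsvm, and the training data is dummy.

// Hypothetical usage sketch -- C_SVC and RBF are assumed enum values.
#include <bob.learn.libsvm/trainer.h>
#include <blitz/array.h>
#include <boost/shared_ptr.hpp>
#include <vector>

void train_example() {
  // two classes, 10 samples each, 5 features per sample (dummy, constant data)
  std::vector<blitz::Array<double,2> > data;
  data.push_back(blitz::Array<double,2>(10, 5));
  data.push_back(blitz::Array<double,2>(10, 5));
  data[0] = 0.5;   // samples of the first class (libsvm label +1)
  data[1] = -0.5;  // samples of the second class (libsvm label -1)

  bob::learn::libsvm::Trainer trainer(
    bob::learn::libsvm::C_SVC,  // machine_type (assumed enum value)
    bob::learn::libsvm::RBF,    // kernel_type (assumed enum value)
    100,                        // cache_size in MB
    1e-3,                       // eps, the stopping tolerance
    true,                       // shrinking
    false                       // probability estimates
  );

  // the overload without scaling parameters uses sub=0 and div=1 internally
  boost::shared_ptr<bob::learn::libsvm::Machine> machine(trainer.train(data));
}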
@@ -16,7 +16,7 @@
#include <blitz/array.h>
#include <fstream>
#include <svm.h>
#include <bob/io/HDF5File.h>
#include <bob.io.base/HDF5File.h>
// @cond SKIPDOXYGEN
// We need to declare the svm_model type for libsvm < 3.0.0. The next bit of
@@ -62,6 +62,23 @@ namespace bob { namespace learn { namespace libsvm {
PRECOMPUTED
}; /* kernel type used on the machine */
/**
* @brief Chooses the correct temporary directory to use, like this:
*
* - The environment variable TMPDIR, if it is defined. For security reasons
* this only happens if the program is not SUID or SGID enabled.
* - The directory /tmp.
*/
std::string _tmpdir();
/**
 * @brief Returns the full path of a temporary file in _tmpdir().
*
* @param extension The desired extension for the file
*/
std::string _tmpfile(const std::string& extension=".hdf5");
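The declarations above describe the selection logic only in prose; for illustration, here is a sketch of how _tmpdir() could implement it using POSIX calls. The real implementation lives in one of the collapsed files of this commit and may differ in detail.

// Illustrative sketch only -- not the actual _tmpdir() implementation.
#include <cstdlib>     // std::getenv
#include <string>
#include <unistd.h>    // getuid, geteuid, getgid, getegid

static std::string example_tmpdir() {
  // honour TMPDIR only when not running SUID/SGID, so that an elevated
  // process cannot be redirected to an attacker-controlled directory
  bool elevated = (getuid() != geteuid()) || (getgid() != getegid());
  const char* env = std::getenv("TMPDIR");
  if (env && !elevated) return std::string(env);
  return std::string("/tmp");
}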
/**
* Here is the problem: libsvm does not provide a simple way to extract the
* information from the SVM structure. There are lots of cases and allocation
@@ -100,7 +117,7 @@ namespace bob { namespace learn { namespace libsvm {
* from the file. Using this constructor assures a 100% state recovery
* from previous sessions.
*/
Machine(bob::io::HDF5File& config);
Machine(bob::io::base::HDF5File& config);
/**
* Builds a new SVM model from a trained model. Scaling parameters will
@@ -273,7 +290,7 @@ namespace bob { namespace learn { namespace libsvm {
* single instruction parameter loading, which includes both the model
* and the scaling parameters.
*/
void save(bob::io::HDF5File& config) const;
void save(bob::io::base::HDF5File& config) const;
private: //not implemented
......
@@ -11,7 +11,7 @@
#define BOB_LEARN_LIBSVM_TRAINER_H
#include <vector>
#include "machine.h"
#include <bob.learn.libsvm/machine.h>
namespace bob { namespace learn { namespace libsvm {
......
@@ -5,9 +5,12 @@
* @brief Binds configuration information available from bob
*/
#include <Python.h>
#ifdef NO_IMPORT_ARRAY
#undef NO_IMPORT_ARRAY
#endif
#include <bob.blitz/capi.h>
#include <bob.blitz/cleanup.h>
#include <bob/config.h>
#include <string>
#include <cstdlib>
@@ -17,11 +20,7 @@
#include <boost/format.hpp>
#include <svm.h>
#ifdef NO_IMPORT_ARRAY
#undef NO_IMPORT_ARRAY
#endif
#include <bob.blitz/capi.h>
#include <bob.blitz/cleanup.h>
#include <bob.core/config.h>
#include <bob.io.base/config.h>
#include <bob.learn.libsvm/config.h>
@@ -93,13 +92,6 @@ static PyObject* python_version() {
return Py_BuildValue("s", f.str().c_str());
}
/**
* Bob version, API version and platform
*/
static PyObject* bob_version() {
return Py_BuildValue("sis", BOB_VERSION, BOB_API_VERSION, BOB_PLATFORM);
}
/**
* Numpy version
*/
@@ -115,6 +107,13 @@ static PyObject* bob_blitz_version() {
return Py_BuildValue("{ss}", "api", BOOST_PP_STRINGIZE(BOB_BLITZ_API_VERSION));
}
/**
* bob.core c/c++ api version
*/
static PyObject* bob_core_version() {
return Py_BuildValue("{ss}", "api", BOOST_PP_STRINGIZE(BOB_CORE_API_VERSION));
}
/**
* bob.io.base c/c++ api version
*/
@@ -128,6 +127,7 @@ static PyObject* build_version_dictionary() {
if (!retval) return 0;
auto retval_ = make_safe(retval);
if (!dict_steal(retval, "Bob", bob_core_version())) return 0;
if (!dict_set(retval, "Blitz++", BZ_VERSION)) return 0;
if (!dict_steal(retval, "LIBSVM", get_libsvm_version())) return 0;
if (!dict_steal(retval, "Boost", boost_version())) return 0;
@@ -135,8 +135,8 @@
if (!dict_steal(retval, "Python", python_version())) return 0;
if (!dict_steal(retval, "NumPy", numpy_version())) return 0;
if (!dict_steal(retval, "bob.blitz", bob_blitz_version())) return 0;
if (!dict_steal(retval, "bob.core", bob_core_version())) return 0;
if (!dict_steal(retval, "bob.io.base", bob_io_base_version())) return 0;
if (!dict_steal(retval, "Bob", bob_version())) return 0;
Py_INCREF(retval);
return retval;
......
@@ -10,18 +10,18 @@ extensions = bob.buildout
auto-checkout = *
develop = src/bob.extension
src/bob.blitz
src/bob.core
src/bob.io.base
.
; options for bob.buildout extension
debug = true
verbose = true
prefixes = /idiap/group/torch5spro/releases/preview/install/linux-x86_64-release
/Users/andre/work/bob/b/dbg/
[sources]
bob.extension = git https://github.com/bioidiap/bob.extension
bob.blitz = git https://github.com/bioidiap/bob.blitz
bob.core = git https://github.com/bioidiap/bob.core
bob.io.base = git https://github.com/bioidiap/bob.io.base
[scripts]
......
@@ -3,21 +3,16 @@
# Andre Anjos <andre.anjos@idiap.ch>
# Mon 16 Apr 08:18:08 2012 CEST
bob_packages = ['bob.core', 'bob.io.base']
from setuptools import setup, find_packages, dist
dist.Distribution(dict(setup_requires=['bob.extension', 'bob.blitz', 'bob.io.base']))
dist.Distribution(dict(setup_requires=['bob.blitz'] + bob_packages))
from bob.extension.utils import egrep, find_header, find_library
from bob.blitz.extension import Extension
import bob.io.base
from bob.blitz.extension import Extension, Library, build_ext
packages = ['boost']
boost_modules = ['filesystem']
import os
package_dir = os.path.dirname(os.path.realpath(__file__))
package_dir = os.path.join(package_dir, 'bob', 'learn', 'libsvm', 'include')
include_dirs = [
package_dir,
bob.io.base.get_include(),
]
packages = ['bob-core >= 1.2.2', 'bob-io >= 1.2.2']
version = '2.0.0a0'
# process libsvm requirement
@@ -150,7 +145,7 @@ class libsvm:
return [('HAVE_LIBSVM', '1'), ('LIBSVM_VERSION', '"%s"' % self.version)]
pkg = libsvm()
extra_compile_args = ['-isystem', pkg.include_directory]
system_include_dirs = [pkg.include_directory]
library_dirs = [pkg.library_directory]
libraries = pkg.libraries
define_macros = pkg.macros()
@@ -174,53 +169,71 @@ setup(
install_requires=[
'setuptools',
'bob.blitz',
'bob.core',
'bob.io.base',
],
namespace_packages=[
"bob",
"bob.learn",
],
],
ext_modules = [
Extension("bob.learn.libsvm.version",
[
"bob/learn/libsvm/version.cpp",
],
packages = packages,
include_dirs = include_dirs,
],
bob_packages = bob_packages,
version = version,
system_include_dirs = system_include_dirs,
define_macros = define_macros,
library_dirs = library_dirs,
libraries = libraries,
),
Library("bob.learn.libsvm.bob_learn_libsvm",
[
"bob/learn/libsvm/cpp/file.cpp",
"bob/learn/libsvm/cpp/machine.cpp",
"bob/learn/libsvm/cpp/trainer.cpp",
],
bob_packages = bob_packages,
version = version,
extra_compile_args = extra_compile_args,
system_include_dirs = system_include_dirs,
define_macros = define_macros,
library_dirs = library_dirs,
libraries = libraries,
),
packages = packages,
boost_modules = boost_modules,
),
Extension("bob.learn.libsvm._library",
[
"bob/learn/libsvm/utils.cpp",
"bob/learn/libsvm/file.cpp",
"bob/learn/libsvm/machine.cpp",
"bob/learn/libsvm/trainer.cpp",
"bob/learn/libsvm/pyutils.cpp",
"bob/learn/libsvm/pyfile.cpp",
"bob/learn/libsvm/pymachine.cpp",
"bob/learn/libsvm/pytrainer.cpp",
"bob/learn/libsvm/main.cpp",
],
packages = packages,
include_dirs = include_dirs,
],
bob_packages = bob_packages,
version = version,
extra_compile_args = extra_compile_args,
system_include_dirs = system_include_dirs,
define_macros = define_macros,
library_dirs = library_dirs,
libraries = libraries,
boost_modules = ['filesystem'],
),
],
packages = packages,
boost_modules = boost_modules,
),
],
cmdclass = {
'build_ext': build_ext
},
entry_points={
'console_scripts': [
],
},
],
},
classifiers = [
'Development Status :: 3 - Alpha',
......