Commit f3d96894 authored by André Anjos's avatar André Anjos 💬
Browse files

Add remaining C++ classes into main module

parent 46895ca7
......@@ -196,6 +196,8 @@ setup(
Extension("xbob.learn.libsvm._library",
[
"xbob/learn/libsvm/file.cpp",
"xbob/learn/libsvm/machine.cpp",
"xbob/learn/libsvm/trainer.cpp",
],
packages = packages,
include_dirs = include_dirs,
......@@ -204,6 +206,7 @@ setup(
define_macros = define_macros,
library_dirs = library_dirs,
libraries = libraries,
boost_modules = ['filesystem'],
),
],
......
......@@ -8,14 +8,14 @@
* Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
*/
#ifndef BOB_MACHINE_SVM_H
#define BOB_MACHINE_SVM_H
#ifndef BOB_LEARN_LIBSVM_MACHINE_H
#define BOB_LEARN_LIBSVM_MACHINE_H
#include <svm.h>
#include <boost/shared_ptr.hpp>
#include <boost/shared_array.hpp>
#include <blitz/array.h>
#include <fstream>
#include <svm.h>
#include <bob/io/HDF5File.h>
// We need to declare the svm_model type for libsvm < 3.0.0. The next bit of
......@@ -42,86 +42,7 @@ struct svm_model {
};
#endif
namespace bob { namespace machine {
/**
* @ingroup MACHINE
* @{
*/
/**
* Loads a given libsvm data file. The data file format, as defined on the
* library README is like this:
*
* [label] [index1]:[value1] [index2]:[value2] ...
*
* The labels are integer values, and so are the indexes, which start from "1"
* (and not from zero as a C programmer would expect). The values are
* floating point.
*
* Zero values are suppressed - this is a sparse format.
*/
class SVMFile {
public: //api
/**
* Constructor, initializes the file readout. The whole file is scanned once
* to determine the sample width (largest index found) and the number of
* samples; the stream is then rewound to the beginning.
*
* @param filename Path to the libsvm data file to read
*
* @throws std::runtime_error if the file cannot be opened
*/
SVMFile (const std::string& filename);
/**
* Destructor virtualization
*/
virtual ~SVMFile();
/**
* Returns the size of each entry in the file, in number of floats
*/
inline size_t shape() const { return m_shape; }
/**
* Returns the number of samples in the file.
*/
inline size_t samples() const { return m_n_samples; }
/**
* Resets the file, going back to the beginning (the file is closed and
* opened again).
*/
void reset();
/**
* Reads the next entry. Values are organized according to the indexed
* labels at the file. Returns 'false' if the file is over or something
* goes wrong.
*
* @param label Receives the label of the entry that was read
* @param values Receives the (dense) values of the entry; its extent must
* be equal to shape()
*
* @throws std::runtime_error if the extent of `values` differs from shape()
*/
bool read(int& label, blitz::Array<double,1>& values);
/**
* Reads the next entry on the file, but without checking the extent of
* `values` against shape(). Returns 'false' if the file is over or
* something goes wrong reading the file.
*/
bool read_(int& label, blitz::Array<double,1>& values);
/**
* Returns the name of the file being read.
*/
inline const std::string& filename() const { return m_filename; }
/**
* Tests if the file is still good to go. These three methods forward the
* state flags of the underlying input stream.
*/
inline bool good() const { return m_file.good(); }
inline bool eof() const { return m_file.eof(); } ///< end-of-file reached?
inline bool fail() const { return m_file.fail(); } ///< last operation failed?
private: //representation
std::string m_filename; ///< The path to the file being read
std::ifstream m_file; ///< The file I'm reading.
size_t m_shape; ///< Number of floats in samples
size_t m_n_samples; ///< total number of samples at input file
};
namespace bob { namespace learn { namespace libsvm {
/**
* Here is the problem: libsvm does not provide a simple way to extract the
......@@ -141,7 +62,7 @@ namespace bob { namespace machine {
/**
* Interface to svm_model, from libsvm. Incorporates prediction.
*/
class SupportVector {
class Machine {
public: //api
......@@ -169,7 +90,7 @@ namespace bob { namespace machine {
* 1.0). If you need scaling to be applied, set it individually using the
* appropriate methods below.
*/
SupportVector(const std::string& model_file);
Machine(const std::string& model_file);
/**
* Builds a new Support Vector Machine from an HDF5 file containing the
......@@ -177,23 +98,23 @@ namespace bob { namespace machine {
* from the file. Using this constructor assures a 100% state recovery
* from previous sessions.
*/
SupportVector(bob::io::HDF5File& config);
Machine(bob::io::HDF5File& config);
/**
* Builds a new SVM model from a trained model. Scaling parameters will
* be neutral (subtraction := 0.0, division := 1.0).
*
* @note: This method is typically only used by the respective
* bob::trainer::SupportVectorTrainer as it requires the creation of the
* bob::learn::libsvm::Trainer as it requires the creation of the
* object "svm_model". You can still make use of it if you decide to
* implement the model instantiation yourself.
*/
SupportVector(boost::shared_ptr<svm_model> model);
Machine(boost::shared_ptr<svm_model> model);
/**
* Virtual d'tor
*/
virtual ~SupportVector();
virtual ~Machine();
/**
* Tells the input size this machine expects
......@@ -354,9 +275,9 @@ namespace bob { namespace machine {
private: //not implemented
SupportVector(const SupportVector& other);
Machine(const Machine& other);
SupportVector& operator= (const SupportVector& other);
Machine& operator= (const Machine& other);
private: //methods
......@@ -375,9 +296,6 @@ namespace bob { namespace machine {
};
/**
* @}
*/
}}
}}}
#endif /* BOB_MACHINE_SVM_H */
#endif /* BOB_LEARN_LIBSVM_MACHINE_H */
/**
* @file bob/trainer/SVMTrainer.h
* @date Sat Dec 17 14:41:56 2011 +0100
* @author Andre Anjos <andre.anjos@idiap.ch>
* @date Sat Dec 17 14:41:56 2011 +0100
*
* @brief C++ bindings to libsvm (training bits)
* @brief C++ bindings to libsvm (training bit)
*
* Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
* Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
*/
#ifndef BOB_TRAINER_SVMTRAINER_H
#define BOB_TRAINER_SVMTRAINER_H
#ifndef BOB_LEARN_LIBSVM_TRAINER_H
#define BOB_LEARN_LIBSVM_TRAINER_H
#include <vector>
#include "kernel_machine_cxx.h"
#include "machine.h"
namespace bob { namespace trainer {
/**
* @ingroup TRAINER
* @{
*/
namespace bob { namespace learn { namespace libsvm {
/**
* This class emulates the behavior of the command line utility called
......@@ -30,7 +25,7 @@ namespace bob { namespace trainer {
*
* Feel free to implement those and remove these remarks.
*/
class SVMTrainer {
class Trainer {
public: //api
......@@ -38,9 +33,9 @@ namespace bob { namespace trainer {
* Builds a new trainer setting the default parameters as defined in the
* command line application svm-train.
*/
SVMTrainer(
bob::machine::SupportVector::svm_t svm_type=bob::machine::SupportVector::C_SVC,
bob::machine::SupportVector::kernel_t kernel_type=bob::machine::SupportVector::RBF,
Trainer(
bob::learn::libsvm::Machine::svm_t svm_type=bob::learn::libsvm::Machine::C_SVC,
bob::learn::libsvm::Machine::kernel_t kernel_type=bob::learn::libsvm::Machine::RBF,
int degree=3, //for poly
double gamma=0., //for poly/rbf/sigmoid
double coef0=0., //for poly/sigmoid
......@@ -57,7 +52,7 @@ namespace bob { namespace trainer {
/**
* Destructor virtualisation
*/
virtual ~SVMTrainer();
virtual ~Trainer();
/**
* Trains a new machine for multi-class classification. If the number of
......@@ -66,14 +61,14 @@ namespace bob { namespace trainer {
* from 1 (i.e., 1, 2, 3, 4, etc.). If what you want is regression, the
* size of the input data array should be 1.
*/
boost::shared_ptr<bob::machine::SupportVector> train
boost::shared_ptr<bob::learn::libsvm::Machine> train
(const std::vector<blitz::Array<double,2> >& data) const;
/**
* This version accepts scaling parameters that will be applied
* column-wise to the input data.
*/
boost::shared_ptr<bob::machine::SupportVector> train
boost::shared_ptr<bob::learn::libsvm::Machine> train
(const std::vector<blitz::Array<double,2> >& data,
const blitz::Array<double,1>& input_subtract,
const blitz::Array<double,1>& input_division) const;
......@@ -81,11 +76,11 @@ namespace bob { namespace trainer {
/**
* Getters and setters for all parameters
*/
bob::machine::SupportVector::svm_t getSvmType() const { return (bob::machine::SupportVector::svm_t)m_param.svm_type; }
void setSvmType(bob::machine::SupportVector::svm_t v) { m_param.svm_type = v; }
bob::learn::libsvm::Machine::svm_t getSvmType() const { return (bob::learn::libsvm::Machine::svm_t)m_param.svm_type; }
void setSvmType(bob::learn::libsvm::Machine::svm_t v) { m_param.svm_type = v; }
bob::machine::SupportVector::kernel_t getKernelType() const { return (bob::machine::SupportVector::kernel_t)m_param.kernel_type; }
void setKernelType(bob::machine::SupportVector::kernel_t v) { m_param.kernel_type = v; }
bob::learn::libsvm::Machine::kernel_t getKernelType() const { return (bob::learn::libsvm::Machine::kernel_t)m_param.kernel_type; }
void setKernelType(bob::learn::libsvm::Machine::kernel_t v) { m_param.kernel_type = v; }
int getDegree() const { return m_param.degree; }
void setDegree(int v) { m_param.degree = v; }
......@@ -125,9 +120,6 @@ namespace bob { namespace trainer {
};
/**
* @}
*/
}}
}}}
#endif /* BOB_TRAINER_SVMTRAINER_H */
#endif /* BOB_LEARN_LIBSVM_TRAINER_H */
/**
* @file machine/cxx/SVM.cc
* @date Sat Dec 17 14:41:56 2011 +0100
* @author Andre Anjos <andre.anjos@idiap.ch>
*
* @brief Implementation of the SVM machine using libsvm
*
* Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland
*/
#include <string>
#include <cmath>
#include <boost/format.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/filesystem.hpp>
#include <bob/machine/SVM.h>
#include <bob/core/check.h>
#include <bob/core/logging.h>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <sys/types.h>
#include <sys/stat.h>
#include <algorithm>
/**
 * Predicate telling whether a character is the ':' that separates an index
 * from its value in a libsvm data line.
 */
static bool is_colon(char i) {
  const char separator = ':';
  return separator == i;
}
/**
 * Opens the libsvm data file and pre-scans it completely to find out the
 * sample width (the largest index used on any line) and the number of
 * non-empty lines (samples). The stream is rewound afterwards so that read()
 * and read_() start from the first sample.
 *
 * @param filename Path to the libsvm data file
 *
 * @throws std::runtime_error if the file cannot be opened
 */
bob::machine::SVMFile::SVMFile (const std::string& filename):
  m_filename(filename),
  m_file(m_filename.c_str()),
  m_shape(0),
  m_n_samples(0)
{
  if (!m_file) {
    boost::format s("cannot open file '%s'");
    s % filename;
    throw std::runtime_error(s.str());
  }

  //scan the whole file, gets the shape and total size. Looping on the result
  //of std::getline() (instead of testing good() after the call) makes sure a
  //last line that is not terminated by a newline is still accounted for: in
  //that case getline() sets eofbit, but the line itself is valid.
  std::string line;
  while (std::getline(m_file, line)) {
    boost::trim(line);
    if (line.empty()) continue; //blank lines carry no sample

    //every "index:value" pair contributes exactly one colon
    const size_t n_values = std::count_if(line.begin(), line.end(), is_colon);

    std::istringstream iss(line);
    int label;
    iss >> label;

    for (size_t k=0; k<n_values; ++k) {
      size_t pos;
      char separator;
      double value;
      //stops at the first malformed pair instead of using unparsed values
      if (!(iss >> pos >> separator >> value)) break;
      if (m_shape < pos) m_shape = pos;
    }

    ++m_n_samples;
  }

  //reset the file to then begin to read it properly
  m_file.clear();
  m_file.seekg(0, std::ios_base::beg);
}
/**
 * Destructor. The body is empty: no explicit clean-up is performed here.
 */
bob::machine::SVMFile::~SVMFile() {
}
/**
 * Rewinds the reader by closing and re-opening the underlying file.
 *
 * The stream state flags are cleared explicitly between the two operations:
 * before C++11, open() does not reset eofbit/failbit, so a reset() issued
 * after the end of the file was reached would otherwise leave the stream
 * permanently unusable. If re-opening fails, the stream sets failbit again
 * and good() reports it.
 */
void bob::machine::SVMFile::reset() {
  m_file.close();
  m_file.clear();
  m_file.open(m_filename.c_str());
}
/**
 * Checked variant of read_(): verifies that the output array has exactly
 * as many positions as the sample width detected at construction, then
 * delegates the actual parsing to read_().
 *
 * @param label Receives the label of the sample
 * @param values Receives the values of the sample
 *
 * @return Whatever read_() returns
 *
 * @throws std::runtime_error if the extent of `values` is not m_shape
 */
bool bob::machine::SVMFile::read(int& label, blitz::Array<double,1>& values) {
  const bool extent_matches = ((size_t)values.extent(0) == m_shape);

  //happy path first: the array is compatible, just read the data.
  if (extent_matches) return read_(label, values);

  boost::format s("file '%s' contains %d entries per sample, but you gave me an array with only %d positions");
  s % m_filename % m_shape % values.extent(0);
  throw std::runtime_error(s.str());
}
bool bob::machine::SVMFile::read_(int& label, blitz::Array<double,1>& values) {
//if the file is at the end, just raise, you should have checked
if (!m_file.good()) return false;
//gets the next non-empty line
std::string line;
while (!line.size()) {
if (!m_file.good()) return false;
std::getline(m_file, line);
boost::trim(line);
}
std::istringstream iss(line);
iss >> label;
int pos;
char separator;
double value;
values = 0; ///zero values all over as the data is sparse on the files
for (size_t k=0; k<m_shape; ++k) {
iss >> pos >> separator >> value;
values(pos-1) = value;
}
return true;
}
/**
 * Uniform way of releasing a libsvm model, independently of the version of
 * libsvm this file is compiled against: the release function was renamed
 * (and changed its signature) in libsvm 3.0.0.
 */
static void svm_model_free(svm_model*& m) {
#if LIBSVM_VERSION < 300
  svm_destroy_model(m);
#else
  svm_free_and_destroy_model(&m);
#endif
}
/**
 * Serializes a libsvm model into a byte array. libsvm can only save models
 * to files, so the model is written to a temporary file whose contents are
 * then read back in binary mode.
 *
 * The temporary file is removed on every path, including failures.
 *
 * @param model The model to serialize
 *
 * @return An array with the exact bytes libsvm wrote for this model
 *
 * @throws std::runtime_error if the model cannot be saved, or if the
 * temporary file cannot be read back
 */
blitz::Array<uint8_t,1> bob::machine::svm_pickle
(const boost::shared_ptr<svm_model> model)
{
  std::string tmp_filename = bob::core::tmpfile(".svm");

  //save it to a temporary file (svm_save_model() returns 0 on success)
  const bool saved = (svm_save_model(tmp_filename.c_str(), model.get()) == 0);

  blitz::Array<uint8_t,1> buffer;
  bool loaded = false;

  if (saved) {
    //gets total size of file
    struct stat filestatus;
    if (stat(tmp_filename.c_str(), &filestatus) == 0) {
      buffer.resize(filestatus.st_size);

      //reload the data from the file in binary format. The stream lives in
      //this scope only, so the file is closed before it gets removed.
      std::ifstream binfile(tmp_filename.c_str(), std::ios::binary);
      binfile.read(reinterpret_cast<char*>(buffer.data()), filestatus.st_size);
      loaded = !binfile.fail();
    }
  }

  //unlink the temporary file, whatever happened above
  boost::filesystem::remove(tmp_filename);

  if (!saved) {
    boost::format s("cannot save SVM to file `%s' while copying model");
    s % tmp_filename;
    throw std::runtime_error(s.str());
  }

  if (!loaded) {
    boost::format s("cannot read back SVM from file `%s' while copying model");
    s % tmp_filename;
    throw std::runtime_error(s.str());
  }

  //finally, return the pickled data
  return buffer;
}
/**
 * Loads a libsvm model from a file and wraps it in a shared pointer that
 * releases it through svm_model_free().
 *
 * @param filename Path to a model file in libsvm's own format
 *
 * @return The loaded model; the pointer is empty if libsvm could not load
 * the file.
 */
static boost::shared_ptr<svm_model> make_model(const char* filename) {
  svm_model* loaded = svm_load_model(filename);
  boost::shared_ptr<svm_model> model(loaded, std::ptr_fun(svm_model_free));
#if LIBSVM_VERSION > 315
  if (loaded) model->sv_indices = 0; ///< force initialization: see ticket #109
#endif
  return model;
}
/**
 * Reverts the pickling process, returns the model.
 *
 * The bytes are written to a temporary file, which is then loaded with
 * libsvm's own model reader. The temporary file is removed before the result
 * is checked, so it is not left behind when loading fails.
 *
 * @param buffer The bytes previously produced by svm_pickle()
 *
 * @return The model reconstructed from the buffer
 *
 * @throws std::runtime_error if libsvm cannot load a model from the buffer
 */
boost::shared_ptr<svm_model> bob::machine::svm_unpickle
(const blitz::Array<uint8_t,1>& buffer) {

  std::string tmp_filename = bob::core::tmpfile(".svm");

  std::ofstream binfile(tmp_filename.c_str(), std::ios::binary);
  binfile.write(reinterpret_cast<const char*>(buffer.data()), buffer.size());
  binfile.close();

  //reload the file using the appropriate libsvm loading method
  boost::shared_ptr<svm_model> retval = make_model(tmp_filename.c_str());

  //unlinks the temporary file; done before the check below so that a failed
  //load does not leak it
  boost::filesystem::remove(tmp_filename);

  if (!retval) {
    boost::format s("cannot open model file '%s'");
    s % tmp_filename;
    throw std::runtime_error(s.str());
  }

  //finally, return the unpickled model
  return retval;
}
void bob::machine::SupportVector::reset() {
//gets the expected size for the input from the SVM
m_input_size = 0;
for (int k=0; k<m_model->l; ++k) {
svm_node* end = m_model->SV[k];
while (end->index != -1) {
if (end->index > (int)m_input_size) m_input_size = end->index;
++end;
}
}
//create and reset cache
m_input_cache.reset(new svm_node[1 + m_input_size]);
m_input_sub.resize(inputSize());
m_input_sub = 0.0;
m_input_div.resize(inputSize());
m_input_div = 1.0;
}
/**
 * Builds a machine from a model file written in libsvm's own format.
 *
 * @param model_file Path to the model file
 *
 * @throws std::runtime_error if the model cannot be loaded from the file
 */
bob::machine::SupportVector::SupportVector(const std::string& model_file):
  m_model(make_model(model_file.c_str()))
{
  if (m_model) {
    //model loaded: derive input size, cache and neutral scaling from it
    reset();
    return;
  }

  boost::format s("cannot open model file '%s'");
  s % model_file;
  throw std::runtime_error(s.str());
}
/**
 * Builds a machine from an HDF5 file: reads the "version" attribute, the
 * pickled model ("svm_model") and the scaling parameters ("input_subtract"
 * and "input_divide").
 *
 * A warning is logged when the major version of the libsvm in use is
 * greater than the major version recorded in the file.
 *
 * @param config The HDF5 file to read the machine from
 */
bob::machine::SupportVector::SupportVector(bob::io::HDF5File& config):
  m_model()
{
  uint64_t version = 0;
  config.getAttribute(".", "version", version);

  //versions are compared by their hundreds, i.e., the major version
  const uint64_t stored_major = version/100;
  const bool runtime_is_newer = (LIBSVM_VERSION/100) > stored_major;

  if (runtime_is_newer) {
    //if the major version changes... be aware!
    boost::format m("SVM being loaded from `%s:%s' (created with libsvm-%d) with libsvm-%d. You may want to read the libsvm FAQ at http://www.csie.ntu.edu.tw/~cjlin/libsvm/log to check if there were format changes between these versions. If not, you can safely ignore this warning and even tell us to remove it via our bug tracker: https://github.com/idiap/bob/issues");
    m % config.filename() % config.cwd() % version % LIBSVM_VERSION;
    bob::core::warn << m.str() << std::endl;
  }

  m_model = bob::machine::svm_unpickle(config.readArray<uint8_t,1>("svm_model"));

  //note: reset() has to run before reading the scaling parameters, as it
  //overwrites them with neutral values
  reset();

  config.readArray("input_subtract", m_input_sub);
  config.readArray("input_divide", m_input_div);
}
/**
 * Builds a machine around an already existing libsvm model.
 *
 * @param model The model to wrap; must not be empty
 *
 * @throws std::runtime_error if `model` is an empty pointer
 */
bob::machine::SupportVector::SupportVector(boost::shared_ptr<svm_model> model)
  : m_model(model)
{
  if (m_model) {
    reset();
    return;
  }

  throw std::runtime_error("null SVM model cannot be processed");
}
/**
 * Destructor. The body is empty: no explicit clean-up is performed here.
 */
bob::machine::SupportVector::~SupportVector() { }
/**
 * Tells whether the loaded model passes libsvm's
 * svm_check_probability_model() test, i.e. whether that call returns a
 * non-zero value.
 */
bool bob::machine::SupportVector::supportsProbability() const {
  const int has_probability_info = svm_check_probability_model(m_model.get());
  return has_probability_info != 0;
}
/**
 * Returns the input size this machine expects, as computed by reset() (the
 * largest feature index found among the support vectors of the model).
 */
size_t bob::machine::SupportVector::inputSize() const {
return m_input_size;
}
/**
 * Returns the output size of the machine: 1 when the model has exactly two
 * classes, the number of classes reported by libsvm otherwise.
 */
size_t bob::machine::SupportVector::outputSize() const {
  const size_t n_classes = svm_get_nr_class(m_model.get());
  if (n_classes == 2) return 1;
  return n_classes;
}
/**
 * Returns the number of classes of the loaded model, as reported by
 * svm_get_nr_class().
 */
size_t bob::machine::SupportVector::numberOfClasses() const {
return svm_get_nr_class(m_model.get());
}
/**
 * Returns the label stored in the model for the i-th class.
 *
 * @param i Zero-based position of the class; must be smaller than the
 * number of classes in the model
 *
 * @throws std::runtime_error if `i` is out of range
 */
int bob::machine::SupportVector::classLabel(size_t i) const {
  const int n_classes = svm_get_nr_class(m_model.get());

  if (i < (size_t)n_classes) return m_model->label[i];

  boost::format s("request for label of class %d in SVM with %d classes is not legal");
  s % (int)i % n_classes;
  throw std::runtime_error(s.str());
}
/**
 * Returns the SVM type of the loaded model: the value reported by
 * svm_get_svm_type(), cast to svm_t.
 */
bob::machine::SupportVector::svm_t bob::machine::SupportVector::machineType() const {
return (svm_t)svm_get_svm_type(m_model.get());
}
/**
 * Returns the kernel type stored in the parameters of the loaded model,
 * cast to kernel_t.
 */
bob::machine::SupportVector::kernel_t bob::machine::SupportVector::kernelType() const {
return (kernel_t)m_model->param.kernel_type;
}
/**
 * Returns the `degree` value stored in the parameters of the loaded model.
 */
int bob::machine::SupportVector::polynomialDegree() const {
return m_model->param.degree;
}
/**
 * Returns the `gamma` value stored in the parameters of the loaded model.
 */
double bob::machine::SupportVector::gamma() const {
return m_model->param.gamma;
}
/**
 * Returns the `coef0` value stored in the parameters of the loaded model.
 */
double bob::machine::SupportVector::coefficient0() const {
return m_model->param.coef0;
}
void bob::machine::SupportVector::setInputSubtraction(const blitz::Array<double,1>& v) {
if (inputSize() > (size_t)v.extent(0)) {
boost::format m("mismatch on the input subtraction dimension: expected a vector with **at least** %d positions, but you input %d");
m % inputSize() % v.extent(0);
throw std::runtime_error(m.str());