Commit 239605df authored by André Anjos

Fix output behavior; All machine tests are now passing

parent dbeacb4e
......@@ -5,6 +5,7 @@
[buildout]
parts = scripts
eggs = xbob.learn.libsvm
ipdb
extensions = xbob.buildout
mr.developer
auto-checkout = *
......
......@@ -752,7 +752,7 @@ static PyObject* PyBobLearnLibsvmMachine_forward
}
else {
if (input->shape[1] != (Py_ssize_t)self->cxx->inputSize()) {
PyErr_Format(PyExc_RuntimeError, "2D `input' array should have %" PY_FORMAT_SIZE_T "d columns, matching `%s' input size, not %" PY_FORMAT_SIZE_T "d elements", self->cxx->inputSize(), Py_TYPE(self)->tp_name, input->shape[1]);
PyErr_Format(PyExc_RuntimeError, "2D `input' array should have %" PY_FORMAT_SIZE_T "d columns, matching `%s' input size, not %" PY_FORMAT_SIZE_T "d", self->cxx->inputSize(), Py_TYPE(self)->tp_name, input->shape[1]);
return 0;
}
if (output && input->shape[0] != output->shape[0]) {
......@@ -777,7 +777,7 @@ static PyObject* PyBobLearnLibsvmMachine_forward
}
else {
auto bzin = PyBlitzArrayCxx_AsBlitz<double,2>(input);
auto bzout = PyBlitzArrayCxx_AsBlitz<int64_t,2>(output);
auto bzout = PyBlitzArrayCxx_AsBlitz<int64_t,1>(output);
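//`output' receives one int64 class label per input row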
blitz::Range all = blitz::Range::all();
for (int k=0; k<bzin->extent(0); ++k) {
blitz::Array<double,1> i_ = (*bzin)(k, all);
......@@ -803,16 +803,24 @@ PyDoc_STRVAR(s_scores_str, "predict_class_and_scores");
PyDoc_STRVAR(s_scores_doc,
"o.predict_class_and_scores(input, [cls, [score]]) -> (array, array)\n\
\n\
Calculates the **predicted class** and output scores for the SVM using\n\
the this Machine, given one single feature vector or multiple ones.\n\
Calculates the **predicted class** and output scores for the SVM\n\
using this Machine, given a single feature vector or multiple\n\
ones.\n\
\n\
The ``input`` array can be either a 1D or a 2D 64-bit float array.\n\
The ``cls`` array, if provided, must be of type ``int64``,\n\
always uni-dimensional. The ``cls`` output corresponds to the\n\
predicted classes for each of the input rows. The ``score`` array,\n\
if provided, must be of type ``float64`` (like ``input``) and have\n\
as many rows as ``input`` and ``o.shape[1]`` columns, matching the \n\
output size of this SVM.\n\
as many rows as ``input`` and ``C`` columns, matching the number\n\
of pairwise class combinations. To score, LIBSVM compares the SV\n\
outputs for each pair of classes in the machine and emits one\n\
score per pair. If there is only one output, the problem is binary\n\
and a single score is produced (``C = 1``). If the SVM is\n\
multi-class, then ``C`` is the total number of possible class\n\
pairs: if ``N`` is the number of classes in this SVM, then\n\
:math:`C = N\\cdot(N-1)/2`. If ``N = 3``, then ``C = 3``; if\n\
``N = 5``, then ``C = 10``.\n\
\n\
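For illustration only - a hypothetical sketch assuming a trained\n\
3-class machine ``m`` and a 1D ``float64`` feature vector ``x``::\n\
\n\
   cls, scores = m.predict_class_and_scores(x)\n\
   assert cls.shape == (1,)    #one predicted class\n\
   assert scores.shape == (3,) #C = 3*(3-1)/2 pairwise scores\n\
\n\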
This method always returns a tuple composed of the predicted classes\n\
for each row in the ``input`` array, with data type ``int64`` and\n\
......@@ -844,6 +852,11 @@ static PyObject* PyBobLearnLibsvmMachine_predictClassAndScores
auto cls_ = make_xsafe(cls);
auto score_ = make_xsafe(score);
//calculates the number of scores expected: combinatorics between
//all class outputs
Py_ssize_t N = self->cxx->outputSize();
Py_ssize_t number_of_scores = N < 2 ? 1 : (N*(N-1))/2;
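//e.g. a single output (binary problem) yields 1 score; 3 classes yield 3
//pairwise scores and 5 classes yield 10 (see the docstring above)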
if (input->type_num != NPY_FLOAT64) {
PyErr_Format(PyExc_TypeError, "`%s' only supports 64-bit float arrays for input array `input'", Py_TYPE(self)->tp_name);
return 0;
......@@ -883,8 +896,8 @@ static PyObject* PyBobLearnLibsvmMachine_predictClassAndScores
PyErr_Format(PyExc_RuntimeError, "1D `cls' array should have 1 element, not %" PY_FORMAT_SIZE_T "d elements", cls->shape[0]);
return 0;
}
if (score && score->shape[0] != (Py_ssize_t)self->cxx->outputSize()) {
PyErr_Format(PyExc_RuntimeError, "1D `score' array should have %" PY_FORMAT_SIZE_T "d elements matching the output size of `%s', not %" PY_FORMAT_SIZE_T "d elements", self->cxx->outputSize(), Py_TYPE(self)->tp_name, score->shape[0]);
if (score && score->shape[0] != number_of_scores) {
PyErr_Format(PyExc_RuntimeError, "1D `score' array should have %" PY_FORMAT_SIZE_T "d elements matching the expected number of scores for `%s', not %" PY_FORMAT_SIZE_T "d elements", number_of_scores, Py_TYPE(self)->tp_name, score->shape[0]);
return 0;
}
}
......@@ -897,8 +910,8 @@ static PyObject* PyBobLearnLibsvmMachine_predictClassAndScores
PyErr_Format(PyExc_RuntimeError, "1D `cls' array should have %" PY_FORMAT_SIZE_T "d elements matching the number of rows on `input', not %" PY_FORMAT_SIZE_T "d rows", input->shape[0], cls->shape[0]);
return 0;
}
if (score && score->shape[1] != (Py_ssize_t)self->cxx->outputSize()) {
PyErr_Format(PyExc_RuntimeError, "2D `score' array should have %" PY_FORMAT_SIZE_T "d columns matching the output size of `%s', not %" PY_FORMAT_SIZE_T "d elements", self->cxx->outputSize(), Py_TYPE(self)->tp_name, score->shape[1]);
if (score && score->shape[1] != number_of_scores) {
PyErr_Format(PyExc_RuntimeError, "2D `score' array should have %" PY_FORMAT_SIZE_T "d columns matching the expected number of scores for `%s', not %" PY_FORMAT_SIZE_T "d columns", number_of_scores, Py_TYPE(self)->tp_name, score->shape[1]);
return 0;
}
if (score && input->shape[0] != score->shape[0]) {
......@@ -919,11 +932,11 @@ static PyObject* PyBobLearnLibsvmMachine_predictClassAndScores
if (!score) {
Py_ssize_t osize[2];
if (input->ndim == 1) {
osize[0] = self->cxx->outputSize();
osize[0] = number_of_scores;
}
else {
osize[0] = input->shape[0];
osize[1] = self->cxx->outputSize();
osize[1] = number_of_scores;
}
score = (PyBlitzArrayObject*)PyBlitzArray_SimpleNew(NPY_FLOAT64, input->ndim, osize);
score_ = make_safe(score);
......@@ -980,8 +993,8 @@ The ``cls`` array, if provided, must be of type ``int64``,\n\
always uni-dimensional. The ``cls`` output corresponds to the\n\
predicted classes for each of the input rows. The ``prob`` array,\n\
if provided, must be of type ``float64`` (like ``input``) and have\n\
as many rows as ``input`` and ``o.shape[1]`` columns, matching the \n\
output size of this SVM.\n\
as many rows as ``input`` and ``len(o.labels)`` columns, matching\n\
the number of classes for this SVM.\n\
\n\
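For illustration only - a hypothetical sketch assuming a trained\n\
3-class machine ``m`` with probability support and a 1D ``float64``\n\
feature vector ``x``::\n\
\n\
   cls, prob = m.predict_class_and_probabilities(x)\n\
   assert prob.shape == (3,) #one probability per class label\n\
\n\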
This method always returns a tuple composed of the predicted classes\n\
for each row in the ``input`` array, with data type ``int64`` and\n\
......@@ -995,6 +1008,11 @@ and ``prob`` arrays to avoid constant re-allocation.\n\
static PyObject* PyBobLearnLibsvmMachine_predictClassAndProbabilities
(PyBobLearnLibsvmMachineObject* self, PyObject* args, PyObject* kwds) {
if (!self->cxx->supportsProbability()) {
PyErr_Format(PyExc_RuntimeError, "`%s' object does not support probabilities - use `o.probability' to check for this property before calling this method", Py_TYPE(self)->tp_name);
return 0;
}
static const char* const_kwlist[] = {"input", "cls", "prob", 0};
static char** kwlist = const_cast<char**>(const_kwlist);
......@@ -1052,8 +1070,8 @@ static PyObject* PyBobLearnLibsvmMachine_predictClassAndProbabilities
PyErr_Format(PyExc_RuntimeError, "1D `cls' array should have 1 element, not %" PY_FORMAT_SIZE_T "d elements", cls->shape[0]);
return 0;
}
if (prob && prob->shape[0] != (Py_ssize_t)self->cxx->outputSize()) {
PyErr_Format(PyExc_RuntimeError, "1D `prob' array should have %" PY_FORMAT_SIZE_T "d elements matching the output size of `%s', not %" PY_FORMAT_SIZE_T "d elements", self->cxx->outputSize(), Py_TYPE(self)->tp_name, prob->shape[0]);
if (prob && prob->shape[0] != (Py_ssize_t)self->cxx->numberOfClasses()) {
PyErr_Format(PyExc_RuntimeError, "1D `prob' array should have %" PY_FORMAT_SIZE_T "d elements matching the number of classes of `%s', not %" PY_FORMAT_SIZE_T "d elements", self->cxx->numberOfClasses(), Py_TYPE(self)->tp_name, prob->shape[0]);
return 0;
}
}
......@@ -1066,8 +1084,8 @@ static PyObject* PyBobLearnLibsvmMachine_predictClassAndProbabilities
PyErr_Format(PyExc_RuntimeError, "1D `cls' array should have %" PY_FORMAT_SIZE_T "d elements matching the number of rows on `input', not %" PY_FORMAT_SIZE_T "d rows", input->shape[0], cls->shape[0]);
return 0;
}
if (prob && prob->shape[1] != (Py_ssize_t)self->cxx->outputSize()) {
PyErr_Format(PyExc_RuntimeError, "2D `prob' array should have %" PY_FORMAT_SIZE_T "d columns matching the output size of `%s', not %" PY_FORMAT_SIZE_T "d elements", self->cxx->outputSize(), Py_TYPE(self)->tp_name, prob->shape[1]);
if (prob && prob->shape[1] != (Py_ssize_t)self->cxx->numberOfClasses()) {
PyErr_Format(PyExc_RuntimeError, "2D `prob' array should have %" PY_FORMAT_SIZE_T "d columns matching the number of classes of `%s', not %" PY_FORMAT_SIZE_T "d columns", self->cxx->numberOfClasses(), Py_TYPE(self)->tp_name, prob->shape[1]);
return 0;
}
if (prob && input->shape[0] != prob->shape[0]) {
......@@ -1088,11 +1106,11 @@ static PyObject* PyBobLearnLibsvmMachine_predictClassAndProbabilities
if (!prob) {
Py_ssize_t osize[2];
if (input->ndim == 1) {
osize[0] = self->cxx->outputSize();
osize[0] = self->cxx->numberOfClasses();
}
else {
osize[0] = input->shape[0];
osize[1] = self->cxx->outputSize();
osize[1] = self->cxx->numberOfClasses();
}
prob = (PyBlitzArrayObject*)PyBlitzArray_SimpleNew(NPY_FLOAT64, input->ndim, osize);
prob_ = make_safe(prob);
......
......@@ -13,6 +13,7 @@ import numpy
import tempfile
import pkg_resources
import nose.tools
import xbob.io
from xbob.learn.libsvm import File, Machine, svm_kernel_type, svm_type
......@@ -39,17 +40,9 @@ IRIS_EXPECTED = F('iris.out') #expected probabilities
def load_expected(filename):
"""Loads libsvm's svm-predict output file with probabilities"""
f = open(filename, 'rt')
labels = [int(k) for k in f.readline().split()[1:]]
predictions = []
probabilities = []
for k in f: #load the remaning lines
s = k.split()
predictions.append(int(s[0]))
probabilities.append(numpy.array([float(c) for c in s[1:]], 'float64'))
return tuple(labels), tuple(predictions), tuple(probabilities)
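#the first line lists the class labels (after a leading token); every
#other line holds a predicted label followed by one probability per label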
all_labels = sorted([int(k) for k in open(filename).readline().split()[1:]])
data = numpy.loadtxt(filename, dtype='float64', skiprows=1)
return all_labels, data[:,0].astype('int64'), data[:,1:]
#extracted by running svm-predict.c on the heart_scale example data
expected_heart_predictions = (1, -1, -1, 1, -1, -1, 1, 1, 1, 1, 1, 1, -1, -1,
......@@ -79,8 +72,8 @@ def test_can_load():
machine = Machine(HEART_MACHINE)
nose.tools.eq_(machine.shape, (13,1))
nose.tools.eq_(machine.kernel_type, svm_kernel_type.RBF)
nose.tools.eq_(machine.svm_type, svm_type.C_SVC)
nose.tools.eq_(machine.kernel_type, 'RBF')
nose.tools.eq_(machine.svm_type, 'C_SVC')
nose.tools.eq_(len(machine.labels), 2)
assert -1 in machine.labels
assert +1 in machine.labels
......@@ -96,8 +89,8 @@ def test_can_save():
# make sure that the save machine is the same as before
machine = Machine(tmp)
nose.tools.eq_(machine.shape, (13,1))
nose.tools.eq_(machine.kernel_type, svm_kernel_type.RBF)
nose.tools.eq_(machine.svm_type, svm_type.C_SVC)
nose.tools.eq_(machine.kernel_type, 'RBF')
nose.tools.eq_(machine.svm_type, 'C_SVC')
nose.tools.eq_(len(machine.labels), 2)
assert -1 in machine.labels
assert +1 in machine.labels
......@@ -109,14 +102,14 @@ def test_can_save_hdf5():
machine = Machine(HEART_MACHINE)
tmp = tempname('.hdf5')
machine.save(bob.io.HDF5File(tmp, 'w'))
machine.save(xbob.io.HDF5File(tmp, 'w'))
del machine
# make sure that the save machine is the same as before
machine = Machine(bob.io.HDF5File(tmp))
machine = Machine(xbob.io.HDF5File(tmp))
nose.tools.eq_(machine.shape, (13,1))
nose.tools.eq_(machine.kernel_type, svm_kernel_type.RBF)
nose.tools.eq_(machine.svm_type, svm_type.C_SVC)
nose.tools.eq_(machine.kernel_type, 'RBF')
nose.tools.eq_(machine.svm_type, 'C_SVC')
nose.tools.eq_(len(machine.labels), 2)
assert -1 in machine.labels
assert +1 in machine.labels
......@@ -139,12 +132,10 @@ def test_data_loading():
all_data = []
all_labels = []
while data.good():
values = numpy.ndarray(data.shape, 'float64')
label = data.read(values)
if label:
all_labels.append(label)
all_data.append(values)
all_labels = tuple(all_labels)
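#data.read() returns None at the end of the file, otherwise a
#(label, feature-vector) pair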
entry = data.read()
if entry is not None:
all_labels.append(entry[0])
all_data.append(entry[1])
nose.tools.eq_(len(all_data), len(all_labels))
nose.tools.eq_(len(all_data), 270)
......@@ -162,7 +153,7 @@ def test_data_loading():
#tries loading the file all in a single shot
data.reset()
labels, data = data.read_all()
nose.tools.eq_(labels, all_labels)
assert numpy.array_equal(labels, all_labels)
for k, l in zip(data, all_data):
assert numpy.array_equal(k, l)
......@@ -179,34 +170,31 @@ def test_data_loading():
nose.tools.eq_( l, labels[k] )
assert numpy.array_equal(e, data[k])
@self.assert_raises(RuntimeError)
@nose.tools.raises(RuntimeError)
def test_raises():
#tests that the normal machine raises because probabilities are not
#supported on that model
machine = Machine(TEST_MACHINE_NO_PROBS)
labels, data = File(HEART_DATA).read_all()
data = numpy.vstack(data)
machine.predict_classes_and_probabilities(data)
machine.predict_class_and_probabilities(data)
def test_correctness_heart():
#tests the correctness of the libSVM bindings
machine = Machine(HEART_MACHINE)
labels, data = File(HEART_DATA).read_all()
data = numpy.vstack(data)
pred_label = machine.predict_class(data)
nose.tools.eq_(pred_label, expected_heart_predictions)
assert numpy.array_equal(pred_label, expected_heart_predictions)
#finally, we test if the values also work fine.
pred_lab_values = [machine.predict_class_and_scores(k) for k in data]
#tries the variant with multiple inputs
pred_labels2, pred_scores2 = machine.predict_classes_and_scores(data)
nose.tools.eq_( expected_heart_predictions, pred_labels2 )
nose.tools.eq_( tuple([k[1] for k in pred_lab_values]), pred_scores2 )
pred_labels2, pred_scores2 = machine.predict_class_and_scores(data)
assert numpy.array_equal(expected_heart_predictions, pred_labels2)
assert numpy.array_equal(tuple([k[1] for k in pred_lab_values]), pred_scores2)
#tries to get the probabilities - note: for some reason, when getting
#probabilities, the labels change, but notice the note below:
......@@ -222,27 +210,25 @@ def test_correctness_heart():
# parameter set as more differences will be observed.
all_labels, real_labels, real_probs = load_expected(HEART_EXPECTED)
pred_labels, pred_probs = machine.predict_classes_and_probabilities(data)
nose.tools.eq_(pred_labels, real_labels)
assert numpy.all(abs(numpy.vstack(pred_probs) - numpy.vstack(real_probs)) < 1e-6)
pred_labels, pred_probs = machine.predict_class_and_probabilities(data)
assert numpy.array_equal(pred_labels, real_labels)
assert numpy.all(abs(pred_probs - real_probs) < 1e-6)
def test_correctness_iris():
#same test as above, but with a 3-class problem.
machine = Machine(IRIS_MACHINE)
labels, data = File(IRIS_DATA).read_all()
data = numpy.vstack(data)
pred_label = machine.predict_class(data)
nose.tools.eq_(pred_label, expected_iris_predictions)
assert numpy.array_equal(pred_label, expected_iris_predictions)
#finally, we test if the values also work fine.
pred_lab_values = [machine.predict_class_and_scores(k) for k in data]
#tries the variant with multiple inputs
pred_labels2, pred_scores2 = machine.predict_classes_and_scores(data)
nose.tools.eq_( expected_iris_predictions, pred_labels2 )
pred_labels2, pred_scores2 = machine.predict_class_and_scores(data)
assert numpy.array_equal(expected_iris_predictions, pred_labels2)
assert numpy.all(abs(numpy.vstack([k[1] for k in
pred_lab_values]) - numpy.vstack(pred_scores2)) < 1e-20 )
......@@ -251,20 +237,18 @@ def test_correctness_iris():
all_labels, real_labels, real_probs = load_expected(IRIS_EXPECTED)
pred_labels, pred_probs = machine.predict_classes_and_probabilities(data)
nose.tools.eq_(pred_labels, real_labels)
pred_labels, pred_probs = machine.predict_class_and_probabilities(data)
assert numpy.array_equal(pred_labels, real_labels)
assert numpy.all(abs(numpy.vstack(pred_probs) - numpy.vstack(real_probs)) < 1e-6)
@nose.tools.raises(RuntimeError)
def test_correctness_inputsize_exceeds():
#same test as above, but checks that excess input columns raise an error
machine = Machine(IRIS_MACHINE)
labels, data = File(IRIS_DATA).read_all()
data = numpy.vstack(data)
# add extra columns to the input data
data = numpy.hstack([data, numpy.ones((data.shape[0], 2), dtype=float)])
pred_label = machine.predict_class(data)
nose.tools.eq_(pred_label, expected_iris_predictions)