Commit 1d35378c authored by Tiago de Freitas Pereira

Merge branch 'multiprocessing' into 'master'

Trial to implement EM with multiprocessing

See merge request !16
parents 709dd06b 510bd6ab
Pipeline #30161 passed with stages in 22 minutes and 7 seconds
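
This merge adds an optional pool argument to bob.learn.em.train so that the EM E-step can be split across several workers. Below is a minimal sketch of the new call, not a verbatim excerpt: the data shape, trainer flags and pool size mirror test_gmm_ML_parallel further down, and pool also accepts a plain integer worker count, as in the i-vector test.

import multiprocessing.pool

import numpy
import bob.learn.em
from bob.learn.em import GMMMachine, ML_GMMTrainer

# Hypothetical training data: any 2D float64 array (n_samples x n_features).
data = numpy.random.randn(1000, 45)

gmm = GMMMachine(5, 45)                           # 5 Gaussians, 45-dim features
trainer = ML_GMMTrainer(True, True, True, 0.001)  # update means/variances/weights

# The new pool keyword runs the E-step on the pool's workers and merges the
# accumulated sufficient statistics before each M-step.
pool = multiprocessing.pool.ThreadPool(3)
bob.learn.em.train(trainer, gmm, data, max_iterations=25,
                   convergence_threshold=1e-5, pool=pool)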
@@ -19,7 +19,7 @@ bob::learn::em::GMMBaseTrainer::GMMBaseTrainer(const bool update_means,
{}
bob::learn::em::GMMBaseTrainer::GMMBaseTrainer(const bob::learn::em::GMMBaseTrainer& b):
-  m_ss(new bob::learn::em::GMMStats()),
+  m_ss(new bob::learn::em::GMMStats( *b.getGMMStats() )),
m_update_means(b.m_update_means), m_update_variances(b.m_update_variances),
m_mean_var_update_responsibilities_threshold(b.m_mean_var_update_responsibilities_threshold)
{}
......
@@ -76,7 +76,7 @@ static int PyBobLearnEMGaussian_init_hdf5(PyBobLearnEMGaussianObject* self, PyOb
return -1;
}
auto config_ = make_safe(config);
self->cxx.reset(new bob::learn::em::Gaussian(*(config->f)));
return 0;
@@ -96,7 +96,7 @@ static int PyBobLearnEMGaussian_init(PyBobLearnEMGaussianObject* self, PyObject*
//Reading the input argument
PyObject* arg = 0;
if (PyTuple_Size(args))
arg = PyTuple_GET_ITEM(args, 0);
else {
PyObject* tmp = PyDict_Values(kwargs);
@@ -105,7 +105,7 @@ static int PyBobLearnEMGaussian_init(PyBobLearnEMGaussianObject* self, PyObject*
}
/**If the constructor input is a number**/
if (PyBob_NumberCheck(arg))
return PyBobLearnEMGaussian_init_number(self, args, kwargs);
/**If the constructor input is Gaussian object**/
else if (PyBobLearnEMGaussian_Check(arg))
@@ -113,10 +113,11 @@ static int PyBobLearnEMGaussian_init(PyBobLearnEMGaussianObject* self, PyObject*
/**If the constructor input is a HDF5**/
else if (PyBobIoHDF5File_Check(arg))
return PyBobLearnEMGaussian_init_hdf5(self, args, kwargs);
-else
-  PyErr_Format(PyExc_TypeError, "invalid input argument");
-  Gaussian_doc.print_usage();
-  return -1;
+else {
+  PyErr_Format(PyExc_TypeError, "invalid input argument");
+  Gaussian_doc.print_usage();
+  return -1;
+}
BOB_CATCH_MEMBER("cannot create Gaussian", -1)
return 0;
@@ -177,22 +178,22 @@ int PyBobLearnEMGaussian_setMean(PyBobLearnEMGaussianObject* self, PyObject* val
PyErr_Format(PyExc_RuntimeError, "%s %s expects a 1D array of floats", Py_TYPE(self)->tp_name, mean.name());
return -1;
}
// perform check on the input
if (input->type_num != NPY_FLOAT64){
PyErr_Format(PyExc_TypeError, "`%s' only supports 64-bit float arrays for input array `%s`", Py_TYPE(self)->tp_name, mean.name());
return -1;
}
if (input->ndim != 1){
PyErr_Format(PyExc_TypeError, "`%s' only processes 1D arrays of float64 for `%s`", Py_TYPE(self)->tp_name, mean.name());
return -1;
}
if (input->shape[0] != (Py_ssize_t)self->cxx->getNInputs()){
PyErr_Format(PyExc_TypeError, "`%s' 1D `input` array should have %" PY_FORMAT_SIZE_T "d elements, not %" PY_FORMAT_SIZE_T "d for `%s`", Py_TYPE(self)->tp_name, self->cxx->getNInputs(), input->shape[0], mean.name());
return -1;
}
auto o_ = make_safe(input);
auto b = PyBlitzArrayCxx_AsBlitz<double,1>(input, "mean");
@@ -222,17 +223,17 @@ int PyBobLearnEMGaussian_setVariance(PyBobLearnEMGaussianObject* self, PyObject*
return -1;
}
auto input_ = make_safe(input);
// perform check on the input
if (input->type_num != NPY_FLOAT64){
PyErr_Format(PyExc_TypeError, "`%s' only supports 64-bit float arrays for input array `%s`", Py_TYPE(self)->tp_name, variance.name());
return -1;
}
if (input->ndim != 1){
PyErr_Format(PyExc_TypeError, "`%s' only processes 1D arrays of float64 for `%s`", Py_TYPE(self)->tp_name, variance.name());
return -1;
}
if (input->shape[0] != (Py_ssize_t)self->cxx->getNInputs()){
PyErr_Format(PyExc_TypeError, "`%s' 1D `input` array should have %" PY_FORMAT_SIZE_T "d elements, not %" PY_FORMAT_SIZE_T "d for `%s`", Py_TYPE(self)->tp_name, self->cxx->getNInputs(), input->shape[0], variance.name());
@@ -265,7 +266,7 @@ int PyBobLearnEMGaussian_setVarianceThresholds(PyBobLearnEMGaussianObject* self,
if (!PyBlitzArray_Converter(value, &input)){
PyErr_Format(PyExc_RuntimeError, "%s %s expects a 1D array of floats", Py_TYPE(self)->tp_name, variance_thresholds.name());
return -1;
}
auto input_ = make_safe(input);
@@ -273,23 +274,23 @@ int PyBobLearnEMGaussian_setVarianceThresholds(PyBobLearnEMGaussianObject* self,
if (input->type_num != NPY_FLOAT64){
PyErr_Format(PyExc_TypeError, "`%s' only supports 64-bit float arrays for input array `%s`", Py_TYPE(self)->tp_name, variance_thresholds.name());
return -1;
}
if (input->ndim != 1){
PyErr_Format(PyExc_TypeError, "`%s' only processes 1D arrays of float64 for `%s`", Py_TYPE(self)->tp_name, variance_thresholds.name());
return -1;
}
if (input->shape[0] != (Py_ssize_t)self->cxx->getNInputs()){
PyErr_Format(PyExc_TypeError, "`%s' 1D `input` array should have %" PY_FORMAT_SIZE_T "d elements, not %" PY_FORMAT_SIZE_T "d for `%s`", Py_TYPE(self)->tp_name, self->cxx->getNInputs(), input->shape[0], variance_thresholds.name());
return -1;
}
auto b = PyBlitzArrayCxx_AsBlitz<double,1>(input, "variance_thresholds");
if (!b) return -1;
self->cxx->setVarianceThresholds(*b);
return 0;
BOB_CATCH_MEMBER("variance_thresholds could not be set", -1)
BOB_CATCH_MEMBER("variance_thresholds could not be set", -1)
}
@@ -383,7 +384,7 @@ static auto log_likelihood = bob::extension::FunctionDoc(
.add_return("output","float","The log likelihood");
static PyObject* PyBobLearnEMGaussian_loglikelihood(PyBobLearnEMGaussianObject* self, PyObject* args, PyObject* kwargs) {
BOB_TRY
char** kwlist = log_likelihood.kwlist(0);
PyBlitzArrayObject* input = 0;
@@ -397,19 +398,19 @@ static PyObject* PyBobLearnEMGaussian_loglikelihood(PyBobLearnEMGaussianObject*
PyErr_Format(PyExc_TypeError, "`%s' only supports 64-bit float arrays for input array `input`", Py_TYPE(self)->tp_name);
log_likelihood.print_usage();
return 0;
}
if (input->ndim != 1){
PyErr_Format(PyExc_TypeError, "`%s' only processes 1D arrays of float64", Py_TYPE(self)->tp_name);
log_likelihood.print_usage();
return 0;
}
if (input->shape[0] != (Py_ssize_t)self->cxx->getNInputs()){
PyErr_Format(PyExc_TypeError, "`%s' 1D `input` array should have %" PY_FORMAT_SIZE_T "d elements, not %" PY_FORMAT_SIZE_T "d", Py_TYPE(self)->tp_name, self->cxx->getNInputs(), input->shape[0]);
log_likelihood.print_usage();
return 0;
}
double value = self->cxx->logLikelihood(*PyBlitzArrayCxx_AsBlitz<double,1>(input));
return Py_BuildValue("d", value);
@@ -440,19 +441,19 @@ static PyObject* PyBobLearnEMGaussian_loglikelihood_(PyBobLearnEMGaussianObject*
PyErr_Format(PyExc_TypeError, "`%s' only supports 64-bit float arrays for input array `input`", Py_TYPE(self)->tp_name);
log_likelihood.print_usage();
return 0;
}
if (input->ndim != 1){
PyErr_Format(PyExc_TypeError, "`%s' only processes 1D arrays of float64", Py_TYPE(self)->tp_name);
log_likelihood.print_usage();
return 0;
}
if (input->shape[0] != (Py_ssize_t)self->cxx->getNInputs()){
PyErr_Format(PyExc_TypeError, "`%s' 1D `input` array should have %" PY_FORMAT_SIZE_T "d elements, not %" PY_FORMAT_SIZE_T "d", Py_TYPE(self)->tp_name, self->cxx->getNInputs(), input->shape[0]);
log_likelihood.print_usage();
return 0;
}
double value = self->cxx->logLikelihood_(*PyBlitzArrayCxx_AsBlitz<double,1>(input));
return Py_BuildValue("d", value);
@@ -470,9 +471,9 @@ static auto save = bob::extension::FunctionDoc(
.add_parameter("hdf5", ":py:class:`bob.io.base.HDF5File`", "An HDF5 file open for writing");
static PyObject* PyBobLearnEMGaussian_Save(PyBobLearnEMGaussianObject* self, PyObject* args, PyObject* kwargs) {
BOB_TRY
// get list of arguments
char** kwlist = save.kwlist(0);
PyBobIoHDF5FileObject* hdf5;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&", kwlist, PyBobIoHDF5File_Converter, &hdf5)) return 0;
@@ -494,15 +495,15 @@ static auto load = bob::extension::FunctionDoc(
static PyObject* PyBobLearnEMGaussian_Load(PyBobLearnEMGaussianObject* self, PyObject* args, PyObject* kwargs) {
BOB_TRY
char** kwlist = load.kwlist(0);
PyBobIoHDF5FileObject* hdf5;
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&", kwlist, PyBobIoHDF5File_Converter, &hdf5)) return 0;
auto hdf5_ = make_safe(hdf5);
self->cxx->load(*hdf5->f);
BOB_CATCH_MEMBER("cannot load the data", 0)
BOB_CATCH_MEMBER("cannot load the data", 0)
Py_RETURN_NONE;
}
@@ -510,7 +511,7 @@ static PyObject* PyBobLearnEMGaussian_Load(PyBobLearnEMGaussianObject* self, Py
/*** is_similar_to ***/
static auto is_similar_to = bob::extension::FunctionDoc(
"is_similar_to",
"Compares this Gaussian with the ``other`` one to be approximately the same.",
"The optional values ``r_epsilon`` and ``a_epsilon`` refer to the "
"relative and absolute precision for the ``weights``, ``biases`` and any other values internal to this machine.",
......
@@ -407,8 +407,9 @@ static PyObject* PyBobLearnEMKMeansTrainer_e_step(PyBobLearnEMKMeansTrainerObjec
return 0;
}
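// Release the GIL so the E-step below can run concurrently with other
// Python threads, e.g. the workers of a multiprocessing.pool.ThreadPool.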
auto state = PyEval_SaveThread();
self->cxx->eStep(*kmeans_machine->cxx, *PyBlitzArrayCxx_AsBlitz<double,2>(data));
PyEval_RestoreThread(state);
BOB_CATCH_MEMBER("cannot perform the e_step method", 0)
......
@@ -374,8 +374,9 @@ static PyObject* PyBobLearnEMMAPGMMTrainer_e_step(PyBobLearnEMMAPGMMTrainerObjec
return 0;
}
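// As in the k-means trainer: drop the GIL while the MAP E-step runs.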
auto state = PyEval_SaveThread();
self->cxx->eStep(*gmm_machine->cxx, *PyBlitzArrayCxx_AsBlitz<double,2>(data));
PyEval_RestoreThread(state);
BOB_CATCH_MEMBER("cannot perform the e_step method", 0)
......
@@ -258,7 +258,9 @@ static PyObject* PyBobLearnEMMLGMMTrainer_e_step(PyBobLearnEMMLGMMTrainerObject*
return 0;
}
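// Likewise, the ML E-step runs without holding the GIL.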
auto state = PyEval_SaveThread();
self->cxx->eStep(*gmm_machine->cxx, *PyBlitzArrayCxx_AsBlitz<double,2>(data));
PyEval_RestoreThread(state);
BOB_CATCH_MEMBER("cannot perform the e_step method", 0)
......
@@ -19,7 +19,7 @@ from bob.learn.em import KMeansMachine, GMMMachine, KMeansTrainer, \
import bob.learn.em
import bob.core
-bob.core.log.setup("bob.learn.em")
+logger = bob.core.log.setup("bob.learn.em")
#, MAP_GMMTrainer
@@ -113,6 +113,45 @@ def test_gmm_ML_2():
assert equals(gmm.weights, weightsML_ref, 1e-4)
def test_gmm_ML_parallel():
# Trains a GMMMachine with ML_GMMTrainer; compares to an old reference
ar = bob.io.base.load(datafile('dataNormalized.hdf5', __name__, path="../data/"))
# Initialize GMMMachine
gmm = GMMMachine(5, 45)
gmm.means = bob.io.base.load(datafile('meansAfterKMeans.hdf5', __name__, path="../data/")).astype('float64')
gmm.variances = bob.io.base.load(datafile('variancesAfterKMeans.hdf5', __name__, path="../data/")).astype('float64')
gmm.weights = numpy.exp(bob.io.base.load(datafile('weightsAfterKMeans.hdf5', __name__, path="../data/")).astype('float64'))
threshold = 0.001
gmm.set_variance_thresholds(threshold)
# Initialize ML Trainer
prior = 0.001
max_iter_gmm = 25
accuracy = 0.00001
ml_gmmtrainer = ML_GMMTrainer(True, True, True, prior)
# Run ML
import multiprocessing.pool
pool = multiprocessing.pool.ThreadPool(3)
# pool = multiprocessing.Pool(1)
bob.learn.em.train(ml_gmmtrainer, gmm, ar, max_iterations = max_iter_gmm, convergence_threshold=accuracy, pool=pool)
# Test results
# Load torch3vision reference
meansML_ref = bob.io.base.load(datafile('meansAfterML.hdf5', __name__, path="../data/"))
variancesML_ref = bob.io.base.load(datafile('variancesAfterML.hdf5', __name__, path="../data/"))
weightsML_ref = bob.io.base.load(datafile('weightsAfterML.hdf5', __name__, path="../data/"))
# Compare to current results
assert equals(gmm.means, meansML_ref, 3e-3)
assert equals(gmm.variances, variancesML_ref, 3e-3)
assert equals(gmm.weights, weightsML_ref, 1e-4)
def test_gmm_MAP_1():
......
@@ -13,6 +13,7 @@ import numpy.random
import nose.tools
from bob.learn.em import GMMMachine, GMMStats, IVectorMachine, IVectorTrainer
import bob.learn.em
### Test class inspired by an implementation of Chris McCool
### Chris McCool (chris.mccool@nicta.com.au)
@@ -366,3 +367,102 @@ def test_trainer_update_sigma():
trainer.m_step(m)
assert numpy.allclose(t_ref[it], m.t, 1e-5)
assert numpy.allclose(sigma_ref[it], m.sigma, 1e-5)
def test_trainer_update_sigma_parallel():
# Ubm
dim_c = 2
dim_d = 3
ubm = GMMMachine(dim_c,dim_d)
ubm.weights = numpy.array([0.4,0.6])
ubm.means = numpy.array([[1.,7,4],[4,5,3]])
ubm.variances = numpy.array([[0.5,1.,1.5],[1.,1.5,2.]])
# Defines GMMStats
gs1 = GMMStats(dim_c,dim_d)
log_likelihood1 = -3.
T1 = 1
n1 = numpy.array([0.4, 0.6], numpy.float64)
sumpx1 = numpy.array([[1., 2., 3.], [2., 4., 3.]], numpy.float64)
sumpxx1 = numpy.array([[10., 20., 30.], [40., 50., 60.]], numpy.float64)
gs1.log_likelihood = log_likelihood1
gs1.t = T1
gs1.n = n1
gs1.sum_px = sumpx1
gs1.sum_pxx = sumpxx1
gs2 = GMMStats(dim_c,dim_d)
log_likelihood2 = -4.
T2 = 1
n2 = numpy.array([0.2, 0.8], numpy.float64)
sumpx2 = numpy.array([[2., 1., 3.], [3., 4.1, 3.2]], numpy.float64)
sumpxx2 = numpy.array([[12., 15., 25.], [39., 51., 62.]], numpy.float64)
gs2.log_likelihood = log_likelihood2
gs2.t = T2
gs2.n = n2
gs2.sum_px = sumpx2
gs2.sum_pxx = sumpxx2
data = [gs1, gs2]
# Reference values
acc_Nij_Sigma_wij2_ref1 = {0: numpy.array([[ 0.03202305, -0.02947769], [-0.02947769, 0.0561132 ]]),
1: numpy.array([[ 0.07953279, -0.07829414], [-0.07829414, 0.13814242]])}
acc_Fnorm_Sigma_wij_ref1 = {0: numpy.array([[-0.29622691, 0.61411796], [ 0.09391764, -0.27955961], [-0.39014455, 0.89367757]]),
1: numpy.array([[ 0.04695882, -0.13977981], [-0.05718673, 0.24159665], [-0.17098161, 0.47326585]])}
acc_Snorm_ref1 = numpy.array([16.6, 22.4, 16.6, 61.4, 55., 97.4])
N_ref1 = numpy.array([0.6, 1.4])
t_ref1 = numpy.array([[ 1.59543739, 11.78239235], [ -3.20130371, -6.66379081], [ 4.79674111, 18.44618316],
[ -0.91765407, -1.5319461 ], [ 2.26805901, 3.03434944], [ 2.76600031, 4.9935962 ]])
sigma_ref1 = numpy.array([ 16.39472121, 34.72955353, 3.3108037, 43.73496916, 38.85472445, 68.22116903])
acc_Nij_Sigma_wij2_ref2 = {0: numpy.array([[ 0.50807426, -0.11907756], [-0.11907756, 0.12336544]]),
1: numpy.array([[ 1.18602399, -0.2835859 ], [-0.2835859 , 0.39440498]])}
acc_Fnorm_Sigma_wij_ref2 = {0: numpy.array([[ 0.07221453, 1.1189786 ], [-0.08681275, -0.35396112], [ 0.15902728, 1.47293972]]),
1: numpy.array([[-0.04340637, -0.17698056], [ 0.10662127, 0.21484933],[ 0.13116645, 0.64474271]])}
acc_Snorm_ref2 = numpy.array([16.6, 22.4, 16.6, 61.4, 55., 97.4])
N_ref2 = numpy.array([0.6, 1.4])
t_ref2 = numpy.array([[ 2.93105054, 11.89961223], [ -1.08988119, -3.92120757], [ 4.02093173, 15.82081981],
[ -0.17376634, -0.57366984], [ 0.26585634, 0.73589952], [ 0.60557877, 2.07014704]])
sigma_ref2 = numpy.array([5.12154025e+00, 3.48623823e+01, 1.00000000e-05, 4.37792350e+01, 3.91525332e+01, 6.85613258e+01])
acc_Nij_Sigma_wij2_ref = [acc_Nij_Sigma_wij2_ref1, acc_Nij_Sigma_wij2_ref2]
acc_Fnorm_Sigma_wij_ref = [acc_Fnorm_Sigma_wij_ref1, acc_Fnorm_Sigma_wij_ref2]
acc_Snorm_ref = [acc_Snorm_ref1, acc_Snorm_ref2]
N_ref = [N_ref1, N_ref2]
t_ref = [t_ref1, t_ref2]
sigma_ref = [sigma_ref1, sigma_ref2]
# Machine
t = numpy.array([[1.,2],[4,1],[0,3],[5,8],[7,10],[11,1]])
sigma = numpy.array([1.,2.,1.,3.,2.,4.])
# C++ implementation
# Machine
serial_m = IVectorMachine(ubm, 2)
serial_m.variance_threshold = 1e-5
# SERIAL TRAINER
serial_trainer = IVectorTrainer(update_sigma=True)
serial_m.t = t
serial_m.sigma = sigma
bob.learn.em.train(serial_trainer, serial_m, data, max_iterations=5, initialize=True)
# PARALLEL TRAINER
parallel_m = IVectorMachine(ubm, 2)
parallel_m.variance_threshold = 1e-5
parallel_trainer = IVectorTrainer(update_sigma=True)
parallel_m.t = t
parallel_m.sigma = sigma
bob.learn.em.train(parallel_trainer, parallel_m, data, max_iterations=5, initialize=True, pool=2)
assert numpy.allclose(serial_trainer.acc_nij_wij2, parallel_trainer.acc_nij_wij2, 1e-5)
assert numpy.allclose(serial_trainer.acc_fnormij_wij, parallel_trainer.acc_fnormij_wij, 1e-5)
assert numpy.allclose(serial_trainer.acc_snormij, parallel_trainer.acc_snormij, 1e-5)
assert numpy.allclose(serial_trainer.acc_nij, parallel_trainer.acc_nij, 1e-5)
@@ -177,6 +177,41 @@ def test_kmeans_b():
assert (numpy.isnan(machine.means).any()) == False
def test_kmeans_parallel():
# Trains a KMeansMachine
(arStd, std) = NormalizeStdArray(datafile("faithful.torch3.hdf5", __name__, path="../data/"))
machine = KMeansMachine(2, 2)
trainer = KMeansTrainer()
# trainer.seed = 1337
import multiprocessing.pool
pool = multiprocessing.pool.ThreadPool(3)
bob.learn.em.train(trainer, machine, arStd, convergence_threshold=0.001, pool = pool)
[variances, weights] = machine.get_variances_and_weights_for_each_cluster(arStd)
means = numpy.array(machine.means)
variances = numpy.array(variances)
multiplyVectorsByFactors(means, std)
multiplyVectorsByFactors(variances, std ** 2)
gmmWeights = bob.io.base.load(datafile('gmm.init_weights.hdf5', __name__, path="../data/"))
gmmMeans = bob.io.base.load(datafile('gmm.init_means.hdf5', __name__, path="../data/"))
gmmVariances = bob.io.base.load(datafile('gmm.init_variances.hdf5', __name__, path="../data/"))
if (means[0, 0] < means[1, 0]):
means = flipRows(means)
variances = flipRows(variances)
weights = flipRows(weights)
assert equals(means, gmmMeans, 1e-3)
assert equals(weights, gmmWeights, 1e-3)
assert equals(variances, gmmVariances, 1e-3)
def test_trainer_execption():
from nose.tools import assert_raises
......
This diff is collapsed.
@@ -30,7 +30,7 @@ extensions = [
]
# Be picky about warnings
-nitpicky = False
+nitpicky = True
# Ignores stuff we can't easily resolve on other project's sphinx manuals
nitpick_ignore = []
......
-2.1.3b0
\ No newline at end of file
+2.2.0b0