Compare revisions: bob/bob.learn.em

Changes are shown as if the source revision was being merged into the target revision.

Showing with 1620 additions and 1036 deletions
@@ -17,7 +17,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;} /*

static auto ML_GMMTrainer_doc = bob::extension::ClassDoc(
  BOB_EXT_MODULE_PREFIX ".ML_GMMTrainer",
  "This class implements the maximum likelihood M-step (:ref:`MLE <mle>`) of the expectation-maximisation algorithm for a GMM Machine."
).add_constructor(
  bob::extension::FunctionDoc(
    "__init__",
@@ -15,159 +15,179 @@ from bob.io.base.test_utils import datafile

from bob.learn.em import KMeansMachine, KMeansTrainer


def equals(x, y, epsilon):
    return (abs(x - y) < epsilon).all()


def kmeans_plus_plus(machine, data, seed):
    """Python implementation of K-Means++ (initialization)"""
    n_data = data.shape[0]
    rng = bob.core.random.mt19937(seed)
    u = bob.core.random.uniform('int32', 0, n_data - 1)
    index = u(rng)
    machine.set_mean(0, data[index, :])
    weights = numpy.zeros(shape=(n_data,), dtype=numpy.float64)

    for m in range(1, machine.dim_c):
        for s in range(n_data):
            s_cur = data[s, :]
            w_cur = machine.get_distance_from_mean(s_cur, 0)
            for i in range(m):
                w_cur = min(machine.get_distance_from_mean(s_cur, i), w_cur)
            weights[s] = w_cur
        weights *= weights
        weights /= numpy.sum(weights)
        d = bob.core.random.discrete('int32', weights)
        index = d(rng)
        machine.set_mean(m, data[index, :])


def NormalizeStdArray(path):
    array = bob.io.base.load(path).astype('float64')
    std = array.std(axis=0)
    return (array / std, std)


def multiplyVectorsByFactors(matrix, vector):
    for i in range(0, matrix.shape[0]):
        for j in range(0, matrix.shape[1]):
            matrix[i, j] *= vector[j]


def flipRows(array):
    if len(array.shape) == 2:
        return numpy.array([numpy.array(array[1, :]), numpy.array(array[0, :])], 'float64')
    elif len(array.shape) == 1:
        return numpy.array([array[1], array[0]], 'float64')
    else:
        raise Exception('Input type not supportd by flipRows')


if hasattr(KMeansTrainer, 'KMEANS_PLUS_PLUS'):
    def test_kmeans_plus_plus():
        # Tests the K-Means++ initialization
        dim_c = 5
        dim_d = 7
        n_samples = 150
        data = numpy.random.randn(n_samples, dim_d)
        seed = 0

        # C++ implementation
        machine = KMeansMachine(dim_c, dim_d)
        trainer = KMeansTrainer()
        trainer.rng = bob.core.random.mt19937(seed)
        trainer.initialization_method = 'KMEANS_PLUS_PLUS'
        trainer.initialize(machine, data)

        # Python implementation
        py_machine = KMeansMachine(dim_c, dim_d)
        kmeans_plus_plus(py_machine, data, seed)
        assert equals(machine.means, py_machine.means, 1e-8)


def test_kmeans_noduplicate():
    # Data/dimensions
    dim_c = 2
    dim_d = 3
    seed = 0
    data = numpy.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [4, 5, 6.]])

    # Defines machine and trainer
    machine = KMeansMachine(dim_c, dim_d)
    trainer = KMeansTrainer()
    rng = bob.core.random.mt19937(seed)
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    trainer.initialize(machine, data, rng)

    # Makes sure that the two initial mean vectors selected are different
    assert equals(machine.get_mean(0), machine.get_mean(1), 1e-8) == False


def test_kmeans_a():
    # Trains a KMeansMachine
    # This files contains draws from two 1D Gaussian distributions:
    # * 100 samples from N(-10,1)
    # * 100 samples from N(10,1)
    data = bob.io.base.load(datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/"))

    machine = KMeansMachine(2, 1)

    trainer = KMeansTrainer()
    # trainer.train(machine, data)
    bob.learn.em.train(trainer, machine, data)

    [variances, weights] = machine.get_variances_and_weights_for_each_cluster(data)
    variances_b = numpy.ndarray(shape=(2, 1), dtype=numpy.float64)
    weights_b = numpy.ndarray(shape=(2,), dtype=numpy.float64)
    machine.__get_variances_and_weights_for_each_cluster_init__(variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_acc__(data, variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_fin__(variances_b, weights_b)
    m1 = machine.get_mean(0)
    m2 = machine.get_mean(1)

    ## Check means [-10,10] / variances [1,1] / weights [0.5,0.5]
    if (m1 < m2):
        means = numpy.array(([m1[0], m2[0]]), 'float64')
    else:
        means = numpy.array(([m2[0], m1[0]]), 'float64')
    assert equals(means, numpy.array([-10., 10.]), 2e-1)
    assert equals(variances, numpy.array([1., 1.]), 2e-1)
    assert equals(weights, numpy.array([0.5, 0.5]), 1e-3)

    assert equals(variances, variances_b, 1e-8)
    assert equals(weights, weights_b, 1e-8)


def test_kmeans_b():
    # Trains a KMeansMachine
    (arStd, std) = NormalizeStdArray(datafile("faithful.torch3.hdf5", __name__, path="../data/"))

    machine = KMeansMachine(2, 2)
    trainer = KMeansTrainer()
    # trainer.seed = 1337
    bob.learn.em.train(trainer, machine, arStd, convergence_threshold=0.001)

    [variances, weights] = machine.get_variances_and_weights_for_each_cluster(arStd)
    means = numpy.array(machine.means)
    variances = numpy.array(variances)

    multiplyVectorsByFactors(means, std)
    multiplyVectorsByFactors(variances, std ** 2)

    gmmWeights = bob.io.base.load(datafile('gmm.init_weights.hdf5', __name__, path="../data/"))
    gmmMeans = bob.io.base.load(datafile('gmm.init_means.hdf5', __name__, path="../data/"))
    gmmVariances = bob.io.base.load(datafile('gmm.init_variances.hdf5', __name__, path="../data/"))

    if (means[0, 0] < means[1, 0]):
        means = flipRows(means)
        variances = flipRows(variances)
        weights = flipRows(weights)

    assert equals(means, gmmMeans, 1e-3)
    assert equals(weights, gmmWeights, 1e-3)
    assert equals(variances, gmmVariances, 1e-3)

    # Check that there is no duplicate means during initialization
    machine = KMeansMachine(2, 1)
    trainer = KMeansTrainer()
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    data = numpy.array([[1.], [1.], [1.], [1.], [1.], [1.], [2.], [3.]])
    bob.learn.em.train(trainer, machine, data)
    assert (numpy.isnan(machine.means).any()) == False


def test_trainer_execption():
    from nose.tools import assert_raises

    # Testing Inf
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, 1.], [2, 5.], [numpy.inf, 1.0]])
    trainer = KMeansTrainer()
    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)

    # Testing Nan
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, numpy.nan], [2, 5.], [2.0, 1.0]])
    trainer = KMeansTrainer()
    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
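For readers following the new `RANDOM_NO_DUPLICATE` tests above, here is a small sketch of the two equivalent ways this initialization method is selected elsewhere in this compare (toy data and values are hypothetical, not taken from the test suite):

import numpy
import bob.learn.em

data = numpy.array([[1.], [1.], [1.], [2.], [3.]])  # duplicates on purpose

# Either pass the method name to the constructor (as the plot scripts below do)...
trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')

# ...or set it as an attribute before training, as the tests do.
trainer = bob.learn.em.KMeansTrainer()
trainer.initialization_method = 'RANDOM_NO_DUPLICATE'

machine = bob.learn.em.KMeansMachine(2, 1)
bob.learn.em.train(trainer, machine, data)
assert not numpy.isnan(machine.means).any()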
@@ -7,112 +7,125 @@

import numpy
import bob.learn.em
import logging

logger = logging.getLogger('bob.learn.em')


def train(trainer, machine, data, max_iterations=50, convergence_threshold=None, initialize=True, rng=None,
          check_inputs=True):
    """
    Trains a machine given a trainer and the proper data

    **Parameters**:
      trainer : one of :py:class:`KMeansTrainer`, :py:class:`MAP_GMMTrainer`, :py:class:`ML_GMMTrainer`, :py:class:`ISVTrainer`, :py:class:`IVectorTrainer`, :py:class:`PLDATrainer`, :py:class:`EMPCATrainer`
        A trainer mechanism
      machine : one of :py:class:`KMeansMachine`, :py:class:`GMMMachine`, :py:class:`ISVBase`, :py:class:`IVectorMachine`, :py:class:`PLDAMachine`, :py:class:`bob.learn.linear.Machine`
        A container machine
      data : array_like <float, 2D>
        The data to be trained
      max_iterations : int
        The maximum number of iterations to train a machine
      convergence_threshold : float
        The convergence threshold to train a machine. If None, the training procedure will stop with the iterations criteria
      initialize : bool
        If True, runs the initialization procedure
      rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loop
      check_inputs : bool
        Shallow checks in the inputs. Check for inf and NaN
    """

    if check_inputs and type(data) is numpy.ndarray:
        if numpy.isinf(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.inf detected in `data` ")
        if numpy.isnan(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.nan detected in `data` ")

    # Initialization
    if initialize:
        if rng is not None:
            trainer.initialize(machine, data, rng)
        else:
            trainer.initialize(machine, data)

    trainer.e_step(machine, data)
    average_output = 0
    average_output_previous = 0

    if hasattr(trainer, "compute_likelihood"):
        average_output = trainer.compute_likelihood(machine)

    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        average_output_previous = average_output
        trainer.m_step(machine, data)
        trainer.e_step(machine, data)

        if hasattr(trainer, "compute_likelihood"):
            average_output = trainer.compute_likelihood(machine)

        if type(machine) is bob.learn.em.KMeansMachine:
            logger.info("average euclidean distance = %f", average_output)
        else:
            logger.info("log likelihood = %f", average_output)

        convergence_value = abs((average_output_previous - average_output) / average_output_previous)
        logger.info("convergence value = %f", convergence_value)

        # Terminates if converged (and likelihood computation is set)
        if convergence_threshold != None and convergence_value <= convergence_threshold:
            break

    if hasattr(trainer, "finalize"):
        trainer.finalize(machine, data)


def train_jfa(trainer, jfa_base, data, max_iterations=10, initialize=True, rng=None):
    """
    Trains a :py:class:`bob.learn.em.JFABase` given a :py:class:`bob.learn.em.JFATrainer` and the proper data

    **Parameters**:
      trainer : :py:class:`bob.learn.em.JFATrainer`
        A JFA trainer mechanism
      jfa_base : :py:class:`bob.learn.em.JFABase`
        A container machine
      data : [[:py:class:`bob.learn.em.GMMStats`]]
        The data to be trained
      max_iterations : int
        The maximum number of iterations to train a machine
      initialize : bool
        If True, runs the initialization procedure
      rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loops
    """
    if initialize:
        if rng is not None:
            trainer.initialize(jfa_base, data, rng)
        else:
            trainer.initialize(jfa_base, data)

    # V Subspace
    logger.info("V subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_v(jfa_base, data)
        trainer.m_step_v(jfa_base, data)
    trainer.finalize_v(jfa_base, data)

    # U subspace
    logger.info("U subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_u(jfa_base, data)
        trainer.m_step_u(jfa_base, data)
    trainer.finalize_u(jfa_base, data)

    # D subspace
    logger.info("D subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_d(jfa_base, data)
        trainer.m_step_d(jfa_base, data)
    trainer.finalize_d(jfa_base, data)
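As a minimal usage sketch of the `train` helper shown above (assuming `bob.learn.em` and `numpy` are installed; the toy data below is hypothetical), note that `check_inputs` is the new keyword argument introduced in this revision:

import numpy
import bob.learn.em

# Hypothetical toy 2D data: two well-separated blobs.
data = numpy.vstack((numpy.random.normal(-5., 1., (100, 2)),
                     numpy.random.normal(5., 1., (100, 2))))

machine = bob.learn.em.KMeansMachine(2, 2)   # 2 clusters, 2 dimensions
trainer = bob.learn.em.KMeansTrainer()

# check_inputs=True (the default) now rejects data containing numpy.inf or
# numpy.nan with a ValueError before the EM loop starts.
bob.learn.em.train(trainer, machine, data,
                   max_iterations=200, convergence_threshold=1e-5,
                   check_inputs=True)
print(machine.means)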
@@ -12,7 +12,7 @@

/*** zt_norm ***/
bob::extension::FunctionDoc zt_norm = bob::extension::FunctionDoc(
  "ztnorm",
  "Normalise raw scores with :ref:`ZT-Norm <ztnorm>`."
  "Assume that znorm and tnorm have no common subject id.",
  0,
  true
)

@@ -72,7 +72,7 @@ PyObject* PyBobLearnEM_ztNorm(PyObject*, PyObject* args, PyObject* kwargs) {

/*** t_norm ***/
bob::extension::FunctionDoc t_norm = bob::extension::FunctionDoc(
  "tnorm",
  "Normalise raw scores with :ref:`T-Norm <tnorm>`",
  0,
  true
)

@@ -109,7 +109,7 @@ PyObject* PyBobLearnEM_tNorm(PyObject*, PyObject* args, PyObject* kwargs) {

/*** z_norm ***/
bob::extension::FunctionDoc z_norm = bob::extension::FunctionDoc(
  "znorm",
  "Normalise raw scores with :ref:`Z-Norm <znorm>`",
  0,
  true
)
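For orientation, a short sketch of how the Python-level bindings documented above are typically called, mirroring the plot scripts that appear further down in this compare; the score matrices here are hypothetical placeholders:

import numpy
import bob.learn.em

# Hypothetical raw score matrices: rows are probes, columns are models.
raw_scores = numpy.random.normal(0.5, 5, (200, 10))
z_scores = numpy.random.normal(-5., 5, (200, 10))
t_scores = numpy.random.normal(-6., 5, (200, 10))
zt_scores = bob.learn.em.tnorm(z_scores, t_scores)

z_normed = bob.learn.em.znorm(raw_scores, z_scores)
t_normed = bob.learn.em.tnorm(raw_scores, t_scores)
zt_normed = bob.learn.em.ztnorm(raw_scores, z_scores, t_scores, zt_scores)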
##############################################################################
#
# Copyright (c) 2006 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Bootstrap a buildout-based project
Simply run this script in a directory containing a buildout.cfg.
The script accepts buildout command-line options, so you can
use the -c option to specify an alternate configuration file.
"""
import os
import shutil
import sys
import tempfile
from optparse import OptionParser
__version__ = '2015-07-01'
# See zc.buildout's changelog if this version is up to date.
tmpeggs = tempfile.mkdtemp(prefix='bootstrap-')
usage = '''\
[DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options]
Bootstraps a buildout-based project.
Simply run this script in a directory containing a buildout.cfg, using the
Python that you want bin/buildout to use.
Note that by using --find-links to point to local resources, you can keep
this script from going over the network.
'''
parser = OptionParser(usage=usage)
parser.add_option("--version",
action="store_true", default=False,
help=("Return bootstrap.py version."))
parser.add_option("-t", "--accept-buildout-test-releases",
dest='accept_buildout_test_releases',
action="store_true", default=False,
help=("Normally, if you do not specify a --version, the "
"bootstrap script and buildout gets the newest "
"*final* versions of zc.buildout and its recipes and "
"extensions for you. If you use this flag, "
"bootstrap and buildout will get the newest releases "
"even if they are alphas or betas."))
parser.add_option("-c", "--config-file",
help=("Specify the path to the buildout configuration "
"file to be used."))
parser.add_option("-f", "--find-links",
help=("Specify a URL to search for buildout releases"))
parser.add_option("--allow-site-packages",
action="store_true", default=False,
help=("Let bootstrap.py use existing site packages"))
parser.add_option("--buildout-version",
help="Use a specific zc.buildout version")
parser.add_option("--setuptools-version",
help="Use a specific setuptools version")
parser.add_option("--setuptools-to-dir",
help=("Allow for re-use of existing directory of "
"setuptools versions"))
options, args = parser.parse_args()
if options.version:
print("bootstrap.py version %s" % __version__)
sys.exit(0)
######################################################################
# load/install setuptools
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen
ez = {}
if os.path.exists('ez_setup.py'):
exec(open('ez_setup.py').read(), ez)
else:
exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez)
if not options.allow_site_packages:
# ez_setup imports site, which adds site packages
# this will remove them from the path to ensure that incompatible versions
# of setuptools are not in the path
import site
# inside a virtualenv, there is no 'getsitepackages'.
# We can't remove these reliably
if hasattr(site, 'getsitepackages'):
for sitepackage_path in site.getsitepackages():
# Strip all site-packages directories from sys.path that
# are not sys.prefix; this is because on Windows
# sys.prefix is a site-package directory.
if sitepackage_path != sys.prefix:
sys.path[:] = [x for x in sys.path
if sitepackage_path not in x]
setup_args = dict(to_dir=tmpeggs, download_delay=0)
if options.setuptools_version is not None:
setup_args['version'] = options.setuptools_version
if options.setuptools_to_dir is not None:
setup_args['to_dir'] = options.setuptools_to_dir
ez['use_setuptools'](**setup_args)
import setuptools
import pkg_resources
# This does not (always?) update the default working set. We will
# do it.
for path in sys.path:
if path not in pkg_resources.working_set.entries:
pkg_resources.working_set.add_entry(path)
######################################################################
# Install buildout
ws = pkg_resources.working_set
setuptools_path = ws.find(
pkg_resources.Requirement.parse('setuptools')).location
# Fix sys.path here as easy_install.pth added before PYTHONPATH
cmd = [sys.executable, '-c',
'import sys; sys.path[0:0] = [%r]; ' % setuptools_path +
'from setuptools.command.easy_install import main; main()',
'-mZqNxd', tmpeggs]
find_links = os.environ.get(
'bootstrap-testing-find-links',
options.find_links or
('http://downloads.buildout.org/'
if options.accept_buildout_test_releases else None)
)
if find_links:
cmd.extend(['-f', find_links])
requirement = 'zc.buildout'
version = options.buildout_version
if version is None and not options.accept_buildout_test_releases:
# Figure out the most recent final version of zc.buildout.
import setuptools.package_index
_final_parts = '*final-', '*final'
def _final_version(parsed_version):
try:
return not parsed_version.is_prerelease
except AttributeError:
# Older setuptools
for part in parsed_version:
if (part[:1] == '*') and (part not in _final_parts):
return False
return True
index = setuptools.package_index.PackageIndex(
search_path=[setuptools_path])
if find_links:
index.add_find_links((find_links,))
req = pkg_resources.Requirement.parse(requirement)
if index.obtain(req) is not None:
best = []
bestv = None
for dist in index[req.project_name]:
distv = dist.parsed_version
if _final_version(distv):
if bestv is None or distv > bestv:
best = [dist]
bestv = distv
elif distv == bestv:
best.append(dist)
if best:
best.sort()
version = best[-1].version
if version:
requirement = '=='.join((requirement, version))
cmd.append(requirement)
import subprocess
if subprocess.call(cmd) != 0:
raise Exception(
"Failed to execute command:\n%s" % repr(cmd)[1:-1])
######################################################################
# Import and run buildout
ws.add_entry(tmpeggs)
ws.require(requirement)
import zc.buildout.buildout
if not [a for a in args if '=' not in a]:
args.append('bootstrap')
# if -c was provided, we push it back into args for buildout' main function
if options.config_file is not None:
args[0:0] = ['-c', options.config_file]
zc.buildout.buildout.main(args)
shutil.rmtree(tmpeggs)
@@ -25,6 +25,7 @@ extensions = [
    'sphinx.ext.intersphinx',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'matplotlib.sphinxext.plot_directive'
]

import sphinx

@@ -231,7 +232,6 @@ autodoc_member_order = 'bysource'
autodoc_default_flags = [
  'members',
  'undoc-members',
  'show-inheritance',
]
This diff is collapsed.
.. vim: set fileencoding=utf-8 :

.. _bob.learn.em:

@@ -10,12 +6,12 @@

Expectation Maximization Machine Learning Tools
================================================

This package is a part of Bob_. It implements a general EM algorithm and
includes implementations of the following algorithms:

- K-Means
- Maximum Likelihood (ML)
- Maximum a Posteriori (MAP)
- Inter Session Variability Modelling (ISV)
- Joint Factor Analysis (JFA)
- Total Variability Modeling (iVectors)

@@ -31,7 +27,7 @@ Documentation

   guide
   py_api

References
-----------

@@ -47,7 +43,9 @@ References

.. [Roweis1998] Roweis, Sam. "EM algorithms for PCA and SPCA." Advances in neural information processing systems (1998): 626-632.
.. [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
.. [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
.. [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
.. [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE Signal Processing Letters 12.7 (2005): 532-535.

Indices and tables
@@ -12,6 +12,7 @@

.. _blitz++: http://www.oonumerics.org/blitz
.. _bob's idiap guide: https://gitlab.idiap.ch/bob/bob/wikis/Using-Bob-at-Idiap
.. _bob's website: https://www.idiap.ch/software/bob
.. _bob: https://www.idiap.ch/software/bob
.. _boost: http://www.boost.org
.. _buildbot: http://trac.buildbot.net
.. _buildout: http://pypi.python.org/pypi/zc.buildout/
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is first used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def isv_train(features, ubm):
"""
Train U matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats` organized by class
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
user_stats = []
for f in user:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
ubm.acc_statistics(f, s)
user_stats.append(s)
stats.append(user_stats)
relevance_factor = 4
subspace_dimension_of_u = 1
isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
trainer = bob.learn.em.ISVTrainer(relevance_factor)
# trainer.rng = bob.core.random.mt19937(int(self.init_seed))
bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)
return isvbase
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
isvbase = isv_train([setosa, versicolor, virginica], ubm)
# Variability direction
u0 = isvbase.u[0:2, 0] / numpy.linalg.norm(isvbase.u[0:2, 0])
u1 = isvbase.u[2:4, 0] / numpy.linalg.norm(isvbase.u[2:4, 0])
u2 = isvbase.u[4:6, 0] / numpy.linalg.norm(isvbase.u[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
# marker=".", label="within class varibility", s=0.01)
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend()
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is first used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def jfa_train(features, ubm):
"""
Trains U and V matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats` organized by class
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
user_stats = []
for f in user:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
ubm.acc_statistics(f, s)
user_stats.append(s)
stats.append(user_stats)
subspace_dimension_of_u = 1
subspace_dimension_of_v = 1
jfa_base = bob.learn.em.JFABase(
ubm, subspace_dimension_of_u, subspace_dimension_of_v)
trainer = bob.learn.em.JFATrainer()
# trainer.rng = bob.core.random.mt19937(int(self.init_seed))
bob.learn.em.train_jfa(trainer, jfa_base, stats, max_iterations=50)
return jfa_base
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
jfa_base = jfa_train([setosa, versicolor, virginica], ubm)
# Variability direction U
u0 = jfa_base.u[0:2, 0] / numpy.linalg.norm(jfa_base.u[0:2, 0])
u1 = jfa_base.u[2:4, 0] / numpy.linalg.norm(jfa_base.u[2:4, 0])
u2 = jfa_base.u[4:6, 0] / numpy.linalg.norm(jfa_base.u[4:6, 0])
# Variability direction V
v0 = jfa_base.v[0:2, 0] / numpy.linalg.norm(jfa_base.v[0:2, 0])
v1 = jfa_base.v[2:4, 0] / numpy.linalg.norm(jfa_base.v[2:4, 0])
v2 = jfa_base.v[4:6, 0] / numpy.linalg.norm(jfa_base.v[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
# marker=".", label="within class varibility", s=0.01)
# U
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
# V
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], v0[0], v0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], v1[0], v1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], v2[0], v2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + v0[0], ubm.means[0, 1] +
v0[1] - 0.1, r'$\mathbf{V}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + v1[0], ubm.means[1, 1] +
v1[1] - 0.1, r'$\mathbf{V}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + v2[0], ubm.means[2, 1] +
v2[1] - 0.1, r'$\mathbf{V}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.tight_layout()
# plt.show()
import matplotlib.pyplot as plt
import bob.db.iris
import bob.learn.em
import numpy
numpy.random.seed(10)
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# A GMM with three Gaussian components and a feature dimensionality of 2
mle_machine = bob.learn.em.GMMMachine(3, 2)
mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
# Creating some random data centered in
map_machine = bob.learn.em.GMMMachine(3, 2)
map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
convergence_threshold=1e-5)  # MAP-adapt the GMMMachine
figure, ax = plt.subplots()
# plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1],
c="dimgrey", label="virginica")
plt.scatter(mle_machine.means[:, 0],
mle_machine.means[:, 1], c="blue", marker="x",
label="prior centroids - mle", s=60)
plt.scatter(map_machine.means[:, 0], map_machine.means[:, 1], c="red",
marker="^", label="adapted centroids - map", s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
plt.show()
import bob.learn.em
import bob.db.iris
import numpy
import matplotlib.pyplot as plt
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# A GMM with three Gaussian components and a feature dimensionality of 2
machine = bob.learn.em.GMMMachine(3, 2)
trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
bob.learn.em.train(trainer, machine, data, max_iterations=200,
convergence_threshold=1e-5)  # Train the GMMMachine
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1],
c="dimgrey", label="virginica")
plt.scatter(machine.means[:, 0],
machine.means[:, 1], c="blue", marker="x", label="centroids", s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
t_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
# T - Normalizing
t_norm_impostors = bob.learn.em.tnorm(impostor_scores, t_scores)
t_norm_genuine = bob.learn.em.tnorm(genuine_scores, t_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("T-norm scores", fontsize=8)
plt.hist(t_norm_impostors.reshape(n_scores_per_client * n_clients),
label='T-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(t_norm_genuine.reshape(n_scores_per_client * n_clients),
label='T-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
t_scores = numpy.random.normal(-6., 5, (n_scores_per_client, n_clients))
# T-normalizing the Z-scores
zt_scores = bob.learn.em.tnorm(z_scores, t_scores)
# ZT - Normalizing
zt_norm_impostors = bob.learn.em.ztnorm(
impostor_scores, z_scores, t_scores, zt_scores)
zt_norm_genuine = bob.learn.em.ztnorm(
genuine_scores, z_scores, t_scores, zt_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("T-norm scores", fontsize=8)
plt.hist(zt_norm_impostors.reshape(n_scores_per_client * n_clients),
label='T-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(zt_norm_genuine.reshape(n_scores_per_client * n_clients),
label='T-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
# Z - Normalizing
z_norm_impostors = bob.learn.em.znorm(impostor_scores, z_scores)
z_norm_genuine = bob.learn.em.znorm(genuine_scores, z_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("Z-norm scores", fontsize=8)
plt.hist(z_norm_impostors.reshape(n_scores_per_client * n_clients),
label='Z-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(z_norm_genuine.reshape(n_scores_per_client * n_clients),
label='Z-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.yticks([], [])
plt.legend(fontsize=8)
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is first used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def ivector_train(features, ubm):
"""
Trains T matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats`
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
for f in user:
ubm.acc_statistics(f, s)
stats.append(s)
subspace_dimension_of_t = 2
ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
ivector_machine = bob.learn.em.IVectorMachine(
ubm, subspace_dimension_of_t, 10e-5)
# train IVector model
bob.learn.em.train(ivector_trainer, ivector_machine, stats, 500)
return ivector_machine
def acc_stats(data, gmm):
gmm_stats = []
for d in data:
s = bob.learn.em.GMMStats(gmm.shape[0], gmm.shape[1])
gmm.acc_statistics(d, s)
gmm_stats.append(s)
return gmm_stats
def compute_ivectors(gmm_stats, ivector_machine):
"""
Given :py:class:`bob.learn.em.GMMStats` and an T matrix, get the iVectors.
"""
ivectors = []
for g in gmm_stats:
ivectors.append(ivector_machine(g))
return numpy.array(ivectors)
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
ivector_machine = ivector_train([setosa, versicolor, virginica], ubm)
# Variability direction U
# t0 = T[0:2, 0] / numpy.linalg.norm(T[0:2, 0])
# t1 = T[2:4, 0] / numpy.linalg.norm(T[2:4, 0])
# t2 = T[4:6, 0] / numpy.linalg.norm(T[4:6, 0])
# figure, ax = plt.subplots()
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw fetures")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
# plt.grid(True)
# plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.xticks([], [])
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
ivector_setosa = compute_ivectors(acc_stats(setosa, ubm), ivector_machine)
ivector_versicolor = compute_ivectors(
acc_stats(versicolor, ubm), ivector_machine)
ivector_virginica = compute_ivectors(
acc_stats(virginica, ubm), ivector_machine)
# Whitening iVectors
whitening_trainer = bob.learn.linear.WhiteningTrainer()
whitener_machine = bob.learn.linear.Machine(
ivector_setosa.shape[1], ivector_setosa.shape[1])
whitening_trainer.train(numpy.vstack(
(ivector_setosa, ivector_versicolor, ivector_virginica)), whitener_machine)
ivector_setosa = whitener_machine(ivector_setosa)
ivector_versicolor = whitener_machine(ivector_versicolor)
ivector_virginica = whitener_machine(ivector_virginica)
# LDA ivectors
lda_trainer = bob.learn.linear.FisherLDATrainer()
lda_machine = bob.learn.linear.Machine(
ivector_setosa.shape[1], ivector_setosa.shape[1])
lda_trainer.train([ivector_setosa, ivector_versicolor,
ivector_virginica], lda_machine)
ivector_setosa = lda_machine(ivector_setosa)
ivector_versicolor = lda_machine(ivector_versicolor)
ivector_virginica = lda_machine(ivector_virginica)
# WCCN ivectors
# wccn_trainer = bob.learn.linear.WCCNTrainer()
# wccn_machine = bob.learn.linear.Machine(
# ivector_setosa.shape[1], ivector_setosa.shape[1])
# wccn_trainer.train([ivector_setosa, ivector_versicolor,
# ivector_virginica], wccn_machine)
# ivector_setosa = wccn_machine(ivector_setosa)
# ivector_versicolor = wccn_machine(ivector_versicolor)
# ivector_virginica = wccn_machine(ivector_virginica)
plt.title("First two ivectors")
plt.scatter(ivector_setosa[:, 0],
ivector_setosa[:, 1], c="darkcyan", label="setosa",
marker="x")
plt.scatter(ivector_versicolor[:, 0],
ivector_versicolor[:, 1], c="goldenrod", label="versicolor",
marker="x")
plt.scatter(ivector_virginica[:, 0],
ivector_virginica[:, 1], c="dimgrey", label="virginica",
marker="x")
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
# plt.xlabel('Sepal length')
# plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.tight_layout()
plt.show()
import bob.learn.em
import bob.db.iris
import numpy
import matplotlib.pyplot as plt
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# Training KMeans
# Three clusters with a feature dimensionality of 2
machine = bob.learn.em.KMeansMachine(3, 2)
trainer = bob.learn.em.KMeansTrainer()
bob.learn.em.train(trainer, machine, data, max_iterations=200,
convergence_threshold=1e-5) # Train the KMeansMachine
# Plotting
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0],
setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0],
versicolor[:, 1], c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0],
virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(machine.means[:, 0],
machine.means[:, 1], c="blue", marker="x", label="centroids",
s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
@@ -17,21 +17,21 @@ Trainers
........

.. autosummary::

   bob.learn.em.KMeansTrainer
   bob.learn.em.ML_GMMTrainer
   bob.learn.em.MAP_GMMTrainer
   bob.learn.em.ISVTrainer
   bob.learn.em.JFATrainer
   bob.learn.em.IVectorTrainer
   bob.learn.em.PLDATrainer
   bob.learn.em.EMPCATrainer

Machines
........

.. autosummary::

   bob.learn.em.KMeansMachine
   bob.learn.em.Gaussian
   bob.learn.em.GMMStats

@@ -43,7 +43,7 @@ Machines

   bob.learn.em.IVectorMachine
   bob.learn.em.PLDABase
   bob.learn.em.PLDAMachine

Functions
---------

.. autosummary::