Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Target project: bob/bob.learn.em

Showing 1620 additions and 1036 deletions
@@ -17,7 +17,7 @@ static inline bool f(PyObject* o){return o != 0 && PyObject_IsTrue(o) > 0;} /*
static auto ML_GMMTrainer_doc = bob::extension::ClassDoc(
BOB_EXT_MODULE_PREFIX ".ML_GMMTrainer",
"This class implements the maximum likelihood M-step of the expectation-maximisation algorithm for a GMM Machine."
"This class implements the maximum likelihood M-step (:ref:`MLE <mle>`) of the expectation-maximisation algorithm for a GMM Machine."
).add_constructor(
bob::extension::FunctionDoc(
"__init__",
@@ -15,159 +15,179 @@ from bob.io.base.test_utils import datafile
from bob.learn.em import KMeansMachine, KMeansTrainer
def equals(x, y, epsilon):
    return (abs(x - y) < epsilon).all()
def kmeans_plus_plus(machine, data, seed):
    """Python implementation of K-Means++ (initialization)"""
    n_data = data.shape[0]
    rng = bob.core.random.mt19937(seed)
    u = bob.core.random.uniform('int32', 0, n_data - 1)
    index = u(rng)
    machine.set_mean(0, data[index, :])
    weights = numpy.zeros(shape=(n_data,), dtype=numpy.float64)

    for m in range(1, machine.dim_c):
        for s in range(n_data):
            s_cur = data[s, :]
            w_cur = machine.get_distance_from_mean(s_cur, 0)
            for i in range(m):
                w_cur = min(machine.get_distance_from_mean(s_cur, i), w_cur)
            weights[s] = w_cur
        weights *= weights
        weights /= numpy.sum(weights)
        d = bob.core.random.discrete('int32', weights)
        index = d(rng)
        machine.set_mean(m, data[index, :])
def NormalizeStdArray(path):
    array = bob.io.base.load(path).astype('float64')
    std = array.std(axis=0)
    return (array / std, std)
def multiplyVectorsByFactors(matrix, vector):
    for i in range(0, matrix.shape[0]):
        for j in range(0, matrix.shape[1]):
            matrix[i, j] *= vector[j]
def flipRows(array):
    if len(array.shape) == 2:
        return numpy.array([numpy.array(array[1, :]), numpy.array(array[0, :])], 'float64')
    elif len(array.shape) == 1:
        return numpy.array([array[1], array[0]], 'float64')
    else:
        raise Exception('Input type not supported by flipRows')
if hasattr(KMeansTrainer, 'KMEANS_PLUS_PLUS'):
    def test_kmeans_plus_plus():
        # Tests the K-Means++ initialization
        dim_c = 5
        dim_d = 7
        n_samples = 150
        data = numpy.random.randn(n_samples, dim_d)
        seed = 0

        # C++ implementation
        machine = KMeansMachine(dim_c, dim_d)
        trainer = KMeansTrainer()
        trainer.rng = bob.core.random.mt19937(seed)
        trainer.initialization_method = 'KMEANS_PLUS_PLUS'
        trainer.initialize(machine, data)

        # Python implementation
        py_machine = KMeansMachine(dim_c, dim_d)
        kmeans_plus_plus(py_machine, data, seed)
        assert equals(machine.means, py_machine.means, 1e-8)


def test_kmeans_noduplicate():
    # Data/dimensions
    dim_c = 2
    dim_d = 3
    seed = 0
    data = numpy.array([[1, 2, 3], [1, 2, 3], [1, 2, 3], [4, 5, 6.]])

    # Defines machine and trainer
    machine = KMeansMachine(dim_c, dim_d)
    trainer = KMeansTrainer()
    rng = bob.core.random.mt19937(seed)
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    trainer.initialize(machine, data, rng)

    # Makes sure that the two initial mean vectors selected are different
    assert equals(machine.get_mean(0), machine.get_mean(1), 1e-8) == False
def test_kmeans_a():
    # Trains a KMeansMachine
    # This file contains draws from two 1D Gaussian distributions:
    #   * 100 samples from N(-10,1)
    #   * 100 samples from N(10,1)
    data = bob.io.base.load(datafile("samplesFrom2G_f64.hdf5", __name__, path="../data/"))

    machine = KMeansMachine(2, 1)

    trainer = KMeansTrainer()
    # trainer.train(machine, data)
    bob.learn.em.train(trainer, machine, data)

    [variances, weights] = machine.get_variances_and_weights_for_each_cluster(data)
    variances_b = numpy.ndarray(shape=(2, 1), dtype=numpy.float64)
    weights_b = numpy.ndarray(shape=(2,), dtype=numpy.float64)
    machine.__get_variances_and_weights_for_each_cluster_init__(variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_acc__(data, variances_b, weights_b)
    machine.__get_variances_and_weights_for_each_cluster_fin__(variances_b, weights_b)
    m1 = machine.get_mean(0)
    m2 = machine.get_mean(1)

    # Check means [-10,10] / variances [1,1] / weights [0.5,0.5]
    if (m1 < m2):
        means = numpy.array(([m1[0], m2[0]]), 'float64')
    else:
        means = numpy.array(([m2[0], m1[0]]), 'float64')
    assert equals(means, numpy.array([-10., 10.]), 2e-1)
    assert equals(variances, numpy.array([1., 1.]), 2e-1)
    assert equals(weights, numpy.array([0.5, 0.5]), 1e-3)

    assert equals(variances, variances_b, 1e-8)
    assert equals(weights, weights_b, 1e-8)
def test_kmeans_b():
    # Trains a KMeansMachine
    (arStd, std) = NormalizeStdArray(datafile("faithful.torch3.hdf5", __name__, path="../data/"))

    machine = KMeansMachine(2, 2)

    trainer = KMeansTrainer()
    # trainer.seed = 1337
    bob.learn.em.train(trainer, machine, arStd, convergence_threshold=0.001)

    [variances, weights] = machine.get_variances_and_weights_for_each_cluster(arStd)

    means = numpy.array(machine.means)
    variances = numpy.array(variances)

    multiplyVectorsByFactors(means, std)
    multiplyVectorsByFactors(variances, std ** 2)

    gmmWeights = bob.io.base.load(datafile('gmm.init_weights.hdf5', __name__, path="../data/"))
    gmmMeans = bob.io.base.load(datafile('gmm.init_means.hdf5', __name__, path="../data/"))
    gmmVariances = bob.io.base.load(datafile('gmm.init_variances.hdf5', __name__, path="../data/"))

    if (means[0, 0] < means[1, 0]):
        means = flipRows(means)
        variances = flipRows(variances)
        weights = flipRows(weights)

    assert equals(means, gmmMeans, 1e-3)
    assert equals(weights, gmmWeights, 1e-3)
    assert equals(variances, gmmVariances, 1e-3)

    # Check that there is no duplicate means during initialization
    machine = KMeansMachine(2, 1)
    trainer = KMeansTrainer()
    trainer.initialization_method = 'RANDOM_NO_DUPLICATE'
    data = numpy.array([[1.], [1.], [1.], [1.], [1.], [1.], [2.], [3.]])
    bob.learn.em.train(trainer, machine, data)
    assert (numpy.isnan(machine.means).any()) == False


def test_trainer_execption():
    from nose.tools import assert_raises

    # Testing Inf
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, 1.], [2, 5.], [numpy.inf, 1.0]])
    trainer = KMeansTrainer()
    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)

    # Testing Nan
    machine = KMeansMachine(2, 2)
    data = numpy.array([[1.0, 2.0], [2, 3.], [1, numpy.nan], [2, 5.], [2.0, 1.0]])
    trainer = KMeansTrainer()
    assert_raises(ValueError, bob.learn.em.train, trainer, machine, data, 10)
@@ -7,112 +7,125 @@
import numpy
import bob.learn.em
import logging
logger = logging.getLogger('bob.learn.em')
def train(trainer, machine, data, max_iterations=50, convergence_threshold=None, initialize=True, rng=None,
          check_inputs=True):
    """
    Trains a machine given a trainer and the proper data

    **Parameters**:

      trainer : one of :py:class:`KMeansTrainer`, :py:class:`MAP_GMMTrainer`, :py:class:`ML_GMMTrainer`, :py:class:`ISVTrainer`, :py:class:`IVectorTrainer`, :py:class:`PLDATrainer`, :py:class:`EMPCATrainer`
        A trainer mechanism
      machine : one of :py:class:`KMeansMachine`, :py:class:`GMMMachine`, :py:class:`ISVBase`, :py:class:`IVectorMachine`, :py:class:`PLDAMachine`, :py:class:`bob.learn.linear.Machine`
        A container machine
      data : array_like <float, 2D>
        The data to be trained
      max_iterations : int
        The maximum number of iterations to train a machine
      convergence_threshold : float
        The convergence threshold to train a machine. If None, the training procedure will stop with the iterations criteria
      initialize : bool
        If True, runs the initialization procedure
      rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loop
      check_inputs : bool
        Shallow checks on the inputs; raises a ValueError if `data` contains inf or NaN
    """

    if check_inputs and type(data) is numpy.ndarray:
        if numpy.isinf(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.inf detected in `data` ")

        if numpy.isnan(numpy.sum(data)):
            raise ValueError("Please, check your inputs; numpy.nan detected in `data` ")

    # Initialization
    if initialize:
        if rng is not None:
            trainer.initialize(machine, data, rng)
        else:
            trainer.initialize(machine, data)

    trainer.e_step(machine, data)
    average_output = 0
    average_output_previous = 0

    if hasattr(trainer, "compute_likelihood"):
        average_output = trainer.compute_likelihood(machine)

    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        average_output_previous = average_output
        trainer.m_step(machine, data)
        trainer.e_step(machine, data)

        if hasattr(trainer, "compute_likelihood"):
            average_output = trainer.compute_likelihood(machine)

            if type(machine) is bob.learn.em.KMeansMachine:
                logger.info("average euclidean distance = %f", average_output)
            else:
                logger.info("log likelihood = %f", average_output)

            convergence_value = abs((average_output_previous - average_output) / average_output_previous)
            logger.info("convergence value = %f", convergence_value)

            # Terminates if converged (and likelihood computation is set)
            if convergence_threshold != None and convergence_value <= convergence_threshold:
                break

    if hasattr(trainer, "finalize"):
        trainer.finalize(machine, data)
def train_jfa(trainer, jfa_base, data, max_iterations=10, initialize=True, rng=None):
    """
    Trains a :py:class:`bob.learn.em.JFABase` given a :py:class:`bob.learn.em.JFATrainer` and the proper data

    **Parameters**:

      trainer : :py:class:`bob.learn.em.JFATrainer`
        A JFA trainer mechanism
      jfa_base : :py:class:`bob.learn.em.JFABase`
        A container machine
      data : [[:py:class:`bob.learn.em.GMMStats`]]
        The data to be trained
      max_iterations : int
        The maximum number of iterations to train a machine
      initialize : bool
        If True, runs the initialization procedure
      rng : :py:class:`bob.core.random.mt19937`
        The Mersenne Twister mt19937 random generator used for the initialization of subspaces/arrays before the EM loops
    """

    if initialize:
        if rng is not None:
            trainer.initialize(jfa_base, data, rng)
        else:
            trainer.initialize(jfa_base, data)

    # V Subspace
    logger.info("V subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_v(jfa_base, data)
        trainer.m_step_v(jfa_base, data)
    trainer.finalize_v(jfa_base, data)

    # U subspace
    logger.info("U subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_u(jfa_base, data)
        trainer.m_step_u(jfa_base, data)
    trainer.finalize_u(jfa_base, data)

    # D subspace
    logger.info("D subspace estimation...")
    for i in range(max_iterations):
        logger.info("Iteration = %d/%d", i, max_iterations)
        trainer.e_step_d(jfa_base, data)
        trainer.m_step_d(jfa_base, data)
    trainer.finalize_d(jfa_base, data)
@@ -12,7 +12,7 @@
/*** zt_norm ***/
bob::extension::FunctionDoc zt_norm = bob::extension::FunctionDoc(
"ztnorm",
"Normalise raw scores with ZT-Norm."
"Normalise raw scores with :ref:`ZT-Norm <ztnorm>`."
"Assume that znorm and tnorm have no common subject id.",
0,
true
@@ -72,7 +72,7 @@ PyObject* PyBobLearnEM_ztNorm(PyObject*, PyObject* args, PyObject* kwargs) {
/*** t_norm ***/
bob::extension::FunctionDoc t_norm = bob::extension::FunctionDoc(
"tnorm",
"Normalise raw scores with T-Norm",
"Normalise raw scores with :ref:`T-Norm <tnorm>`",
0,
true
)
@@ -109,7 +109,7 @@ PyObject* PyBobLearnEM_tNorm(PyObject*, PyObject* args, PyObject* kwargs) {
/*** z_norm ***/
bob::extension::FunctionDoc z_norm = bob::extension::FunctionDoc(
"znorm",
"Normalise raw scores with Z-Norm",
"Normalise raw scores with :ref:`Z-Norm <znorm>`",
0,
true
)
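
As background for the normalisation docstrings above: Z-norm standardises each raw score with the mean and standard deviation of impostor scores obtained against the same model [Auckenthaler2000]_. A rough NumPy sketch of that statistic (the arrays below are hypothetical; the bob.learn.em.znorm binding takes the raw-score and Z-score matrices directly):

import numpy

raw_scores = numpy.random.normal(0.0, 5.0, (200, 10))    # probes x models
z_scores = numpy.random.normal(-5.0, 5.0, (200, 10))     # impostor probes x models

mu = z_scores.mean(axis=0)      # per-model impostor mean
sigma = z_scores.std(axis=0)    # per-model impostor standard deviation
z_normed = (raw_scores - mu) / sigma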
##############################################################################
#
# Copyright (c) 2006 Zope Foundation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.1 (ZPL). A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE.
#
##############################################################################
"""Bootstrap a buildout-based project
Simply run this script in a directory containing a buildout.cfg.
The script accepts buildout command-line options, so you can
use the -c option to specify an alternate configuration file.
"""
import os
import shutil
import sys
import tempfile
from optparse import OptionParser
__version__ = '2015-07-01'
# See zc.buildout's changelog if this version is up to date.
tmpeggs = tempfile.mkdtemp(prefix='bootstrap-')
usage = '''\
[DESIRED PYTHON FOR BUILDOUT] bootstrap.py [options]
Bootstraps a buildout-based project.
Simply run this script in a directory containing a buildout.cfg, using the
Python that you want bin/buildout to use.
Note that by using --find-links to point to local resources, you can keep
this script from going over the network.
'''
parser = OptionParser(usage=usage)
parser.add_option("--version",
action="store_true", default=False,
help=("Return bootstrap.py version."))
parser.add_option("-t", "--accept-buildout-test-releases",
dest='accept_buildout_test_releases',
action="store_true", default=False,
help=("Normally, if you do not specify a --version, the "
"bootstrap script and buildout gets the newest "
"*final* versions of zc.buildout and its recipes and "
"extensions for you. If you use this flag, "
"bootstrap and buildout will get the newest releases "
"even if they are alphas or betas."))
parser.add_option("-c", "--config-file",
help=("Specify the path to the buildout configuration "
"file to be used."))
parser.add_option("-f", "--find-links",
help=("Specify a URL to search for buildout releases"))
parser.add_option("--allow-site-packages",
action="store_true", default=False,
help=("Let bootstrap.py use existing site packages"))
parser.add_option("--buildout-version",
help="Use a specific zc.buildout version")
parser.add_option("--setuptools-version",
help="Use a specific setuptools version")
parser.add_option("--setuptools-to-dir",
help=("Allow for re-use of existing directory of "
"setuptools versions"))
options, args = parser.parse_args()
if options.version:
    print("bootstrap.py version %s" % __version__)
    sys.exit(0)
######################################################################
# load/install setuptools
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen

ez = {}
if os.path.exists('ez_setup.py'):
    exec(open('ez_setup.py').read(), ez)
else:
    exec(urlopen('https://bootstrap.pypa.io/ez_setup.py').read(), ez)
if not options.allow_site_packages:
    # ez_setup imports site, which adds site packages
    # this will remove them from the path to ensure that incompatible versions
    # of setuptools are not in the path
    import site
    # inside a virtualenv, there is no 'getsitepackages'.
    # We can't remove these reliably
    if hasattr(site, 'getsitepackages'):
        for sitepackage_path in site.getsitepackages():
            # Strip all site-packages directories from sys.path that
            # are not sys.prefix; this is because on Windows
            # sys.prefix is a site-package directory.
            if sitepackage_path != sys.prefix:
                sys.path[:] = [x for x in sys.path
                               if sitepackage_path not in x]
setup_args = dict(to_dir=tmpeggs, download_delay=0)
if options.setuptools_version is not None:
    setup_args['version'] = options.setuptools_version
if options.setuptools_to_dir is not None:
    setup_args['to_dir'] = options.setuptools_to_dir
ez['use_setuptools'](**setup_args)
import setuptools
import pkg_resources
# This does not (always?) update the default working set. We will
# do it.
for path in sys.path:
    if path not in pkg_resources.working_set.entries:
        pkg_resources.working_set.add_entry(path)
######################################################################
# Install buildout
ws = pkg_resources.working_set
setuptools_path = ws.find(
pkg_resources.Requirement.parse('setuptools')).location
# Fix sys.path here as easy_install.pth added before PYTHONPATH
cmd = [sys.executable, '-c',
'import sys; sys.path[0:0] = [%r]; ' % setuptools_path +
'from setuptools.command.easy_install import main; main()',
'-mZqNxd', tmpeggs]
find_links = os.environ.get(
'bootstrap-testing-find-links',
options.find_links or
('http://downloads.buildout.org/'
if options.accept_buildout_test_releases else None)
)
if find_links:
    cmd.extend(['-f', find_links])
requirement = 'zc.buildout'
version = options.buildout_version
if version is None and not options.accept_buildout_test_releases:
    # Figure out the most recent final version of zc.buildout.
    import setuptools.package_index
    _final_parts = '*final-', '*final'

    def _final_version(parsed_version):
        try:
            return not parsed_version.is_prerelease
        except AttributeError:
            # Older setuptools
            for part in parsed_version:
                if (part[:1] == '*') and (part not in _final_parts):
                    return False
            return True

    index = setuptools.package_index.PackageIndex(
        search_path=[setuptools_path])
    if find_links:
        index.add_find_links((find_links,))
    req = pkg_resources.Requirement.parse(requirement)
    if index.obtain(req) is not None:
        best = []
        bestv = None
        for dist in index[req.project_name]:
            distv = dist.parsed_version
            if _final_version(distv):
                if bestv is None or distv > bestv:
                    best = [dist]
                    bestv = distv
                elif distv == bestv:
                    best.append(dist)
        if best:
            best.sort()
            version = best[-1].version

if version:
    requirement = '=='.join((requirement, version))
cmd.append(requirement)
import subprocess
if subprocess.call(cmd) != 0:
    raise Exception(
        "Failed to execute command:\n%s" % repr(cmd)[1:-1])
######################################################################
# Import and run buildout
ws.add_entry(tmpeggs)
ws.require(requirement)
import zc.buildout.buildout
if not [a for a in args if '=' not in a]:
    args.append('bootstrap')

# if -c was provided, we push it back into args for buildout's main function
if options.config_file is not None:
    args[0:0] = ['-c', options.config_file]
zc.buildout.buildout.main(args)
shutil.rmtree(tmpeggs)
@@ -25,6 +25,7 @@ extensions = [
'sphinx.ext.intersphinx',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'matplotlib.sphinxext.plot_directive'
]
import sphinx
@@ -231,7 +232,6 @@ autodoc_member_order = 'bysource'
autodoc_default_flags = [
'members',
'undoc-members',
'inherited-members',
'show-inheritance',
]
.. vim: set fileencoding=utf-8 :
.. Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
.. Tue 17 Feb 2015 13:50:06 CET
..
.. Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland
.. _bob.learn.em:
@@ -10,12 +6,12 @@
Expectation Maximization Machine Learning Tools
================================================
The EM algorithm is an iterative method that estimates parameters for statistical models, where the model depends on unobserved latent variables. The EM iteration alternates between performing an expectation (E) step, which creates a function for the expectation of the log-likelihood evaluated using the current estimate for the parameters, and a maximization (M) step, which computes parameters maximizing the expected log-likelihood found on the E step. These parameter-estimates are then used to determine the distribution of the latent variables in the next E step [WikiEM]_.
This package is a part of Bob_. It implements a general EM algorithm and
includes implementations of the following algorithms (a minimal usage sketch
follows the list below):

- K-Means
- Maximum Likelihood (ML)
- Maximum a Posteriori (MAP)
- Inter Session Variability Modelling (ISV)
- Joint Factor Analysis (JFA)
- Total Variability Modeling (iVectors)
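
As a first taste of the API (the array below is only a stand-in for real training data; complete, reproducible examples live in the user guide), every algorithm follows the same pattern of a machine, a trainer and the generic ``bob.learn.em.train`` loop:

.. code-block:: python

   import numpy
   import bob.learn.em

   data = numpy.random.randn(1000, 2)          # any 2D float array
   kmeans = bob.learn.em.KMeansMachine(3, 2)   # 3 clusters, 2 dimensions
   trainer = bob.learn.em.KMeansTrainer()
   bob.learn.em.train(trainer, kmeans, data,
                      max_iterations=200, convergence_threshold=1e-5)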
@@ -31,7 +27,7 @@ Documentation
guide
py_api
References
-----------
@@ -47,7 +43,9 @@ References
.. [Roweis1998] Roweis, Sam. "EM algorithms for PCA and SPCA." Advances in neural information processing systems (1998): 626-632.
.. [WikiEM] `Expectation Maximization <http://en.wikipedia.org/wiki/Expectation%E2%80%93maximization_algorithm>`_
.. [Glembek2009] Glembek, Ondrej, et al. "Comparison of scoring methods used in speaker recognition with joint factor analysis." Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on. IEEE, 2009.
.. [Auckenthaler2000] Auckenthaler, Roland, Michael Carey, and Harvey Lloyd-Thomas. "Score normalization for text-independent speaker verification systems." Digital Signal Processing 10.1 (2000): 42-54.
.. [Mariethoz2005] Mariethoz, Johnny, and Samy Bengio. "A unified framework for score normalization techniques applied to text-independent speaker verification." IEEE signal processing letters 12.7 (2005): 532-535.
Indices and tables
@@ -12,6 +12,7 @@
.. _blitz++: http://www.oonumerics.org/blitz
.. _bob's idiap guide: https://gitlab.idiap.ch/bob/bob/wikis/Using-Bob-at-Idiap
.. _bob's website: https://www.idiap.ch/software/bob
.. _bob: https://www.idiap.ch/software/bob
.. _boost: http://www.boost.org
.. _buildbot: http://trac.buildbot.net
.. _buildout: http://pypi.python.org/pypi/zc.buildout/
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is firstly used to used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def isv_train(features, ubm):
"""
Train U matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats` organized by class
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
user_stats = []
for f in user:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
ubm.acc_statistics(f, s)
user_stats.append(s)
stats.append(user_stats)
relevance_factor = 4
subspace_dimension_of_u = 1
isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
trainer = bob.learn.em.ISVTrainer(relevance_factor)
# trainer.rng = bob.core.random.mt19937(int(self.init_seed))
bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)
return isvbase
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
isvbase = isv_train([setosa, versicolor, virginica], ubm)
# Variability direction
u0 = isvbase.u[0:2, 0] / numpy.linalg.norm(isvbase.u[0:2, 0])
u1 = isvbase.u[2:4, 0] / numpy.linalg.norm(isvbase.u[2:4, 0])
u2 = isvbase.u[4:6, 0] / numpy.linalg.norm(isvbase.u[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
# marker=".", label="within class varibility", s=0.01)
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend()
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is firstly used to used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def jfa_train(features, ubm):
"""
Trains U and V matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats` organized by class
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
user_stats = []
for f in user:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
ubm.acc_statistics(f, s)
user_stats.append(s)
stats.append(user_stats)
subspace_dimension_of_u = 1
subspace_dimension_of_v = 1
jfa_base = bob.learn.em.JFABase(
ubm, subspace_dimension_of_u, subspace_dimension_of_v)
trainer = bob.learn.em.JFATrainer()
# trainer.rng = bob.core.random.mt19937(int(self.init_seed))
bob.learn.em.train_jfa(trainer, jfa_base, stats, max_iterations=50)
return jfa_base
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
jfa_base = jfa_train([setosa, versicolor, virginica], ubm)
# Variability direction U
u0 = jfa_base.u[0:2, 0] / numpy.linalg.norm(jfa_base.u[0:2, 0])
u1 = jfa_base.u[2:4, 0] / numpy.linalg.norm(jfa_base.u[2:4, 0])
u2 = jfa_base.u[4:6, 0] / numpy.linalg.norm(jfa_base.u[4:6, 0])
# Variability direction V
v0 = jfa_base.v[0:2, 0] / numpy.linalg.norm(jfa_base.v[0:2, 0])
v1 = jfa_base.v[2:4, 0] / numpy.linalg.norm(jfa_base.v[2:4, 0])
v2 = jfa_base.v[4:6, 0] / numpy.linalg.norm(jfa_base.v[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
# marker=".", label="within class varibility", s=0.01)
# U
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
# V
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], v0[0], v0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], v1[0], v1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], v2[0], v2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + v0[0], ubm.means[0, 1] +
v0[1] - 0.1, r'$\mathbf{V}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + v1[0], ubm.means[1, 1] +
v1[1] - 0.1, r'$\mathbf{V}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + v2[0], ubm.means[2, 1] +
v2[1] - 0.1, r'$\mathbf{V}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.tight_layout()
# plt.show()
import matplotlib.pyplot as plt
import bob.db.iris
import bob.learn.em
import numpy
numpy.random.seed(10)
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# A GMM with three gaussian components and a feature dimensionality of two
mle_machine = bob.learn.em.GMMMachine(3, 2)
mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
# MAP-adapting the prior (MLE) machine to the data
map_machine = bob.learn.em.GMMMachine(3, 2)
map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine with MAP
figure, ax = plt.subplots()
# plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1],
c="dimgrey", label="virginica")
plt.scatter(mle_machine.means[:, 0],
mle_machine.means[:, 1], c="blue", marker="x",
label="prior centroids - mle", s=60)
plt.scatter(map_machine.means[:, 0], map_machine.means[:, 1], c="red",
marker="^", label="adapted centroids - map", s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
plt.show()
import bob.learn.em
import bob.db.iris
import numpy
import matplotlib.pyplot as plt
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# A GMM with three gaussian components and a feature dimensionality of two
machine = bob.learn.em.GMMMachine(3, 2)
trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
bob.learn.em.train(trainer, machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1],
c="dimgrey", label="virginica")
plt.scatter(machine.means[:, 0],
machine.means[:, 1], c="blue", marker="x", label="centroids", s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
t_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
# T - Normalizing
t_norm_impostors = bob.learn.em.tnorm(impostor_scores, t_scores)
t_norm_genuine = bob.learn.em.tnorm(genuine_scores, t_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("T-norm scores", fontsize=8)
plt.hist(t_norm_impostors.reshape(n_scores_per_client * n_clients),
label='T-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(t_norm_genuine.reshape(n_scores_per_client * n_clients),
label='T-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
t_scores = numpy.random.normal(-6., 5, (n_scores_per_client, n_clients))
# T-normalizing the Z-scores
zt_scores = bob.learn.em.tnorm(z_scores, t_scores)
# ZT - Normalizing
zt_norm_impostors = bob.learn.em.ztnorm(
impostor_scores, z_scores, t_scores, zt_scores)
zt_norm_genuine = bob.learn.em.ztnorm(
genuine_scores, z_scores, t_scores, zt_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("T-norm scores", fontsize=8)
plt.hist(zt_norm_impostors.reshape(n_scores_per_client * n_clients),
label='T-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(zt_norm_genuine.reshape(n_scores_per_client * n_clients),
label='T-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
# Z - Normalizing
z_norm_impostors = bob.learn.em.znorm(impostor_scores, z_scores)
z_norm_genuine = bob.learn.em.znorm(genuine_scores, z_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
label='Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
label='Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("Z-norm scores", fontsize=8)
plt.hist(z_norm_impostors.reshape(n_scores_per_client * n_clients),
label='Z-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(z_norm_genuine.reshape(n_scores_per_client * n_clients),
label='Z-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.yticks([], [])
plt.legend(fontsize=8)
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
"""
Train UBM
**Parameters**
features: 2D numpy array with the features
n_gaussians: Number of Gaussians
"""
input_size = features.shape[1]
kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
# The K-means clustering is firstly used to used to estimate the initial
# means, the final variances and the final weights for each gaussian
# component
kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
# Getting the means, weights and the variances for each cluster. This is a
# very good estimator for the ML
(variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
means = kmeans_machine.means
# initialize the UBM with the output of kmeans
ubm.means = means
ubm.variances = variances
ubm.weights = weights
# Creating the ML Trainer. We will adapt only the means
trainer = bob.learn.em.ML_GMMTrainer(
update_means=True, update_variances=False, update_weights=False)
bob.learn.em.train(trainer, ubm, features)
return ubm
def ivector_train(features, ubm):
"""
Trains T matrix
**Parameters**
features: List of :py:class:`bob.learn.em.GMMStats`
n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
"""
stats = []
for user in features:
s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
for f in user:
ubm.acc_statistics(f, s)
stats.append(s)
subspace_dimension_of_t = 2
ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
ivector_machine = bob.learn.em.IVectorMachine(
ubm, subspace_dimension_of_t, 10e-5)
# train IVector model
bob.learn.em.train(ivector_trainer, ivector_machine, stats, 500)
return ivector_machine
def acc_stats(data, gmm):
    gmm_stats = []
    for d in data:
        s = bob.learn.em.GMMStats(gmm.shape[0], gmm.shape[1])
        gmm.acc_statistics(d, s)
        gmm_stats.append(s)

    return gmm_stats
def compute_ivectors(gmm_stats, ivector_machine):
"""
Given :py:class:`bob.learn.em.GMMStats` and an T matrix, get the iVectors.
"""
ivectors = []
for g in gmm_stats:
ivectors.append(ivector_machine(g))
return numpy.array(ivectors)
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
ivector_machine = ivector_train([setosa, versicolor, virginica], ubm)
# Variability direction U
# t0 = T[0:2, 0] / numpy.linalg.norm(T[0:2, 0])
# t1 = T[2:4, 0] / numpy.linalg.norm(T[2:4, 0])
# t2 = T[4:6, 0] / numpy.linalg.norm(T[4:6, 0])
# figure, ax = plt.subplots()
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw fetures")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
# plt.grid(True)
# plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.xticks([], [])
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
ivector_setosa = compute_ivectors(acc_stats(setosa, ubm), ivector_machine)
ivector_versicolor = compute_ivectors(
acc_stats(versicolor, ubm), ivector_machine)
ivector_virginica = compute_ivectors(
acc_stats(virginica, ubm), ivector_machine)
# Whitening iVectors
whitening_trainer = bob.learn.linear.WhiteningTrainer()
whitener_machine = bob.learn.linear.Machine(
ivector_setosa.shape[1], ivector_setosa.shape[1])
whitening_trainer.train(numpy.vstack(
(ivector_setosa, ivector_versicolor, ivector_virginica)), whitener_machine)
ivector_setosa = whitener_machine(ivector_setosa)
ivector_versicolor = whitener_machine(ivector_versicolor)
ivector_virginica = whitener_machine(ivector_virginica)
# LDA ivectors
lda_trainer = bob.learn.linear.FisherLDATrainer()
lda_machine = bob.learn.linear.Machine(
ivector_setosa.shape[1], ivector_setosa.shape[1])
lda_trainer.train([ivector_setosa, ivector_versicolor,
ivector_virginica], lda_machine)
ivector_setosa = lda_machine(ivector_setosa)
ivector_versicolor = lda_machine(ivector_versicolor)
ivector_virginica = lda_machine(ivector_virginica)
# WCCN ivectors
# wccn_trainer = bob.learn.linear.WCCNTrainer()
# wccn_machine = bob.learn.linear.Machine(
# ivector_setosa.shape[1], ivector_setosa.shape[1])
# wccn_trainer.train([ivector_setosa, ivector_versicolor,
# ivector_virginica], wccn_machine)
# ivector_setosa = wccn_machine(ivector_setosa)
# ivector_versicolor = wccn_machine(ivector_versicolor)
# ivector_virginica = wccn_machine(ivector_virginica)
plt.title("First two ivectors")
plt.scatter(ivector_setosa[:, 0],
ivector_setosa[:, 1], c="darkcyan", label="setosa",
marker="x")
plt.scatter(ivector_versicolor[:, 0],
ivector_versicolor[:, 1], c="goldenrod", label="versicolor",
marker="x")
plt.scatter(ivector_virginica[:, 0],
ivector_virginica[:, 1], c="dimgrey", label="virginica",
marker="x")
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
# plt.xlabel('Sepal length')
# plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.tight_layout()
plt.show()
import bob.learn.em
import bob.db.iris
import numpy
import matplotlib.pyplot as plt
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# Training KMeans
# Three clusters with a feature dimensionality of two
machine = bob.learn.em.KMeansMachine(3, 2)
trainer = bob.learn.em.KMeansTrainer()
bob.learn.em.train(trainer, machine, data, max_iterations=200,
convergence_threshold=1e-5) # Train the KMeansMachine
# Plotting
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0],
setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0],
versicolor[:, 1], c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0],
virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(machine.means[:, 0],
machine.means[:, 1], c="blue", marker="x", label="centroids",
s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
@@ -17,21 +17,21 @@ Trainers
........
.. autosummary::

   bob.learn.em.KMeansTrainer
   bob.learn.em.ML_GMMTrainer
   bob.learn.em.MAP_GMMTrainer
   bob.learn.em.ISVTrainer
   bob.learn.em.JFATrainer
   bob.learn.em.IVectorTrainer
   bob.learn.em.PLDATrainer
   bob.learn.em.EMPCATrainer


Machines
........

.. autosummary::

   bob.learn.em.KMeansMachine
   bob.learn.em.Gaussian
   bob.learn.em.GMMStats
@@ -43,7 +43,7 @@ Machines
   bob.learn.em.IVectorMachine
   bob.learn.em.PLDABase
   bob.learn.em.PLDAMachine
Functions
---------
.. autosummary::