Commit 5cad9397 authored by Yannick DAYER

[doc] Remove the plots using removed C++ modules.

parent 74fcd5c5
2 merge requests: !43 Remove C++ code, tests, dependencies, and build scripts; !40 Transition to a pure python implementation
Pipeline #56571 failed
@@ -58,7 +58,8 @@ Follow below a snippet on how to train a KMeans using Bob_.
.. doctest::
:options: +NORMALIZE_WHITESPACE
>>> import bob.learn.em
>>> from bob.learn.em.cluster import KMeansMachine
>>> from bob.learn.em.cluster import KMeansTrainer
>>> import numpy
>>> data = numpy.array(
... [[3,-3,100],
@@ -66,18 +67,14 @@ Follow below a snippet on how to train a KMeans using Bob_.
... [3.5,-3.5,99],
... [-7,7,-100],
... [-5,5,-101]], dtype='float64')
>>> # Create a k-means machine with k=2 clusters and a dimensionality equal to 3
>>> kmeans_machine = bob.learn.em.KMeansMachine(2, 3)
>>> kmeans_trainer = bob.learn.em.KMeansTrainer()
>>> max_iterations = 200
>>> convergence_threshold = 1e-5
>>> # Create a k-means machine with k=2 clusters
>>> kmeans_machine = KMeansMachine(2, convergence_threshold=1e-5)
>>> kmeans_trainer = KMeansTrainer(max_iter=200)
>>> # Train the KMeansMachine
>>> bob.learn.em.train(kmeans_trainer, kmeans_machine, data,
... max_iterations=max_iterations,
... convergence_threshold=convergence_threshold)
>>> print(kmeans_machine.means)
[[ -6. 6. -100.5]
[ 3.5 -3.5 99. ]]
>>> kmeans_machine = kmeans_machine.fit(data)
>>> print(numpy.array(kmeans_machine.centroids_))
[[ 3.5 -3.5 99. ]
[ -6. 6. -100.5]]
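Once fitted, the centroids can be used directly. As an illustrative sketch only (plain
NumPy, assuming ``centroids_`` holds the final cluster centers as an array of shape
``(n_clusters, n_features)``), each sample of the snippet above can be assigned to its
nearest centroid like this:

.. code-block:: python

    # Sketch: hard-assign every sample to the closest learned centroid
    centroids = numpy.array(kmeans_machine.centroids_)
    # pairwise Euclidean distances, shape (n_samples, n_clusters)
    distances = numpy.linalg.norm(data[:, None, :] - centroids[None, :, :], axis=-1)
    labels = distances.argmin(axis=1)  # index of the closest centroid per sample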
Below follows an intuition (source code + plot) of a k-means training using the
@@ -171,8 +168,10 @@ Below follows an intuition of the GMM trained with the maximum likelihood estimator
using the Iris flower
`dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
.. plot:: plot/plot_ML.py
:include-source: False
..
TODO uncomment when implemented
.. plot:: plot/plot_ML.py
:include-source: False
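While the plot is disabled, the same intuition can be sketched with scikit-learn's
``GaussianMixture`` (an illustrative stand-in, not the ``bob.learn.em`` API; the
feature choice mirrors the old ``plot/plot_ML.py`` script):

.. code-block:: python

    # Maximum-likelihood GMM fit on two Iris features (sepal length, petal width)
    from sklearn.datasets import load_iris
    from sklearn.mixture import GaussianMixture

    X = load_iris().data[:, [0, 3]]
    gmm = GaussianMixture(n_components=3, covariance_type="diag",
                          max_iter=200, tol=1e-5, random_state=0).fit(X)
    print(gmm.means_)  # ML estimates of the three component means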
Maximum a posteriori Estimator (MAP)
@@ -232,8 +231,10 @@ Follow below a snippet on how to train a GMM using the MAP estimator.
Below follows an intuition of the GMM trained with the MAP estimator using the
Iris flower `dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
.. plot:: plot/plot_MAP.py
:include-source: False
..
TODO uncomment when implemented
.. plot:: plot/plot_MAP.py
:include-source: False
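The MAP plot is also pending. For intuition only, below is a rough sketch of
Reynolds-style MAP adaptation of the component means with relevance factor
:math:`r`, using scikit-learn just to obtain a prior GMM and the responsibilities
(in a real setup the prior/UBM would be trained on separate background data):

.. code-block:: python

    import numpy
    from sklearn.datasets import load_iris
    from sklearn.mixture import GaussianMixture

    X = load_iris().data[:, [0, 3]]                  # sepal length, petal width
    prior = GaussianMixture(n_components=3, covariance_type="diag",
                            random_state=0).fit(X)   # stands in for the UBM

    r = 4.0                                          # relevance factor
    resp = prior.predict_proba(X)                    # responsibilities, shape (n_samples, 3)
    n_k = resp.sum(axis=0)                           # soft count per component
    E_k = (resp.T @ X) / n_k[:, None]                # per-component data mean
    alpha = n_k / (n_k + r)                          # adaptation coefficients
    map_means = alpha[:, None] * E_k + (1 - alpha[:, None]) * prior.means_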
Session Variability Modeling with Gaussian Mixture Models
@@ -326,8 +327,10 @@ The arrows :math:`U_{1}`, :math:`U_{2}` and :math:`U_{3}` are the directions of
the within class variations, with respect to each Gaussian component, that will
be suppressed a posteriori.
.. plot:: plot/plot_ISV.py
:include-source: False
..
TODO uncomment when implemented
.. plot:: plot/plot_ISV.py
:include-source: False
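As a reminder, in the usual session-variability notation (shown here for reference;
it may differ slightly from the rest of this guide), ISV models the GMM mean
supervector of session :math:`j` of client :math:`i` as

.. math::

   \mu_{i,j} = m + U x_{i,j} + D z_{i},

where :math:`m` is the UBM mean supervector, :math:`U x_{i,j}` is the within-class
(session) offset that is estimated and suppressed, and :math:`D z_{i}` is the
client-specific offset.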
The ISV statistical model is stored in this container
@@ -400,8 +403,10 @@ between class variations with respect to each Gaussian component that will be
added a posteriori.
.. plot:: plot/plot_JFA.py
:include-source: False
..
TODO uncomment when implemented
.. plot:: plot/plot_JFA.py
:include-source: False
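For reference, in the usual formulation JFA extends the ISV model with an explicit
between-class term:

.. math::

   \mu_{i,j} = m + U x_{i,j} + V y_{i} + D z_{i},

where :math:`V y_{i}` models the client (between-class) variability added a
posteriori and :math:`U x_{i,j}` the session variability.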
The JFA statistical model is stored in this container
:py:class:`bob.learn.em.JFABase` and the training is performed by
@@ -472,8 +477,10 @@ Follow below an intuition of the data from the Iris flower
`dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_, embedded in
the iVector space.
.. plot:: plot/plot_iVector.py
:include-source: False
..
TODO uncomment when implemented
.. plot:: plot/plot_iVector.py
:include-source: False
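As a reference point, in the standard total-variability formulation the iVector
model represents each sample's mean supervector as

.. math::

   \mu = m + T w,

where :math:`T` is the total-variability matrix and :math:`w` is the iVector that
embeds the sample in the low-dimensional iVector space.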
The iVector statistical model is stored in this container
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**
      features: 2D numpy array with the features
      n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
    # The K-means clustering is firstly used to estimate the initial
    # means, the final variances and the final weights for each gaussian
    # component
    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
    # Getting the means, weights and the variances for each cluster. This is a
    # very good estimator for the ML
    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
    means = kmeans_machine.means
    # initialize the UBM with the output of kmeans
    ubm.means = means
    ubm.variances = variances
    ubm.weights = weights
    # Creating the ML Trainer. We will adapt only the means
    trainer = bob.learn.em.ML_GMMTrainer(
        update_means=True, update_variances=False, update_weights=False)
    bob.learn.em.train(trainer, ubm, features)
    return ubm
def isv_train(features, ubm):
    """
    Train the ISV U matrix

    **Parameters**
      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
      ubm: A trained UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
    for user in features:
        user_stats = []
        for f in user:
            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
            ubm.acc_statistics(f, s)
            user_stats.append(s)
        stats.append(user_stats)
    relevance_factor = 4
    subspace_dimension_of_u = 1
    isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
    trainer = bob.learn.em.ISVTrainer(relevance_factor)
    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
    bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)
    return isvbase
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
isvbase = isv_train([setosa, versicolor, virginica], ubm)
# Variability direction
u0 = isvbase.u[0:2, 0] / numpy.linalg.norm(isvbase.u[0:2, 0])
u1 = isvbase.u[2:4, 0] / numpy.linalg.norm(isvbase.u[2:4, 0])
u2 = isvbase.u[4:6, 0] / numpy.linalg.norm(isvbase.u[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
#             marker=".", label="within class variability", s=0.01)
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend()
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**
      features: 2D numpy array with the features
      n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
    # The K-means clustering is firstly used to estimate the initial
    # means, the final variances and the final weights for each gaussian
    # component
    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
    # Getting the means, weights and the variances for each cluster. This is a
    # very good estimator for the ML
    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
    means = kmeans_machine.means
    # initialize the UBM with the output of kmeans
    ubm.means = means
    ubm.variances = variances
    ubm.weights = weights
    # Creating the ML Trainer. We will adapt only the means
    trainer = bob.learn.em.ML_GMMTrainer(
        update_means=True, update_variances=False, update_weights=False)
    bob.learn.em.train(trainer, ubm, features)
    return ubm
def jfa_train(features, ubm):
    """
    Train the JFA U and V matrices

    **Parameters**
      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
      ubm: A trained UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
    for user in features:
        user_stats = []
        for f in user:
            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
            ubm.acc_statistics(f, s)
            user_stats.append(s)
        stats.append(user_stats)
    subspace_dimension_of_u = 1
    subspace_dimension_of_v = 1
    jfa_base = bob.learn.em.JFABase(
        ubm, subspace_dimension_of_u, subspace_dimension_of_v)
    trainer = bob.learn.em.JFATrainer()
    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
    bob.learn.em.train_jfa(trainer, jfa_base, stats, max_iterations=50)
    return jfa_base
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
jfa_base = jfa_train([setosa, versicolor, virginica], ubm)
# Variability direction U
u0 = jfa_base.u[0:2, 0] / numpy.linalg.norm(jfa_base.u[0:2, 0])
u1 = jfa_base.u[2:4, 0] / numpy.linalg.norm(jfa_base.u[2:4, 0])
u2 = jfa_base.u[4:6, 0] / numpy.linalg.norm(jfa_base.u[4:6, 0])
# Variability direction V
v0 = jfa_base.v[0:2, 0] / numpy.linalg.norm(jfa_base.v[0:2, 0])
v1 = jfa_base.v[2:4, 0] / numpy.linalg.norm(jfa_base.v[2:4, 0])
v2 = jfa_base.v[4:6, 0] / numpy.linalg.norm(jfa_base.v[4:6, 0])
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
marker="x", label="centroids - mle")
# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
#             marker=".", label="within class variability", s=0.01)
# U
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
# V
ax.arrow(ubm.means[0, 0], ubm.means[0, 1], v0[0], v0[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[1, 0], ubm.means[1, 1], v1[0], v1[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
ax.arrow(ubm.means[2, 0], ubm.means[2, 1], v2[0], v2[1],
fc="k", ec="k", head_width=0.05, head_length=0.1)
plt.text(ubm.means[0, 0] + v0[0], ubm.means[0, 1] +
v0[1] - 0.1, r'$\mathbf{V}_1$', fontsize=15)
plt.text(ubm.means[1, 0] + v1[0], ubm.means[1, 1] +
v1[1] - 0.1, r'$\mathbf{V}_2$', fontsize=15)
plt.text(ubm.means[2, 0] + v2[0], ubm.means[2, 1] +
v2[1] - 0.1, r'$\mathbf{V}_3$', fontsize=15)
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.tight_layout()
# plt.show()
import matplotlib.pyplot as plt
import bob.db.iris
import bob.learn.em
import numpy
numpy.random.seed(10)
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# A GMM with 3 components over 2 features (sepal length and petal width)
mle_machine = bob.learn.em.GMMMachine(3, 2)
mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
# Adapt a new machine from the prior (MLE) machine with the MAP trainer
map_machine = bob.learn.em.GMMMachine(3, 2)
map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine with MAP
figure, ax = plt.subplots()
# plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1],
c="dimgrey", label="virginica")
plt.scatter(mle_machine.means[:, 0],
mle_machine.means[:, 1], c="blue", marker="x",
label="prior centroids - mle", s=60)
plt.scatter(map_machine.means[:, 0], map_machine.means[:, 1], c="red",
marker="^", label="adapted centroids - map", s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
plt.show()
import bob.learn.em
import bob.db.iris
import numpy
import matplotlib.pyplot as plt
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# A GMM with 3 components over 2 features (sepal length and petal width)
machine = bob.learn.em.GMMMachine(3, 2)
trainer = bob.learn.em.ML_GMMTrainer(True, True, True)  # update means, variances and weights
machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
bob.learn.em.train(trainer, machine, data, max_iterations=200,
                   convergence_threshold=1e-5)  # Train the GMMMachine with ML
figure, ax = plt.subplots()
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1],
c="dimgrey", label="virginica")
plt.scatter(machine.means[:, 0],
machine.means[:, 1], c="blue", marker="x", label="centroids", s=60)
plt.legend()
plt.xticks([], [])
plt.yticks([], [])
ax.set_xlabel("Sepal length")
ax.set_ylabel("Petal width")
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
t_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
# T - Normalizing
t_norm_impostors = bob.learn.em.tnorm(impostor_scores, t_scores)
t_norm_genuine = bob.learn.em.tnorm(genuine_scores, t_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
         label='Impostors', density=True,
         color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
         label='Genuine', density=True,
         color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("T-norm scores", fontsize=8)
plt.hist(t_norm_impostors.reshape(n_scores_per_client * n_clients),
         label='T-Norm Impostors', density=True,
         color='C1', alpha=0.5, bins=50)
plt.hist(t_norm_genuine.reshape(n_scores_per_client * n_clients),
         label='T-Norm Genuine', density=True,
         color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
t_scores = numpy.random.normal(-6., 5, (n_scores_per_client, n_clients))
# T-normalizing the Z-scores
zt_scores = bob.learn.em.tnorm(z_scores, t_scores)
# ZT - Normalizing
zt_norm_impostors = bob.learn.em.ztnorm(
impostor_scores, z_scores, t_scores, zt_scores)
zt_norm_genuine = bob.learn.em.ztnorm(
genuine_scores, z_scores, t_scores, zt_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
         label='Impostors', density=True,
         color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
         label='Genuine', density=True,
         color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("T-norm scores", fontsize=8)
plt.hist(zt_norm_impostors.reshape(n_scores_per_client * n_clients),
label='T-Norm Impostors', normed=True,
color='C1', alpha=0.5, bins=50)
plt.hist(zt_norm_genuine.reshape(n_scores_per_client * n_clients),
label='T-Norm Genuine', normed=True,
color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
plt.tight_layout()
plt.show()
import matplotlib.pyplot as plt
import bob.learn.em
import numpy
numpy.random.seed(10)
n_clients = 10
n_scores_per_client = 200
# Defining some fake scores for genuines and impostors
impostor_scores = numpy.random.normal(-15.5,
5, (n_scores_per_client, n_clients))
genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
# Defining the scores for the statistics computation
z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
# Z - Normalizing
z_norm_impostors = bob.learn.em.znorm(impostor_scores, z_scores)
z_norm_genuine = bob.learn.em.znorm(genuine_scores, z_scores)
# PLOTTING
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw scores", fontsize=8)
plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
         label='Impostors', density=True,
         color='C1', alpha=0.5, bins=50)
plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
         label='Genuine', density=True,
         color='C0', alpha=0.5, bins=50)
plt.legend(fontsize=8)
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
plt.title("Z-norm scores", fontsize=8)
plt.hist(z_norm_impostors.reshape(n_scores_per_client * n_clients),
         label='Z-Norm Impostors', density=True,
         color='C1', alpha=0.5, bins=50)
plt.hist(z_norm_genuine.reshape(n_scores_per_client * n_clients),
         label='Z-Norm Genuine', density=True,
         color='C0', alpha=0.5, bins=50)
plt.yticks([], [])
plt.legend(fontsize=8)
plt.tight_layout()
plt.show()
import bob.db.iris
import bob.learn.em
import bob.learn.linear
import matplotlib.pyplot as plt
import numpy
numpy.random.seed(2) # FIXING A SEED
def train_ubm(features, n_gaussians):
    """
    Train UBM

    **Parameters**
      features: 2D numpy array with the features
      n_gaussians: Number of Gaussians
    """
    input_size = features.shape[1]
    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
    # The K-means clustering is firstly used to estimate the initial
    # means, the final variances and the final weights for each gaussian
    # component
    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
    # Getting the means, weights and the variances for each cluster. This is a
    # very good estimator for the ML
    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
    means = kmeans_machine.means
    # initialize the UBM with the output of kmeans
    ubm.means = means
    ubm.variances = variances
    ubm.weights = weights
    # Creating the ML Trainer. We will adapt only the means
    trainer = bob.learn.em.ML_GMMTrainer(
        update_means=True, update_variances=False, update_weights=False)
    bob.learn.em.train(trainer, ubm, features)
    return ubm
def ivector_train(features, ubm):
    """
    Train the iVector T matrix

    **Parameters**
      features: List of :py:class:`bob.learn.em.GMMStats`
      ubm: A trained UBM (:py:class:`bob.learn.em.GMMMachine`)
    """
    stats = []
    for user in features:
        s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
        for f in user:
            ubm.acc_statistics(f, s)
        stats.append(s)
    subspace_dimension_of_t = 2
    ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
    ivector_machine = bob.learn.em.IVectorMachine(
        ubm, subspace_dimension_of_t, 10e-5)
    # train the IVector model
    bob.learn.em.train(ivector_trainer, ivector_machine, stats, 500)
    return ivector_machine
def acc_stats(data, gmm):
    gmm_stats = []
    for d in data:
        s = bob.learn.em.GMMStats(gmm.shape[0], gmm.shape[1])
        gmm.acc_statistics(d, s)
        gmm_stats.append(s)
    return gmm_stats
def compute_ivectors(gmm_stats, ivector_machine):
    """
    Given :py:class:`bob.learn.em.GMMStats` and a T matrix, compute the iVectors.
    """
    ivectors = []
    for g in gmm_stats:
        ivectors.append(ivector_machine(g))
    return numpy.array(ivectors)
# GENERATING DATA
data_per_class = bob.db.iris.data()
setosa = numpy.column_stack(
(data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
versicolor = numpy.column_stack(
(data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
virginica = numpy.column_stack(
(data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
data = numpy.vstack((setosa, versicolor, virginica))
# TRAINING THE PRIOR
ubm = train_ubm(data, 3)
ivector_machine = ivector_train([setosa, versicolor, virginica], ubm)
# Variability direction U
# t0 = T[0:2, 0] / numpy.linalg.norm(T[0:2, 0])
# t1 = T[2:4, 0] / numpy.linalg.norm(T[2:4, 0])
# t2 = T[4:6, 0] / numpy.linalg.norm(T[4:6, 0])
# figure, ax = plt.subplots()
figure = plt.subplot(2, 1, 1)
ax = figure.axes
plt.title("Raw fetures")
plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
plt.scatter(versicolor[:, 0], versicolor[:, 1],
c="goldenrod", label="versicolor")
plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
# plt.grid(True)
# plt.xlabel('Sepal length')
plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.xticks([], [])
plt.yticks([], [])
figure = plt.subplot(2, 1, 2)
ax = figure.axes
ivector_setosa = compute_ivectors(acc_stats(setosa, ubm), ivector_machine)
ivector_versicolor = compute_ivectors(
acc_stats(versicolor, ubm), ivector_machine)
ivector_virginica = compute_ivectors(
acc_stats(virginica, ubm), ivector_machine)
# Whitening iVectors
whitening_trainer = bob.learn.linear.WhiteningTrainer()
whitener_machine = bob.learn.linear.Machine(
ivector_setosa.shape[1], ivector_setosa.shape[1])
whitening_trainer.train(numpy.vstack(
(ivector_setosa, ivector_versicolor, ivector_virginica)), whitener_machine)
ivector_setosa = whitener_machine(ivector_setosa)
ivector_versicolor = whitener_machine(ivector_versicolor)
ivector_virginica = whitener_machine(ivector_virginica)
# LDA ivectors
lda_trainer = bob.learn.linear.FisherLDATrainer()
lda_machine = bob.learn.linear.Machine(
ivector_setosa.shape[1], ivector_setosa.shape[1])
lda_trainer.train([ivector_setosa, ivector_versicolor,
ivector_virginica], lda_machine)
ivector_setosa = lda_machine(ivector_setosa)
ivector_versicolor = lda_machine(ivector_versicolor)
ivector_virginica = lda_machine(ivector_virginica)
# WCCN ivectors
# wccn_trainer = bob.learn.linear.WCCNTrainer()
# wccn_machine = bob.learn.linear.Machine(
# ivector_setosa.shape[1], ivector_setosa.shape[1])
# wccn_trainer.train([ivector_setosa, ivector_versicolor,
# ivector_virginica], wccn_machine)
# ivector_setosa = wccn_machine(ivector_setosa)
# ivector_versicolor = wccn_machine(ivector_versicolor)
# ivector_virginica = wccn_machine(ivector_virginica)
plt.title("First two ivectors")
plt.scatter(ivector_setosa[:, 0],
ivector_setosa[:, 1], c="darkcyan", label="setosa",
marker="x")
plt.scatter(ivector_versicolor[:, 0],
ivector_versicolor[:, 1], c="goldenrod", label="versicolor",
marker="x")
plt.scatter(ivector_virginica[:, 0],
ivector_virginica[:, 1], c="dimgrey", label="virginica",
marker="x")
plt.xticks([], [])
plt.yticks([], [])
# plt.grid(True)
# plt.xlabel('Sepal length')
# plt.ylabel('Petal width')
plt.legend(loc=2)
plt.ylim([-1, 3.5])
plt.tight_layout()
plt.show()
@@ -18,7 +18,10 @@ Trainers
.. autosummary::
bob.learn.em.KMeansTrainer
bob.learn.em.cluster.KMeansTrainer
..
TODO uncomment when implemented
bob.learn.em.ML_GMMTrainer
bob.learn.em.MAP_GMMTrainer
bob.learn.em.ISVTrainer
@@ -32,7 +35,10 @@ Machines
.. autosummary::
bob.learn.em.KMeansMachine
bob.learn.em.cluster.KMeansMachine
..
TODO uncomment when implemented
bob.learn.em.Gaussian
bob.learn.em.GMMStats
bob.learn.em.GMMMachine
@@ -46,11 +52,11 @@ Machines
Functions
---------
.. autosummary::
..
TODO uncomment when implemented
.. autosummary::
bob.learn.em.linear_scoring
bob.learn.em.train
bob.learn.em.train_jfa
bob.learn.em.linear_scoring
Detailed Information
--------------------