diff --git a/doc/guide.rst b/doc/guide.rst
index 1ce2e6d148319641d6c1922537fb16ac1ebc05c7..fecf54b4b773a454b9cd60855a2c3bb81110fec8 100644
--- a/doc/guide.rst
+++ b/doc/guide.rst
@@ -58,7 +58,8 @@ Follow bellow an snippet on how to train a KMeans using Bob_.
 .. doctest::
    :options: +NORMALIZE_WHITESPACE
 
-   >>> import bob.learn.em
+   >>> from bob.learn.em.cluster import KMeansMachine
+   >>> from bob.learn.em.cluster import KMeansTrainer
    >>> import numpy
    >>> data = numpy.array(
    ...     [[3,-3,100],
@@ -66,18 +67,14 @@ Follow bellow an snippet on how to train a KMeans using Bob_.
    ...     [3.5,-3.5,99],
    ...     [-7,7,-100],
    ...     [-5,5,-101]], dtype='float64')
-   >>> # Create a kmeans m with k=2 clusters with a dimensionality equal to 3
-   >>> kmeans_machine = bob.learn.em.KMeansMachine(2, 3)
-   >>> kmeans_trainer = bob.learn.em.KMeansTrainer()
-   >>> max_iterations = 200
-   >>> convergence_threshold = 1e-5
+   >>> # Create a k-means machine with k=2 clusters
+   >>> kmeans_machine = KMeansMachine(2, convergence_threshold=1e-5)
+   >>> kmeans_trainer = KMeansTrainer(max_iter=200)
    >>> # Train the KMeansMachine
-   >>> bob.learn.em.train(kmeans_trainer, kmeans_machine, data,
-   ...     max_iterations=max_iterations,
-   ...     convergence_threshold=convergence_threshold)
-   >>> print(kmeans_machine.means)
-   [[  -6.     6.  -100.5]
-    [   3.5   -3.5   99. ]]
+   >>> kmeans_machine = kmeans_machine.fit(data)
+   >>> print(numpy.array(kmeans_machine.centroids_))
+   [[   3.5   -3.5   99. ]
+    [  -6.     6.  -100.5]]
 
 
 Bellow follow an intuition (source code + plot) of a kmeans training using the
@@ -171,8 +168,10 @@ Bellow follow an intuition of the GMM trained the maximum likelihood estimator
 using the Iris flower `dataset
 <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
 
-.. plot:: plot/plot_ML.py
-   :include-source: False
+..
+   TODO uncomment when implemented
+   .. plot:: plot/plot_ML.py
+      :include-source: False
 
 
 Maximum a posteriori Estimator (MAP)
@@ -232,8 +231,10 @@ Follow bellow an snippet on how to train a GMM using the MAP estimator.
 Bellow follow an intuition of the GMM trained with the MAP estimator using the
 Iris flower `dataset <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_.
 
-.. plot:: plot/plot_MAP.py
-   :include-source: False
+..
+   TODO uncomment when implemented
+   .. plot:: plot/plot_MAP.py
+      :include-source: False
 
 
 Session Variability Modeling with Gaussian Mixture Models
@@ -326,8 +327,10 @@ The arrows :math:`U_{1}`, :math:`U_{2}` and :math:`U_{3}` are the directions of
 the within class variations, with respect to each Gaussian component, that
 will be suppressed a posteriori.
 
-.. plot:: plot/plot_ISV.py
-   :include-source: False
+..
+   TODO uncomment when implemented
+   .. plot:: plot/plot_ISV.py
+      :include-source: False
 
 
 The ISV statistical model is stored in this container
@@ -400,8 +403,10 @@ between class variations with respect to each Gaussian component that will be
 added a posteriori.
 
-.. plot:: plot/plot_JFA.py
-   :include-source: False
+..
+   TODO uncomment when implemented
+   .. plot:: plot/plot_JFA.py
+      :include-source: False
 
 
 The JFA statistical model is stored in this container
 :py:class:`bob.learn.em.JFABase` and the training is performed by
@@ -472,8 +477,10 @@ Follow bellow an intuition of the data from the Iris flower `dataset
 <https://en.wikipedia.org/wiki/Iris_flower_data_set>`_, embedded in the
 iVector space.
 
-.. plot:: plot/plot_iVector.py
-   :include-source: False
+..
+   TODO uncomment when implemented
+   .. plot:: plot/plot_iVector.py
+      :include-source: False
 
 
 The iVector statistical model is stored in this container
diff --git a/doc/plot/plot_ISV.py b/doc/plot/plot_ISV.py
deleted file mode 100755
index 876d71b8da5b61a53b944ecfcd21cbaa1e17155f..0000000000000000000000000000000000000000
--- a/doc/plot/plot_ISV.py
+++ /dev/null
@@ -1,130 +0,0 @@
-import bob.db.iris
-import bob.learn.em
-import bob.learn.linear
-import matplotlib.pyplot as plt
-import numpy
-numpy.random.seed(2)  # FIXING A SEED
-
-
-def train_ubm(features, n_gaussians):
-    """
-    Train UBM
-
-    **Parameters**
-      features: 2D numpy array with the features
-
-      n_gaussians: Number of Gaussians
-
-    """
-    input_size = features.shape[1]
-
-    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
-    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
-
-    # The K-means clustering is firstly used to used to estimate the initial
-    # means, the final variances and the final weights for each gaussian
-    # component
-    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
-    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
-
-    # Getting the means, weights and the variances for each cluster. This is a
-    # very good estimator for the ML
-    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
-    means = kmeans_machine.means
-
-    # initialize the UBM with the output of kmeans
-    ubm.means = means
-    ubm.variances = variances
-    ubm.weights = weights
-
-    # Creating the ML Trainer. We will adapt only the means
-    trainer = bob.learn.em.ML_GMMTrainer(
-        update_means=True, update_variances=False, update_weights=False)
-    bob.learn.em.train(trainer, ubm, features)
-
-    return ubm
-
-
-def isv_train(features, ubm):
-    """
-    Train U matrix
-
-    **Parameters**
-      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
-
-      n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
-
-    """
-
-    stats = []
-    for user in features:
-        user_stats = []
-        for f in user:
-            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
-            ubm.acc_statistics(f, s)
-            user_stats.append(s)
-        stats.append(user_stats)
-
-    relevance_factor = 4
-    subspace_dimension_of_u = 1
-
-    isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
-    trainer = bob.learn.em.ISVTrainer(relevance_factor)
-    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
-    bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)
-
-    return isvbase
-
-
-# GENERATING DATA
-data_per_class = bob.db.iris.data()
-setosa = numpy.column_stack(
-    (data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
-versicolor = numpy.column_stack(
-    (data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
-virginica = numpy.column_stack(
-    (data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
-data = numpy.vstack((setosa, versicolor, virginica))
-
-# TRAINING THE PRIOR
-ubm = train_ubm(data, 3)
-isvbase = isv_train([setosa, versicolor, virginica], ubm)
-
-# Variability direction
-u0 = isvbase.u[0:2, 0] / numpy.linalg.norm(isvbase.u[0:2, 0])
-u1 = isvbase.u[2:4, 0] / numpy.linalg.norm(isvbase.u[2:4, 0])
-u2 = isvbase.u[4:6, 0] / numpy.linalg.norm(isvbase.u[4:6, 0])
-
-figure, ax = plt.subplots()
-plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
-plt.scatter(versicolor[:, 0], versicolor[:, 1],
-            c="goldenrod", label="versicolor")
-plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
-
-plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
-            marker="x", label="centroids - mle")
marker="x", label="centroids - mle") -# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue", -# marker=".", label="within class varibility", s=0.01) - -ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1], - fc="k", ec="k", head_width=0.05, head_length=0.1) -ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1], - fc="k", ec="k", head_width=0.05, head_length=0.1) -ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1], - fc="k", ec="k", head_width=0.05, head_length=0.1) -plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] + - u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15) -plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] + - u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15) -plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] + - u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15) - -plt.xticks([], []) -plt.yticks([], []) - -# plt.grid(True) -plt.xlabel('Sepal length') -plt.ylabel('Petal width') -plt.legend() -plt.tight_layout() -plt.show() diff --git a/doc/plot/plot_JFA.py b/doc/plot/plot_JFA.py deleted file mode 100755 index a2b05d5731b3d8aaefd419df0184d0b9daaacf2a..0000000000000000000000000000000000000000 --- a/doc/plot/plot_JFA.py +++ /dev/null @@ -1,156 +0,0 @@ -import bob.db.iris -import bob.learn.em -import bob.learn.linear -import matplotlib.pyplot as plt -import numpy -numpy.random.seed(2) # FIXING A SEED - - -def train_ubm(features, n_gaussians): - """ - Train UBM - - **Parameters** - features: 2D numpy array with the features - - n_gaussians: Number of Gaussians - - """ - - input_size = features.shape[1] - - kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size) - ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size) - - # The K-means clustering is firstly used to used to estimate the initial - # means, the final variances and the final weights for each gaussian - # component - kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE') - bob.learn.em.train(kmeans_trainer, kmeans_machine, features) - - # Getting the means, weights and the variances for each cluster. This is a - # very good estimator for the ML - (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features) - means = kmeans_machine.means - - # initialize the UBM with the output of kmeans - ubm.means = means - ubm.variances = variances - ubm.weights = weights - - # Creating the ML Trainer. 
-    trainer = bob.learn.em.ML_GMMTrainer(
-        update_means=True, update_variances=False, update_weights=False)
-    bob.learn.em.train(trainer, ubm, features)
-
-    return ubm
-
-
-def jfa_train(features, ubm):
-    """
-    Trains U and V matrix
-
-    **Parameters**
-      features: List of :py:class:`bob.learn.em.GMMStats` organized by class
-
-      n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
-
-    """
-
-    stats = []
-    for user in features:
-        user_stats = []
-        for f in user:
-            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
-            ubm.acc_statistics(f, s)
-            user_stats.append(s)
-        stats.append(user_stats)
-
-    subspace_dimension_of_u = 1
-    subspace_dimension_of_v = 1
-
-    jfa_base = bob.learn.em.JFABase(
-        ubm, subspace_dimension_of_u, subspace_dimension_of_v)
-    trainer = bob.learn.em.JFATrainer()
-    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
-    bob.learn.em.train_jfa(trainer, jfa_base, stats, max_iterations=50)
-
-    return jfa_base
-
-
-# GENERATING DATA
-data_per_class = bob.db.iris.data()
-setosa = numpy.column_stack(
-    (data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
-versicolor = numpy.column_stack(
-    (data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
-virginica = numpy.column_stack(
-    (data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
-data = numpy.vstack((setosa, versicolor, virginica))
-
-# TRAINING THE PRIOR
-ubm = train_ubm(data, 3)
-jfa_base = jfa_train([setosa, versicolor, virginica], ubm)
-
-# Variability direction U
-u0 = jfa_base.u[0:2, 0] / numpy.linalg.norm(jfa_base.u[0:2, 0])
-u1 = jfa_base.u[2:4, 0] / numpy.linalg.norm(jfa_base.u[2:4, 0])
-u2 = jfa_base.u[4:6, 0] / numpy.linalg.norm(jfa_base.u[4:6, 0])
-
-
-# Variability direction V
-v0 = jfa_base.v[0:2, 0] / numpy.linalg.norm(jfa_base.v[0:2, 0])
-v1 = jfa_base.v[2:4, 0] / numpy.linalg.norm(jfa_base.v[2:4, 0])
-v2 = jfa_base.v[4:6, 0] / numpy.linalg.norm(jfa_base.v[4:6, 0])
-
-
-figure, ax = plt.subplots()
-plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
-plt.scatter(versicolor[:, 0], versicolor[:, 1],
-            c="goldenrod", label="versicolor")
-plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
-
-plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
-            marker="x", label="centroids - mle")
-# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
-#             marker=".", label="within class varibility", s=0.01)
-
-# U
-ax.arrow(ubm.means[0, 0], ubm.means[0, 1], u0[0], u0[1],
-         fc="k", ec="k", head_width=0.05, head_length=0.1)
-ax.arrow(ubm.means[1, 0], ubm.means[1, 1], u1[0], u1[1],
-         fc="k", ec="k", head_width=0.05, head_length=0.1)
-ax.arrow(ubm.means[2, 0], ubm.means[2, 1], u2[0], u2[1],
-         fc="k", ec="k", head_width=0.05, head_length=0.1)
-plt.text(ubm.means[0, 0] + u0[0], ubm.means[0, 1] +
-         u0[1] - 0.1, r'$\mathbf{U}_1$', fontsize=15)
-plt.text(ubm.means[1, 0] + u1[0], ubm.means[1, 1] +
-         u1[1] - 0.1, r'$\mathbf{U}_2$', fontsize=15)
-plt.text(ubm.means[2, 0] + u2[0], ubm.means[2, 1] +
-         u2[1] - 0.1, r'$\mathbf{U}_3$', fontsize=15)
-
-# V
-ax.arrow(ubm.means[0, 0], ubm.means[0, 1], v0[0], v0[1],
-         fc="k", ec="k", head_width=0.05, head_length=0.1)
-ax.arrow(ubm.means[1, 0], ubm.means[1, 1], v1[0], v1[1],
-         fc="k", ec="k", head_width=0.05, head_length=0.1)
-ax.arrow(ubm.means[2, 0], ubm.means[2, 1], v2[0], v2[1],
-         fc="k", ec="k", head_width=0.05, head_length=0.1)
-plt.text(ubm.means[0, 0] + v0[0], ubm.means[0, 1] +
-         v0[1] - 0.1, r'$\mathbf{V}_1$', fontsize=15)
-plt.text(ubm.means[1, 0] + v1[0], ubm.means[1, 1] +
-         v1[1] - 0.1, r'$\mathbf{V}_2$', fontsize=15)
-plt.text(ubm.means[2, 0] + v2[0], ubm.means[2, 1] +
-         v2[1] - 0.1, r'$\mathbf{V}_3$', fontsize=15)
-
-plt.xticks([], [])
-plt.yticks([], [])
-
-# plt.grid(True)
-plt.xlabel('Sepal length')
-plt.ylabel('Petal width')
-plt.legend(loc=2)
-plt.ylim([-1, 3.5])
-
-plt.tight_layout()
-# plt.show()
diff --git a/doc/plot/plot_MAP.py b/doc/plot/plot_MAP.py
deleted file mode 100644
index d873b2ca69f32363e76271070e76a21b81be7b6c..0000000000000000000000000000000000000000
--- a/doc/plot/plot_MAP.py
+++ /dev/null
@@ -1,46 +0,0 @@
-import matplotlib.pyplot as plt
-import bob.db.iris
-import bob.learn.em
-import numpy
-numpy.random.seed(10)
-
-data_per_class = bob.db.iris.data()
-setosa = numpy.column_stack(
-    (data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
-versicolor = numpy.column_stack(
-    (data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
-virginica = numpy.column_stack(
-    (data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
-
-data = numpy.vstack((setosa, versicolor, virginica))
-
-# Two clusters with a feature dimensionality of 3
-mle_machine = bob.learn.em.GMMMachine(3, 2)
-mle_machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
-
-# Creating some random data centered in
-map_machine = bob.learn.em.GMMMachine(3, 2)
-map_trainer = bob.learn.em.MAP_GMMTrainer(mle_machine, relevance_factor=4)
-bob.learn.em.train(map_trainer, map_machine, data, max_iterations=200,
-                   convergence_threshold=1e-5)  # Train the KMeansMachine
-
-
-figure, ax = plt.subplots()
-# plt.scatter(data[:, 0], data[:, 1], c="olivedrab", label="new data")
-plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
-plt.scatter(versicolor[:, 0], versicolor[:, 1],
-            c="goldenrod", label="versicolor")
-plt.scatter(virginica[:, 0], virginica[:, 1],
-            c="dimgrey", label="virginica")
-plt.scatter(mle_machine.means[:, 0],
-            mle_machine.means[:, 1], c="blue", marker="x",
-            label="prior centroids - mle", s=60)
-plt.scatter(map_machine.means[:, 0], map_machine.means[:, 1], c="red",
-            marker="^", label="adapted centroids - map", s=60)
-plt.legend()
-plt.xticks([], [])
-plt.yticks([], [])
-ax.set_xlabel("Sepal length")
-ax.set_ylabel("Petal width")
-plt.tight_layout()
-plt.show()
diff --git a/doc/plot/plot_ML.py b/doc/plot/plot_ML.py
deleted file mode 100644
index 18902c4dd8939e33e537b274f9d3b31deaa3b75b..0000000000000000000000000000000000000000
--- a/doc/plot/plot_ML.py
+++ /dev/null
@@ -1,37 +0,0 @@
-import bob.learn.em
-import bob.db.iris
-import numpy
-import matplotlib.pyplot as plt
-
-data_per_class = bob.db.iris.data()
-setosa = numpy.column_stack(
-    (data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
-versicolor = numpy.column_stack(
-    (data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
-virginica = numpy.column_stack(
-    (data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
-
-data = numpy.vstack((setosa, versicolor, virginica))
-
-# Two clusters with a feature dimensionality of 3
-machine = bob.learn.em.GMMMachine(3, 2)
-trainer = bob.learn.em.ML_GMMTrainer(True, True, True)
-machine.means = numpy.array([[5, 3], [4, 2], [7, 3.]])
-bob.learn.em.train(trainer, machine, data, max_iterations=200,
-                   convergence_threshold=1e-5)  # Train the KMeansMachine
-
-figure, ax = plt.subplots()
-plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
-plt.scatter(versicolor[:, 0], versicolor[:, 1],
-            c="goldenrod", label="versicolor")
-plt.scatter(virginica[:, 0], virginica[:, 1],
-            c="dimgrey", label="virginica")
-plt.scatter(machine.means[:, 0],
-            machine.means[:, 1], c="blue", marker="x", label="centroids", s=60)
-plt.legend()
-plt.xticks([], [])
-plt.yticks([], [])
-ax.set_xlabel("Sepal length")
-ax.set_ylabel("Petal width")
-plt.tight_layout()
-plt.show()
diff --git a/doc/plot/plot_Tnorm.py b/doc/plot/plot_Tnorm.py
deleted file mode 100644
index 8772a95d885ca0c93be117675a86ae04bf614522..0000000000000000000000000000000000000000
--- a/doc/plot/plot_Tnorm.py
+++ /dev/null
@@ -1,48 +0,0 @@
-import matplotlib.pyplot as plt
-import bob.learn.em
-import numpy
-numpy.random.seed(10)
-
-n_clients = 10
-n_scores_per_client = 200
-
-# Defining some fake scores for genuines and impostors
-impostor_scores = numpy.random.normal(-15.5,
-                                      5, (n_scores_per_client, n_clients))
-genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
-
-# Defining the scores for the statistics computation
-t_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
-
-# T - Normalizing
-t_norm_impostors = bob.learn.em.tnorm(impostor_scores, t_scores)
-t_norm_genuine = bob.learn.em.tnorm(genuine_scores, t_scores)
-
-# PLOTTING
-figure = plt.subplot(2, 1, 1)
-ax = figure.axes
-plt.title("Raw scores", fontsize=8)
-plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
-         label='Impostors', normed=True,
-         color='C1', alpha=0.5, bins=50)
-plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
-         label='Genuine', normed=True,
-         color='C0', alpha=0.5, bins=50)
-plt.legend(fontsize=8)
-plt.yticks([], [])
-
-
-figure = plt.subplot(2, 1, 2)
-ax = figure.axes
-plt.title("T-norm scores", fontsize=8)
-plt.hist(t_norm_impostors.reshape(n_scores_per_client * n_clients),
-         label='T-Norm Impostors', normed=True,
-         color='C1', alpha=0.5, bins=50)
-plt.hist(t_norm_genuine.reshape(n_scores_per_client * n_clients),
-         label='T-Norm Genuine', normed=True,
-         color='C0', alpha=0.5, bins=50)
-plt.legend(fontsize=8)
-plt.yticks([], [])
-
-plt.tight_layout()
-plt.show()
diff --git a/doc/plot/plot_ZTnorm.py b/doc/plot/plot_ZTnorm.py
deleted file mode 100644
index 8300fdccb419bdc8843c3396f3e78940b56684d9..0000000000000000000000000000000000000000
--- a/doc/plot/plot_ZTnorm.py
+++ /dev/null
@@ -1,54 +0,0 @@
-import matplotlib.pyplot as plt
-import bob.learn.em
-import numpy
-numpy.random.seed(10)
-
-n_clients = 10
-n_scores_per_client = 200
-
-# Defining some fake scores for genuines and impostors
-impostor_scores = numpy.random.normal(-15.5,
-                                      5, (n_scores_per_client, n_clients))
-genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
-
-# Defining the scores for the statistics computation
-z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
-t_scores = numpy.random.normal(-6., 5, (n_scores_per_client, n_clients))
-
-# T-normalizing the Z-scores
-zt_scores = bob.learn.em.tnorm(z_scores, t_scores)
-
-# ZT - Normalizing
-zt_norm_impostors = bob.learn.em.ztnorm(
-    impostor_scores, z_scores, t_scores, zt_scores)
-zt_norm_genuine = bob.learn.em.ztnorm(
-    genuine_scores, z_scores, t_scores, zt_scores)
-
-# PLOTTING
-figure = plt.subplot(2, 1, 1)
-ax = figure.axes
-plt.title("Raw scores", fontsize=8)
-plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
-         label='Impostors', normed=True,
-         color='C1', alpha=0.5, bins=50)
-plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
-         label='Genuine', normed=True,
-         color='C0', alpha=0.5, bins=50)
-plt.legend(fontsize=8)
-plt.yticks([], [])
-
-
-figure = plt.subplot(2, 1, 2)
-ax = figure.axes
-plt.title("T-norm scores", fontsize=8)
-plt.hist(zt_norm_impostors.reshape(n_scores_per_client * n_clients),
-         label='T-Norm Impostors', normed=True,
-         color='C1', alpha=0.5, bins=50)
-plt.hist(zt_norm_genuine.reshape(n_scores_per_client * n_clients),
-         label='T-Norm Genuine', normed=True,
-         color='C0', alpha=0.5, bins=50)
-plt.legend(fontsize=8)
-plt.yticks([], [])
-
-plt.tight_layout()
-plt.show()
diff --git a/doc/plot/plot_Znorm.py b/doc/plot/plot_Znorm.py
deleted file mode 100644
index 8c16ebbd08dc62864040205012b4682b8de96562..0000000000000000000000000000000000000000
--- a/doc/plot/plot_Znorm.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import matplotlib.pyplot as plt
-import bob.learn.em
-import numpy
-numpy.random.seed(10)
-
-
-n_clients = 10
-n_scores_per_client = 200
-
-# Defining some fake scores for genuines and impostors
-impostor_scores = numpy.random.normal(-15.5,
-                                      5, (n_scores_per_client, n_clients))
-genuine_scores = numpy.random.normal(0.5, 5, (n_scores_per_client, n_clients))
-
-# Defining the scores for the statistics computation
-z_scores = numpy.random.normal(-5., 5, (n_scores_per_client, n_clients))
-
-# Z - Normalizing
-z_norm_impostors = bob.learn.em.znorm(impostor_scores, z_scores)
-z_norm_genuine = bob.learn.em.znorm(genuine_scores, z_scores)
-
-# PLOTTING
-figure = plt.subplot(2, 1, 1)
-ax = figure.axes
-plt.title("Raw scores", fontsize=8)
-plt.hist(impostor_scores.reshape(n_scores_per_client * n_clients),
-         label='Impostors', normed=True,
-         color='C1', alpha=0.5, bins=50)
-plt.hist(genuine_scores.reshape(n_scores_per_client * n_clients),
-         label='Genuine', normed=True,
-         color='C0', alpha=0.5, bins=50)
-plt.legend(fontsize=8)
-plt.yticks([], [])
-
-
-figure = plt.subplot(2, 1, 2)
-ax = figure.axes
-plt.title("Z-norm scores", fontsize=8)
-plt.hist(z_norm_impostors.reshape(n_scores_per_client * n_clients),
-         label='Z-Norm Impostors', normed=True,
-         color='C1', alpha=0.5, bins=50)
-plt.hist(z_norm_genuine.reshape(n_scores_per_client * n_clients),
-         label='Z-Norm Genuine', normed=True,
-         color='C0', alpha=0.5, bins=50)
-plt.yticks([], [])
-plt.legend(fontsize=8)
-
-plt.tight_layout()
-plt.show()
diff --git a/doc/plot/plot_iVector.py b/doc/plot/plot_iVector.py
deleted file mode 100755
index 95953d3a2d40c64ca7a08d0dd6eff0d7346524fb..0000000000000000000000000000000000000000
--- a/doc/plot/plot_iVector.py
+++ /dev/null
@@ -1,201 +0,0 @@
-import bob.db.iris
-import bob.learn.em
-import bob.learn.linear
-import matplotlib.pyplot as plt
-import numpy
-numpy.random.seed(2)  # FIXING A SEED
-
-
-def train_ubm(features, n_gaussians):
-    """
-    Train UBM
-
-    **Parameters**
-      features: 2D numpy array with the features
-
-      n_gaussians: Number of Gaussians
-
-    """
-
-    input_size = features.shape[1]
-
-    kmeans_machine = bob.learn.em.KMeansMachine(int(n_gaussians), input_size)
-    ubm = bob.learn.em.GMMMachine(int(n_gaussians), input_size)
-
-    # The K-means clustering is firstly used to used to estimate the initial
-    # means, the final variances and the final weights for each gaussian
-    # component
-    kmeans_trainer = bob.learn.em.KMeansTrainer('RANDOM_NO_DUPLICATE')
-    bob.learn.em.train(kmeans_trainer, kmeans_machine, features)
-
-    # Getting the means, weights and the variances for each cluster. This is a
-    # very good estimator for the ML
-    (variances, weights) = kmeans_machine.get_variances_and_weights_for_each_cluster(features)
-    means = kmeans_machine.means
-
-    # initialize the UBM with the output of kmeans
-    ubm.means = means
-    ubm.variances = variances
-    ubm.weights = weights
-
-    # Creating the ML Trainer. We will adapt only the means
-    trainer = bob.learn.em.ML_GMMTrainer(
-        update_means=True, update_variances=False, update_weights=False)
-    bob.learn.em.train(trainer, ubm, features)
-
-    return ubm
-
-
-def ivector_train(features, ubm):
-    """
-    Trains T matrix
-
-    **Parameters**
-      features: List of :py:class:`bob.learn.em.GMMStats`
-
-      n_gaussians: UBM (:py:class:`bob.learn.em.GMMMachine`)
-
-    """
-
-    stats = []
-    for user in features:
-        s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
-        for f in user:
-            ubm.acc_statistics(f, s)
-        stats.append(s)
-
-    subspace_dimension_of_t = 2
-
-    ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=True)
-    ivector_machine = bob.learn.em.IVectorMachine(
-        ubm, subspace_dimension_of_t, 10e-5)
-
-    # train IVector model
-    bob.learn.em.train(ivector_trainer, ivector_machine, stats, 500)
-
-    return ivector_machine
-
-
-def acc_stats(data, gmm):
-    gmm_stats = []
-    for d in data:
-        s = bob.learn.em.GMMStats(gmm.shape[0], gmm.shape[1])
-        gmm.acc_statistics(d, s)
-        gmm_stats.append(s)
-
-    return gmm_stats
-
-
-def compute_ivectors(gmm_stats, ivector_machine):
-    """
-    Given :py:class:`bob.learn.em.GMMStats` and an T matrix, get the iVectors.
-    """
-
-    ivectors = []
-    for g in gmm_stats:
-        ivectors.append(ivector_machine(g))
-
-    return numpy.array(ivectors)
-
-
-# GENERATING DATA
-data_per_class = bob.db.iris.data()
-setosa = numpy.column_stack(
-    (data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3]))
-versicolor = numpy.column_stack(
-    (data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3]))
-virginica = numpy.column_stack(
-    (data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3]))
-data = numpy.vstack((setosa, versicolor, virginica))
-
-# TRAINING THE PRIOR
-ubm = train_ubm(data, 3)
-ivector_machine = ivector_train([setosa, versicolor, virginica], ubm)
-
-# Variability direction U
-# t0 = T[0:2, 0] / numpy.linalg.norm(T[0:2, 0])
-# t1 = T[2:4, 0] / numpy.linalg.norm(T[2:4, 0])
-# t2 = T[4:6, 0] / numpy.linalg.norm(T[4:6, 0])
-
-
-# figure, ax = plt.subplots()
-figure = plt.subplot(2, 1, 1)
-ax = figure.axes
-plt.title("Raw fetures")
-plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
-plt.scatter(versicolor[:, 0], versicolor[:, 1],
-            c="goldenrod", label="versicolor")
-plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
-# plt.grid(True)
-# plt.xlabel('Sepal length')
-plt.ylabel('Petal width')
-plt.legend(loc=2)
-plt.ylim([-1, 3.5])
-plt.xticks([], [])
-plt.yticks([], [])
-
-
-figure = plt.subplot(2, 1, 2)
-ax = figure.axes
-ivector_setosa = compute_ivectors(acc_stats(setosa, ubm), ivector_machine)
-ivector_versicolor = compute_ivectors(
-    acc_stats(versicolor, ubm), ivector_machine)
-ivector_virginica = compute_ivectors(
-    acc_stats(virginica, ubm), ivector_machine)
-
-
-# Whitening iVectors
-whitening_trainer = bob.learn.linear.WhiteningTrainer()
-whitener_machine = bob.learn.linear.Machine(
-    ivector_setosa.shape[1], ivector_setosa.shape[1])
-whitening_trainer.train(numpy.vstack(
-    (ivector_setosa, ivector_versicolor, ivector_virginica)), whitener_machine)
-ivector_setosa = whitener_machine(ivector_setosa)
-ivector_versicolor = whitener_machine(ivector_versicolor)
-ivector_virginica = whitener_machine(ivector_virginica)
-
-
-# LDA ivectors
-lda_trainer = bob.learn.linear.FisherLDATrainer()
-lda_machine = bob.learn.linear.Machine(
-    ivector_setosa.shape[1], ivector_setosa.shape[1])
-lda_trainer.train([ivector_setosa, ivector_versicolor,
-                   ivector_virginica], lda_machine)
-ivector_setosa = lda_machine(ivector_setosa)
-ivector_versicolor = lda_machine(ivector_versicolor)
-ivector_virginica = lda_machine(ivector_virginica)
-
-
-# WCCN ivectors
-# wccn_trainer = bob.learn.linear.WCCNTrainer()
-# wccn_machine = bob.learn.linear.Machine(
-#     ivector_setosa.shape[1], ivector_setosa.shape[1])
-# wccn_trainer.train([ivector_setosa, ivector_versicolor,
-#                     ivector_virginica], wccn_machine)
-# ivector_setosa = wccn_machine(ivector_setosa)
-# ivector_versicolor = wccn_machine(ivector_versicolor)
-# ivector_virginica = wccn_machine(ivector_virginica)
-
-
-plt.title("First two ivectors")
-plt.scatter(ivector_setosa[:, 0],
-            ivector_setosa[:, 1], c="darkcyan", label="setosa",
-            marker="x")
-plt.scatter(ivector_versicolor[:, 0],
-            ivector_versicolor[:, 1], c="goldenrod", label="versicolor",
-            marker="x")
-plt.scatter(ivector_virginica[:, 0],
-            ivector_virginica[:, 1], c="dimgrey", label="virginica",
-            marker="x")
-
-plt.xticks([], [])
-plt.yticks([], [])
-
-# plt.grid(True)
-# plt.xlabel('Sepal length')
-# plt.ylabel('Petal width')
-plt.legend(loc=2)
-plt.ylim([-1, 3.5])
-
-plt.tight_layout()
-plt.show()
diff --git a/doc/py_api.rst b/doc/py_api.rst
index ba31cb5074149fc23d7696d0fd44e4d1caebac58..420373802d21e23fec0ad6bcefbe9776439b7b7f 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -18,7 +18,10 @@ Trainers
 
 .. autosummary::
 
-   bob.learn.em.KMeansTrainer
+   bob.learn.em.cluster.KMeansTrainer
+
+..
+   TODO uncomment when implemented
    bob.learn.em.ML_GMMTrainer
    bob.learn.em.MAP_GMMTrainer
    bob.learn.em.ISVTrainer
@@ -32,7 +35,10 @@ Machines
 
 .. autosummary::
 
-   bob.learn.em.KMeansMachine
+   bob.learn.em.cluster.KMeansMachine
+
+..
+   TODO uncomment when implemented
    bob.learn.em.Gaussian
    bob.learn.em.GMMStats
    bob.learn.em.GMMMachine
@@ -46,11 +52,11 @@ Machines
 Functions
 ---------
 
-.. autosummary::
+..
+   TODO uncomment when implemented
+   .. autosummary::
 
-   bob.learn.em.linear_scoring
-   bob.learn.em.train
-   bob.learn.em.train_jfa
+      bob.learn.em.linear_scoring
 
 Detailed Information
 --------------------
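The updated doctest in ``doc/guide.rst`` above can also be read as the following standalone sketch. It assumes the new ``bob.learn.em.cluster`` module exposes ``KMeansMachine``, ``KMeansTrainer``, ``fit`` and ``centroids_`` exactly as shown in that doctest; the toy data here is illustrative only and is not the full doctest array::

   import numpy
   from bob.learn.em.cluster import KMeansMachine, KMeansTrainer

   # Toy data in the spirit of the doctest: two well-separated groups of
   # 3-D points.
   data = numpy.array(
       [[3, -3, 100],
        [3.5, -3.5, 99],
        [-7, 7, -100],
        [-5, 5, -101]], dtype='float64')

   # k=2 clusters; the convergence threshold is a machine parameter and the
   # iteration budget a trainer parameter, mirroring the updated doctest.
   # The doctest constructs the trainer even though fit() is called on the
   # machine directly.
   kmeans_machine = KMeansMachine(2, convergence_threshold=1e-5)
   kmeans_trainer = KMeansTrainer(max_iter=200)

   # fit() returns the trained machine; the estimated cluster centers are
   # then available as `centroids_`.
   kmeans_machine = kmeans_machine.fit(data)
   print(numpy.array(kmeans_machine.centroids_))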