diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1daa2300fc671ed82d211795f4abe5f019b8d7fe..833ff2341e4272ee8687a877a1a39ee360be8fb9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -14,6 +14,11 @@ repos:
     rev: 3.9.2
     hooks:
       - id: flake8
+        exclude: |
+          (?x)^(
+            bob/devtools/templates/setup.py|
+            deps/bob-devel/run_test.py
+          )$
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.1.0
     hooks:
diff --git a/bob/learn/em/__init__.py b/bob/learn/em/__init__.py
index 669a8a320ec8785c3b45269cd495bb2ed7d0c50f..fda23d6cb6bd7992e628f634526fae1889945d66 100644
--- a/bob/learn/em/__init__.py
+++ b/bob/learn/em/__init__.py
@@ -1,11 +1,11 @@
 import bob.extension

+from .factor_analysis import ISVMachine, JFAMachine
 from .gmm import GMMMachine, GMMStats
 from .kmeans import KMeansMachine
 from .linear_scoring import linear_scoring  # noqa: F401
 from .wccn import WCCN
 from .whitening import Whitening
-from .factor_analysis import ISVMachine, JFAMachine


 def get_config():
diff --git a/bob/learn/em/factor_analysis.py b/bob/learn/em/factor_analysis.py
index 6eb2cd4c994fac5ab8290eb27e9a460d5b81bf07..318dbba91ead0a10f3a645712a4dec6b9bffd842 100644
--- a/bob/learn/em/factor_analysis.py
+++ b/bob/learn/em/factor_analysis.py
@@ -7,7 +7,8 @@ import logging

 import numpy as np

 from sklearn.base import BaseEstimator
-from . import linear_scoring
+
+from .linear_scoring import linear_scoring

 logger = logging.getLogger(__name__)
@@ -317,7 +318,9 @@ class FactorAnalysisBase(BaseEstimator):
         X = np.array(X)
         return list(X[np.where(np.array(y) == i)[0]])

-    #################### Estimating U and x ######################
+    """
+    Estimating U and x
+    """

     def _compute_id_plus_u_prod_ih(self, x_i, UProd):
         """
@@ -340,7 +343,7 @@ class FactorAnalysisBase(BaseEstimator):
         """

         n_i = x_i.n
-        I = np.eye(self.r_U, self.r_U)
+        I = np.eye(self.r_U, self.r_U)  # noqa: E741
         # TODO: make the invertion matrix function as a parameter
         return np.linalg.inv(I + (UProd * n_i[:, None, None]).sum(axis=0))
@@ -362,13 +365,12 @@ class FactorAnalysisBase(BaseEstimator):
         E[y_i] for class `i`

         """
-        f_i = x_i.sum_px
         n_i = x_i.n
-        n_ic = np.repeat(n_i, self.supervector_dimension // 2)
+        n_ic = np.repeat(n_i, self.feature_dimension)
         V = self._V

-        ## N_ih*( m + D*z)
+        # N_ih*( m + D*z)
         # z is zero when the computation flow comes from update_X
         if latent_z_i is None:
             # Fn_x_ih = N_{i,h}*(o_{i,h} - m)
@@ -474,7 +476,7 @@ class FactorAnalysisBase(BaseEstimator):

         Computes U_c.T*inv(Sigma_c) @ U_c.T

-        ### https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/FABaseTrainer.cpp#L325
+        https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/FABaseTrainer.cpp#L325
         """

         # UProd = (self.ubm.n_gaussians, self.r_U, self.r_U)
@@ -495,10 +497,10 @@ class FactorAnalysisBase(BaseEstimator):

         The accumulators are defined as

-        :math:`A_1 = \sum\limits_{i=1}^{I}\sum\limits_{h=1}^{H}N_{i,h,c}E(x_{i,h,c} x^{\top}_{i,h,c})`
+        :math:`A_1 = \\sum\\limits_{i=1}^{I}\\sum\\limits_{h=1}^{H}N_{i,h,c}E(x_{i,h,c} x^{\\top}_{i,h,c})`

-        :math:`A_2 = \sum\limits_{i=1}^{I}\sum\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \mu_c -D_{c}z_{i,c} -V_{c}y_{i,c} )E[x_{i,h}]^{\top}`
+        :math:`A_2 = \\sum\\limits_{i=1}^{I}\\sum\\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \\mu_c -D_{c}z_{i,c} -V_{c}y_{i,c} )E[x_{i,h}]^{\\top}`

         More information, please, check the technical notes attached
@@ -534,7 +536,7 @@ class FactorAnalysisBase(BaseEstimator):

         """

-        ## U accumulators
+        # U accumulators
         acc_U_A1 = np.zeros((self.ubm.n_gaussians, self.r_U, self.r_U))
         acc_U_A2 = np.zeros((self.supervector_dimension, self.r_U))
@@ -568,7 +570,9 @@ class FactorAnalysisBase(BaseEstimator):

         return acc_U_A1, acc_U_A2

-    #################### Estimating D and z ######################
+    """
+    Estimating D and z
+    """

     def update_z(self, X, y, latent_x, latent_y, latent_z, n_acc, f_acc):
         """
@@ -644,7 +648,7 @@ class FactorAnalysisBase(BaseEstimator):

         """

-        tmp_CD = np.repeat(n_acc_i, self.supervector_dimension // 2)
+        tmp_CD = np.repeat(n_acc_i, self.feature_dimension)
         id_plus_d_prod = np.ones(tmp_CD.shape) + dt_inv_sigma_d * tmp_CD
         return 1 / id_plus_d_prod
@@ -664,9 +668,9 @@ class FactorAnalysisBase(BaseEstimator):

         m = self.mean_supervector

-        tmp_CD = np.repeat(n_acc_i, self.supervector_dimension // 2)
+        tmp_CD = np.repeat(n_acc_i, self.feature_dimension)

-        ## JFA session part
+        # JFA session part
         V_dot_v = V @ latent_y_i if latent_y_i is not None else 0

         # m_cache_Fn_z_i = Fi - m_tmp_CD * (m + m_tmp_CD_b); // Fn_yi = sum_{sessions h}(N_{i,h}*(o_{i,h} - m - V*y_{i})
@@ -675,7 +679,7 @@ class FactorAnalysisBase(BaseEstimator):
         # Looping over the sessions
         for session_id in range(len(X_i)):
             n_i = X_i[session_id].n
-            tmp_CD = np.repeat(n_i, self.supervector_dimension // 2)
+            tmp_CD = np.repeat(n_i, self.feature_dimension)
             x_i_h = latent_x_i[:, session_id]

             fn_z_i -= tmp_CD * (U @ x_i_h)
@@ -690,10 +694,10 @@ class FactorAnalysisBase(BaseEstimator):

         The accumulators are defined as

-        :math:`A_1 = \sum\limits_{i=1}^{I}E[z_{i,c}z^{\top}_{i,c}]`
+        :math:`A_1 = \\sum\\limits_{i=1}^{I}E[z_{i,c}z^{\\top}_{i,c}]`

-        :math:`A_2 = \sum\limits_{i=1}^{I} \Bigg[\sum\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \mu_c -U_{c}x_{i,h,c} -V_{c}y_{i,c} )\Bigg]E[z_{i}]^{\top}`
+        :math:`A_2 = \\sum\\limits_{i=1}^{I} \\Bigg[\\sum\\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \\mu_c -U_{c}x_{i,h,c} -V_{c}y_{i,c} )\\Bigg]E[z_{i}]^{\\top}`

         More information, please, check the technical notes attached
@@ -757,7 +761,7 @@ class FactorAnalysisBase(BaseEstimator):
                 X_i, latent_x_i, latent_y_i, n_acc[y_i], f_acc[y_i]
             )

-            tmp_CD = np.repeat(n_acc[y_i], self.supervector_dimension // 2)
+            tmp_CD = np.repeat(n_acc[y_i], self.feature_dimension)
             acc_D_A1 += (
                 id_plus_d_prod + latent_z[y_i] * latent_z[y_i]
             ) * tmp_CD
@@ -806,7 +810,9 @@ class FactorAnalysisBase(BaseEstimator):

         return latent_x, latent_y, latent_z

-    #################### Estimating V and y ######################
+    """
+    Estimating V and y
+    """

     def update_y(self, X, y, VProd, latent_x, latent_y, latent_z, n_acc, f_acc):
         """
@@ -874,7 +880,7 @@ class FactorAnalysisBase(BaseEstimator):

         """

-        I = np.eye(self.r_V, self.r_V)
+        I = np.eye(self.r_V, self.r_V)  # noqa: E741
         # TODO: make the invertion matrix function as a parameter
         return np.linalg.inv(I + (VProd * n_acc_i[:, None, None]).sum(axis=0))
@@ -884,7 +890,7 @@ class FactorAnalysisBase(BaseEstimator):

         Computes V_c.T*inv(Sigma_c) @ V_c.T

-        ### https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/FABaseTrainer.cpp#L193
+        https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/da92d0e5799d018f311f1bf5cdd5a80e19e142ca/bob/learn/em/cpp/FABaseTrainer.cpp#L193
         """

         Vc = self._V.reshape(
@@ -904,10 +910,10 @@ class FactorAnalysisBase(BaseEstimator):
         Computes the accumulators for the update of V matrix
         The accumulators are defined as

-        :math:`A_1 = \sum\limits_{i=1}^{I}E[y_{i,c}y^{\top}_{i,c}]`
+        :math:`A_1 = \\sum\\limits_{i=1}^{I}E[y_{i,c}y^{\\top}_{i,c}]`

-        :math:`A_2 = \sum\limits_{i=1}^{I} \Bigg[\sum\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \mu_c -U_{c}x_{i,h,c} -D_{c}z_{i,c} )\Bigg]E[y_{i}]^{\top}`
+        :math:`A_2 = \\sum\\limits_{i=1}^{I} \\Bigg[\\sum\\limits_{h=1}^{H}N_{i,h,c}(o_{i,h} - \\mu_c -U_{c}x_{i,h,c} -D_{c}z_{i,c} )\\Bigg]E[y_{i}]^{\\top}`

         More information, please, check the technical notes attached
@@ -951,7 +957,7 @@ class FactorAnalysisBase(BaseEstimator):

         """

-        ## U accumulators
+        # V accumulators
         acc_V_A1 = np.zeros((self.ubm.n_gaussians, self.r_V, self.r_V))
         acc_V_A2 = np.zeros((self.supervector_dimension, self.r_V))
@@ -1024,25 +1030,24 @@ class FactorAnalysisBase(BaseEstimator):

         # y = self.y[i] # Not doing JFA

-        tmp_CD = np.repeat(n_acc_i, self.supervector_dimension // 2)
+        tmp_CD = np.repeat(n_acc_i, self.feature_dimension)
         fn_y_i = f_acc_i.flatten() - tmp_CD * (
             m - D * latent_z_i
         )  # Fn_yi = sum_{sessions h}(N_{i,h}*(o_{i,h} - m - D*z_{i})

-        ### NOT DOING JFA
-
         # Looping over the sessions of a ;ane;
         for session_id in range(len(X_i)):
             n_i = X_i[session_id].n
             U_dot_x = U @ latent_x_i[:, session_id]
-            tmp_CD = np.repeat(n_i, self.supervector_dimension // 2)
+            tmp_CD = np.repeat(n_i, self.feature_dimension)
             fn_y_i -= tmp_CD * U_dot_x

         return fn_y_i

-    ####################################################################################################################
-    # Scoring
+    """
+    Scoring
+    """

     def estimate_x(self, X):
@@ -1064,7 +1069,7 @@ class FactorAnalysisBase(BaseEstimator):
         X_i: list of :py:class:`bob.learn.em.GMMStats`
             List of statistics for a class
         """
-        I = np.eye(self.r_U, self.r_U)
+        I = np.eye(self.r_U, self.r_U)  # noqa: E741

         Uc = self._U.reshape(
             (self.ubm.n_gaussians, self.feature_dimension, self.r_U)
@@ -1072,16 +1077,11 @@ class FactorAnalysisBase(BaseEstimator):

         UcT = np.transpose(Uc, axes=(0, 2, 1))

-        sigma_c = np.reshape(
-            self.variance_supervector,
-            (self.ubm.n_gaussians, self.feature_dimension),
-        )
+        sigma_c = self.ubm.variances[:, np.newaxis]

         n_i_c = np.expand_dims(X_i.n[:, np.newaxis], axis=2)

-        id_plus_us_prod_inv = I + (
-            ((UcT / sigma_c[:, np.newaxis]) @ Uc) * n_i_c
-        ).sum(axis=0)
+        id_plus_us_prod_inv = I + (((UcT / sigma_c) @ Uc) * n_i_c).sum(axis=0)
         id_plus_us_prod_inv = np.linalg.inv(id_plus_us_prod_inv)

         return id_plus_us_prod_inv
@@ -1135,7 +1135,9 @@ class ISVMachine(FactorAnalysisBase):

     """

-    def __init__(self, ubm, r_U, em_iterations, relevance_factor=4.0, seed=0):
+    def __init__(
+        self, ubm, r_U, em_iterations=10, relevance_factor=4.0, seed=0
+    ):
         super(ISVMachine, self).__init__(
             ubm,
             r_U=r_U,
@@ -1206,6 +1208,8 @@ class ISVMachine(FactorAnalysisBase):
         if not hasattr(self, "_U") or not hasattr(self, "_D"):
             self.create_UVD()

+        y = y.tolist() if not isinstance(y, list) else y
+
         # TODO: Point of parallelism
         n_acc, f_acc = self.initialize(X, y)
         for i in range(self.em_iterations):
@@ -1292,7 +1296,7 @@ class JFAMachine(FactorAnalysisBase):
    within-class assumption (modeled with :math:`U`), it also hypothesize that between class variations
    are embedded in a low rank rectangular matrix
    :math:`V`. In the supervector notation, this modeling has the following shape:
-    :math:`\mu_{i, j} = m + Ux_{i, j} + Vy_{i} + D_z{i}`.
+    :math:`\\mu_{i, j} = m + Ux_{i, j} + Vy_{i} + Dz_{i}`.

    For more information check [McCool2013]_
@@ -1320,7 +1324,7 @@ class JFAMachine(FactorAnalysisBase):
     """

     def __init__(
-        self, ubm, r_U, r_V, em_iterations, relevance_factor=4.0, seed=0
+        self, ubm, r_U, r_V, em_iterations=10, relevance_factor=4.0, seed=0
     ):
         super(JFAMachine, self).__init__(
             ubm,
@@ -1369,7 +1373,7 @@ class JFAMachine(FactorAnalysisBase):

         latent_x, latent_y, latent_z = self.initialize_XYZ(y)

-        #### UPDATE Y, X AND FINALY Z
+        # UPDATE Y, X AND FINALLY Z

         latent_y = self.update_y(
             X, y, VProd, latent_x, latent_y, latent_z, n_acc, f_acc
@@ -1440,7 +1444,7 @@ class JFAMachine(FactorAnalysisBase):

         latent_x, latent_y, latent_z = self.initialize_XYZ(y)

-        #### UPDATE Y, X AND FINALY Z
+        # UPDATE Y, X AND FINALLY Z

         latent_y = self.update_y(
             X, y, VProd, latent_x, latent_y, latent_z, n_acc, f_acc
@@ -1680,6 +1684,8 @@ class JFAMachine(FactorAnalysisBase):
         ):
             self.create_UVD()

+        y = y.tolist() if not isinstance(y, list) else y
+
         # TODO: Point of parallelism
         n_acc, f_acc = self.initialize(X, y)
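
A note on the recurring change above: `np.repeat(n_i, self.supervector_dimension // 2)` becomes `np.repeat(n_i, self.feature_dimension)` everywhere per-Gaussian occupancy counts are expanded to supervector length. Since `supervector_dimension = n_gaussians * feature_dimension` (see the `reshape` calls in this file), the two expressions coincide only when `n_gaussians == 2`, which is exactly the configuration the unit tests below use, so the old form was presumably a latent shape bug. A minimal numpy sketch of the difference, using a hypothetical 3-Gaussian, 2-feature layout (not part of the patch)::

    import numpy as np

    n_gaussians, feature_dimension = 3, 2
    supervector_dimension = n_gaussians * feature_dimension

    # Hypothetical per-Gaussian occupancy counts (one entry per Gaussian).
    n_i = np.array([7.0, 2.0, 5.0])

    # Old expression: repeats each count supervector_dimension // 2 == 3
    # times, giving shape (9,), which does not match the supervector.
    old = np.repeat(n_i, supervector_dimension // 2)

    # New expression: one copy per feature dimension -> [7, 7, 2, 2, 5, 5],
    # aligned with the (n_gaussians * feature_dimension,) supervector.
    new = np.repeat(n_i, feature_dimension)

    assert old.shape == (9,) and new.shape == (supervector_dimension,)
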
diff --git a/bob/learn/em/test/test_jfa.py b/bob/learn/em/test/test_jfa.py
index 23086de322ac17fc7deedc63343bdc934138a3a9..c96f59e1916f3b10b4b41b3a8d61be2d2fdaca73 100644
--- a/bob/learn/em/test/test_jfa.py
+++ b/bob/learn/em/test/test_jfa.py
@@ -7,8 +7,8 @@
 # Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland

 import numpy as np
+
 from bob.learn.em import GMMMachine, GMMStats, ISVMachine, JFAMachine
-import copy


 def test_JFAMachine():
@@ -97,4 +97,3 @@ def test_ISVMachine():

     score_ref = -3.280498193082100
     assert abs(score_ref - score) < eps
-    pass
diff --git a/bob/learn/em/test/test_jfa_trainer.py b/bob/learn/em/test/test_jfa_trainer.py
index f9e51e4473da960988edc2c1cfc1237b7e4d2b5c..b4b75b7a615fbd8501f313de6d181395c2af80aa 100644
--- a/bob/learn/em/test/test_jfa_trainer.py
+++ b/bob/learn/em/test/test_jfa_trainer.py
@@ -6,9 +6,11 @@
 #
 # Copyright (C) 2011-2014 Idiap Research Institute, Martigny, Switzerland

+import copy
+
 import numpy as np
+
 from bob.learn.em import GMMMachine, GMMStats, ISVMachine, JFAMachine
-import copy

 # Define Training set and initial values for tests
 F1 = np.array(
@@ -120,12 +122,11 @@ def test_JFATrainAndEnrol():
     ubm.means = UBM_MEAN.reshape((2, 3))
     ubm.variances = UBM_VAR.reshape((2, 3))
     it = JFAMachine(ubm, 2, 2, em_iterations=10)
-    # n_acc, f_acc = it.initialize(TRAINING_STATS_X, TRAINING_STATS_y)
+
     it.U = copy.deepcopy(M_u)
     it.V = copy.deepcopy(M_v)
     it.D = copy.deepcopy(M_d)
     it.fit(TRAINING_STATS_X, TRAINING_STATS_y)
-    # bob.learn.em.train_jfa(t, mb, TRAINING_STATS, initialize=False)

     v_ref = np.array(
         [
@@ -211,28 +212,6 @@ def test_JFATrainAndEnrol():
     assert np.allclose(latent_y, y_ref, eps)
     assert np.allclose(latent_z, z_ref, eps)

-    # Testing exceptions
-    """
-    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [1, 2, 2])
-    nose.tools.assert_raises(RuntimeError, t.initialize, mb, [[1, 2, 2]])
-    nose.tools.assert_raises(RuntimeError, t.e_step_u, mb, [1, 2, 2])
-    nose.tools.assert_raises(RuntimeError, t.e_step_u, mb, [[1, 2, 2]])
-    nose.tools.assert_raises(RuntimeError, t.m_step_u, mb, [1, 2, 2])
-    nose.tools.assert_raises(RuntimeError, t.m_step_u, mb, [[1, 2, 2]])
-
-    nose.tools.assert_raises(RuntimeError, t.e_step_v, mb, [1, 2, 2])
-    nose.tools.assert_raises(RuntimeError, t.e_step_v, mb, [[1, 2, 2]])
-    nose.tools.assert_raises(RuntimeError, t.m_step_v, mb, [1, 2, 2])
-    nose.tools.assert_raises(RuntimeError, t.m_step_v, mb, [[1, 2, 2]])
-
-    nose.tools.assert_raises(RuntimeError, t.e_step_d, mb, [1, 2, 2])
-    nose.tools.assert_raises(RuntimeError, t.e_step_d, mb, [[1, 2, 2]])
-    nose.tools.assert_raises(RuntimeError, t.m_step_d, mb, [1, 2, 2])
-    nose.tools.assert_raises(RuntimeError, t.m_step_d, mb, [[1, 2, 2]])
-
-    nose.tools.assert_raises(RuntimeError, t.enroll, m, [[1, 2, 2]], 5)
-    """
-

 def test_ISVTrainAndEnrol():
     # Train and enroll an 'ISVMachine'
@@ -272,9 +251,9 @@ def test_ISVTrainAndEnrol():
         "float64",
     )

-    ######################################
-    # Calls the train function
-    ######################################
+    """
+    Calls the train function
+    """

     ubm = GMMMachine(2, 3)
     ubm.means = UBM_MEAN.reshape((2, 3))
     ubm.variances = UBM_VAR.reshape((2, 3))
@@ -292,9 +271,9 @@ def test_ISVTrainAndEnrol():
     assert np.allclose(it.D, d_ref, eps)
     assert np.allclose(it.U, u_ref, eps)

-    ######################################
-    # Calls the enroll function
-    ######################################
+    """
+    Calls the enroll function
+    """

     Ne = np.array([0.1579, 0.9245, 0.1323, 0.2458]).reshape((2, 2))
     Fe = np.array(
@@ -325,13 +304,6 @@ def test_ISVTrainAndEnrol():
     latent_z = it.enroll(gse, 5)
     assert np.allclose(latent_z, z_ref, eps)

-    # Testing exceptions
-    # nose.tools.assert_raises(RuntimeError, t.initialize, mb, [1, 2, 2])
-    # nose.tools.assert_raises(RuntimeError, t.initialize, mb, [[1, 2, 2]])
-    # nose.tools.assert_raises(RuntimeError, t.e_step, mb, [1, 2, 2])
-    # nose.tools.assert_raises(RuntimeError, t.e_step, mb, [[1, 2, 2]])
-    # nose.tools.assert_raises(RuntimeError, t.enroll, m, [[1, 2, 2]], 5)
-

 def test_JFATrainInitialize():
     # Check that the initialization is consistent and using the rng (cf. issue #118)
@@ -343,7 +315,7 @@ def test_JFATrainInitialize():
     ubm.means = UBM_MEAN.reshape((2, 3))
     ubm.variances = UBM_VAR.reshape((2, 3))

-    ## JFA
+    # JFA
     it = JFAMachine(ubm, 2, 2, em_iterations=10)

     # first round
@@ -373,7 +345,7 @@ def test_ISVTrainInitialize():
     ubm.means = UBM_MEAN.reshape((2, 3))
     ubm.variances = UBM_VAR.reshape((2, 3))

-    ## ISV
+    # ISV
     it = ISVMachine(ubm, 2, em_iterations=10)
     # it.rng = rng
diff --git a/bob/learn/em/wccn.py b/bob/learn/em/wccn.py
index 7c2c8246c4f70c80f3ae5499ce70db65c8976f65..47d5b80463ebb74e0c1e7f8b8dccc1f52ed31529 100644
--- a/bob/learn/em/wccn.py
+++ b/bob/learn/em/wccn.py
@@ -1,3 +1,6 @@
+#!/usr/bin/env python
+# @author: Tiago de Freitas Pereira
+
 import dask

 # Dask doesn't have an implementation for `pinv`
diff --git a/bob/learn/em/whitening.py b/bob/learn/em/whitening.py
index 3565f4fb2397bb7aadcd5e81727e79562056270e..bf81a36203580120e2d3f90664bed38401809d58 100644
--- a/bob/learn/em/whitening.py
+++ b/bob/learn/em/whitening.py
@@ -1,3 +1,6 @@
+#!/usr/bin/env python
+# @author: Tiago de Freitas Pereira
+
 import dask

 from scipy.linalg import pinv
diff --git a/buildout.cfg b/buildout.cfg
index d3c5cee7a448e29f08c7627e5a14591ec2e73122..044f2367cdf0522cb7d4a8b7159e47be68d2f696 100644
--- a/buildout.cfg
+++ b/buildout.cfg
@@ -8,6 +8,7 @@ eggs = bob.learn.em
 extensions = bob.buildout
 newest = false
 verbose = true
+debug = true

 [scripts]
 recipe = bob.buildout:scripts
diff --git a/doc/index.rst b/doc/index.rst
index d81413146ca3255cce7f5462ff8d295c14cb7a03..14a5f1d6bdc43d789305b5f8a548c8f847ef7db2 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -37,7 +37,9 @@ References
 ..
 .. [Vogt2008] *R. Vogt, S. Sridharan*. **'Explicit Modelling of Session Variability for Speaker Verification'**, Computer Speech & Language, 2008, vol. 22, no. 1, pp. 17-38
 ..
-   .. [McCool2013] *C. McCool, R. Wallace, M. McLaren, L. El Shafey, S. Marcel*. **'Session Variability Modelling for Face Authentication'**, IET Biometrics, 2013
+
+.. [McCool2013] *C. McCool, R. Wallace, M. McLaren, L. El Shafey, S. Marcel*. **'Session Variability Modelling for Face Authentication'**, IET Biometrics, 2013
+
 ..
 .. [ElShafey2014] *Laurent El Shafey, Chris McCool, Roy Wallace, Sebastien Marcel*. **'A Scalable Formulation of Probabilistic Linear Discriminant Analysis: Applied to Face Recognition'**, TPAMI'2014
 ..
diff --git a/doc/plot/plot_ISV.py b/doc/plot/plot_ISV.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc7b5593057db9d6802e2b47598eab32c9749fc2
--- /dev/null
+++ b/doc/plot/plot_ISV.py
@@ -0,0 +1,168 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import load_iris
+
+import bob.learn.em
+
+np.random.seed(2)  # FIXING A SEED
+
+
+def isv_train(features, ubm):
+    """
+    Train U matrix
+
+    **Parameters**
+
+    features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+
+    ubm: UBM (:py:class:`bob.learn.em.GMMMachine`)
+    """
+
+    stats = []
+    for user in features:
+        user_stats = []
+        for f in user:
+            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
+            ubm.acc_statistics(f, s)
+            user_stats.append(s)
+        stats.append(user_stats)
+
+    relevance_factor = 4
+    subspace_dimension_of_u = 1
+
+    isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
+    trainer = bob.learn.em.ISVTrainer(relevance_factor)
+    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
+    bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)
+
+    return isvbase
+
+
+# GENERATING DATA
+iris_data = load_iris()
+X = np.column_stack((iris_data.data[:, 0], iris_data.data[:, 3]))
+y = iris_data.target
+
+setosa = X[iris_data.target == 0]
+versicolor = X[iris_data.target == 1]
+virginica = X[iris_data.target == 2]
+
+n_gaussians = 3
+r_U = 1
+
+
+# TRAINING THE PRIOR
+ubm = bob.learn.em.GMMMachine(n_gaussians)
+# Initializing with old bob initialization
+ubm.means = np.array(
+    [
+        [5.0048631, 0.26047739],
+        [5.83509503, 1.40530362],
+        [6.76257257, 1.98965356],
+    ]
+)
+ubm.variances = np.array(
+    [
+        [0.11311728, 0.05183813],
+        [0.11587106, 0.08492455],
+        [0.20482993, 0.10438209],
+    ]
+)
+
+ubm.weights = np.array([0.36, 0.36, 0.28])
+
+gmm_stats = [ubm.acc_statistics(x[np.newaxis]) for x in X]
+isv_machine = bob.learn.em.ISVMachine(ubm, r_U, em_iterations=50)
+isv_machine.U = np.array(
+    [[-0.150035, -0.44441, -1.67812, 2.47621, -0.52885, 0.659141]]
+).T
+
+isv_machine = isv_machine.fit(gmm_stats, y)
+
+# gmm_stats = [ubm.acc_statistics(x) for x in [setosa, versicolor, virginica]]
+# isv_machine = bob.learn.em.ISVMachine(ubm, r_U).fit(gmm_stats, [0, 1, 2])
+
+
+# isvbase = isv_train([setosa, versicolor, virginica], ubm)
+
+# Variability direction
+u0 = isv_machine.U[0:2, 0] / np.linalg.norm(isv_machine.U[0:2, 0])
+u1 = isv_machine.U[2:4, 0] / np.linalg.norm(isv_machine.U[2:4, 0])
+u2 = isv_machine.U[4:6, 0] / np.linalg.norm(isv_machine.U[4:6, 0])
+
+figure, ax = plt.subplots()
+plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
+plt.scatter(
+    versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor"
+)
+plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
+
+plt.scatter(
+    ubm.means[:, 0],
+    ubm.means[:, 1],
+    c="blue",
+    marker="x",
+    label="centroids - mle",
+)
+# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
+#             marker=".", label="within class variability", s=0.01)
+
+ax.arrow(
+    ubm.means[0, 0],
+    ubm.means[0, 1],
+    u0[0],
+    u0[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+ax.arrow(
+    ubm.means[1, 0],
+    ubm.means[1, 1],
+    u1[0],
+    u1[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+ax.arrow(
+    ubm.means[2, 0],
+    ubm.means[2, 1],
+    u2[0],
+    u2[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+plt.text(
+    ubm.means[0, 0] + u0[0],
+    ubm.means[0, 1] + u0[1] - 0.1,
+    r"$\mathbf{U}_1$",
+    fontsize=15,
+)
+plt.text(
+    ubm.means[1, 0] + u1[0],
+    ubm.means[1, 1] + u1[1] - 0.1,
+    r"$\mathbf{U}_2$",
+    fontsize=15,
+)
+plt.text(
+    ubm.means[2, 0] + u2[0],
+    ubm.means[2, 1] + u2[1] - 0.1,
+    r"$\mathbf{U}_3$",
+    fontsize=15,
+)
+
+plt.xticks([], [])
+plt.yticks([], [])
+
+# plt.grid(True)
+plt.xlabel("Sepal length")
+plt.ylabel("Petal width")
+plt.legend()
+plt.tight_layout()
+plt.show()
diff --git a/doc/plot/plot_JFA.py b/doc/plot/plot_JFA.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c19474a49322178bd6f9ffb8bc3f1788f684355
--- /dev/null
+++ b/doc/plot/plot_JFA.py
@@ -0,0 +1,240 @@
+import matplotlib.pyplot as plt
+import numpy as np
+
+from sklearn.datasets import load_iris
+
+import bob.learn.em
+
+np.random.seed(2)  # FIXING A SEED
+
+
+def isv_train(features, ubm):
+    """
+    Train U matrix
+
+    **Parameters**
+
+    features: List of :py:class:`bob.learn.em.GMMStats` organized by class
+
+    ubm: UBM (:py:class:`bob.learn.em.GMMMachine`)
+    """
+
+    stats = []
+    for user in features:
+        user_stats = []
+        for f in user:
+            s = bob.learn.em.GMMStats(ubm.shape[0], ubm.shape[1])
+            ubm.acc_statistics(f, s)
+            user_stats.append(s)
+        stats.append(user_stats)
+
+    relevance_factor = 4
+    subspace_dimension_of_u = 1
+
+    isvbase = bob.learn.em.ISVBase(ubm, subspace_dimension_of_u)
+    trainer = bob.learn.em.ISVTrainer(relevance_factor)
+    # trainer.rng = bob.core.random.mt19937(int(self.init_seed))
+    bob.learn.em.train(trainer, isvbase, stats, max_iterations=50)
+
+    return isvbase
+
+
+# GENERATING DATA
+iris_data = load_iris()
+X = np.column_stack((iris_data.data[:, 0], iris_data.data[:, 3]))
+y = iris_data.target
+
+
+setosa = X[iris_data.target == 0]
+versicolor = X[iris_data.target == 1]
+virginica = X[iris_data.target == 2]
+
+n_gaussians = 3
+r_U = 1
+r_V = 1
+
+
+# TRAINING THE PRIOR
+ubm = bob.learn.em.GMMMachine(n_gaussians)
+# Initializing with old bob initialization
+ubm.means = np.array(
+    [
+        [5.0048631, 0.26047739],
+        [5.83509503, 1.40530362],
+        [6.76257257, 1.98965356],
+    ]
+)
+ubm.variances = np.array(
+    [
+        [0.11311728, 0.05183813],
+        [0.11587106, 0.08492455],
+        [0.20482993, 0.10438209],
+    ]
+)
+
+ubm.weights = np.array([0.36, 0.36, 0.28])
+# .fit(X)
+
+gmm_stats = [ubm.acc_statistics(x[np.newaxis]) for x in X]
+jfa_machine = bob.learn.em.JFAMachine(ubm, r_U, r_V, em_iterations=50)
+
+# Initializing with old bob initialization
+jfa_machine.U = np.array(
+    [[-0.150035, -0.44441, -1.67812, 2.47621, -0.52885, 0.659141]]
+).T
+
+jfa_machine.V = np.array(
+    [[-0.538446, 1.67376, -0.111288, 2.06948, 1.39563, -1.65004]]
+).T
+jfa_machine.D = np.array(
+    [0.732467, 0.281321, 0.543212, -0.512974, 1.04108, 0.835224]
+)
+jfa_machine = jfa_machine.fit(gmm_stats, y)
+
+
+# .fit(gmm_stats, y)
+
+# gmm_stats = [ubm.acc_statistics(x) for x in [setosa, versicolor, virginica]]
+# jfa_machine = bob.learn.em.JFAMachine(ubm, r_U, r_V).fit(gmm_stats, [0, 1, 2])
+
+
+# Variability direction U
+u0 = jfa_machine.U[0:2, 0] / np.linalg.norm(jfa_machine.U[0:2, 0])
+u1 = jfa_machine.U[2:4, 0] / np.linalg.norm(jfa_machine.U[2:4, 0])
+u2 = jfa_machine.U[4:6, 0] / np.linalg.norm(jfa_machine.U[4:6, 0])
+
+
+# Variability direction V
+v0 = jfa_machine.V[0:2, 0] / np.linalg.norm(jfa_machine.V[0:2, 0])
+v1 = jfa_machine.V[2:4, 0] / np.linalg.norm(jfa_machine.V[2:4, 0])
+v2 = jfa_machine.V[4:6, 0] / np.linalg.norm(jfa_machine.V[4:6, 0])
+
+
+figure, ax = plt.subplots()
+plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa")
+plt.scatter(
+    versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor"
+)
+plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica")
+
+plt.scatter(
+    ubm.means[:, 0],
+    ubm.means[:, 1],
+    c="blue",
+    marker="x",
+    label="centroids - mle",
+)
+# plt.scatter(ubm.means[:, 0], ubm.means[:, 1], c="blue",
+#             marker=".", label="within class variability", s=0.01)
+
+# U
+ax.arrow(
+    ubm.means[0, 0],
+    ubm.means[0, 1],
+    u0[0],
+    u0[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+ax.arrow(
+    ubm.means[1, 0],
+    ubm.means[1, 1],
+    u1[0],
+    u1[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+ax.arrow(
+    ubm.means[2, 0],
+    ubm.means[2, 1],
+    u2[0],
+    u2[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+plt.text(
+    ubm.means[0, 0] + u0[0],
+    ubm.means[0, 1] + u0[1] - 0.1,
+    r"$\mathbf{U}_1$",
+    fontsize=15,
+)
+plt.text(
+    ubm.means[1, 0] + u1[0],
+    ubm.means[1, 1] + u1[1] - 0.1,
+    r"$\mathbf{U}_2$",
+    fontsize=15,
+)
+plt.text(
+    ubm.means[2, 0] + u2[0],
+    ubm.means[2, 1] + u2[1] - 0.1,
+    r"$\mathbf{U}_3$",
+    fontsize=15,
+)
+
+# V
+ax.arrow(
+    ubm.means[0, 0],
+    ubm.means[0, 1],
+    v0[0],
+    v0[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+ax.arrow(
+    ubm.means[1, 0],
+    ubm.means[1, 1],
+    v1[0],
+    v1[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+ax.arrow(
+    ubm.means[2, 0],
+    ubm.means[2, 1],
+    v2[0],
+    v2[1],
+    fc="k",
+    ec="k",
+    head_width=0.05,
+    head_length=0.1,
+)
+plt.text(
+    ubm.means[0, 0] + v0[0],
+    ubm.means[0, 1] + v0[1] - 0.1,
+    r"$\mathbf{V}_1$",
+    fontsize=15,
+)
+plt.text(
+    ubm.means[1, 0] + v1[0],
+    ubm.means[1, 1] + v1[1] - 0.1,
+    r"$\mathbf{V}_2$",
+    fontsize=15,
+)
+plt.text(
+    ubm.means[2, 0] + v2[0],
+    ubm.means[2, 1] + v2[1] - 0.1,
+    r"$\mathbf{V}_3$",
+    fontsize=15,
+)
+
+plt.xticks([], [])
+plt.yticks([], [])
+
+plt.xlabel("Sepal length")
+plt.ylabel("Petal width")
+plt.legend(loc=2)
+plt.ylim([-1, 3.5])
+
+plt.tight_layout()
+plt.grid(True)
+plt.show()
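
For reference, the workflow this patch settles across the library, the tests, and the two new plot scripts can be condensed into a short usage sketch. It is restricted to calls that appear in this diff (`GMMMachine`, `acc_statistics`, `ISVMachine` with its new `em_iterations` default, `fit`, and `enroll`); the data and UBM parameters are hypothetical, and the enrollment call follows the `enroll(gse, 5)` signature used in `test_ISVTrainAndEnrol`::

    import numpy as np

    import bob.learn.em

    # Hypothetical 2-D toy data for three classes (labels 0, 1, 2).
    rng = np.random.RandomState(0)
    X = np.concatenate([rng.normal(c, 0.1, size=(10, 2)) for c in range(3)])
    y = [label for label in range(3) for _ in range(10)]

    # Hand-initialized UBM, mirroring doc/plot/plot_ISV.py (values made up).
    ubm = bob.learn.em.GMMMachine(2)
    ubm.means = np.array([[0.5, 0.5], [1.5, 1.5]])
    ubm.variances = np.array([[1.0, 1.0], [1.0, 1.0]])
    ubm.weights = np.array([0.5, 0.5])

    # Per-sample sufficient statistics against the UBM.
    gmm_stats = [ubm.acc_statistics(x[np.newaxis]) for x in X]

    # Train the rank-1 session-variability subspace U; em_iterations now
    # defaults to 10 after this patch.
    isv = bob.learn.em.ISVMachine(ubm, r_U=1)
    isv = isv.fit(gmm_stats, y)

    # Enroll a model from a list of statistics (5 iterations, as in the tests).
    latent_z = isv.enroll(gmm_stats[:2], 5)
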