From ded8a6e9bdca810f34eeedcd44071461c9e50ef7 Mon Sep 17 00:00:00 2001
From: Yannick DAYER <yannick.dayer@idiap.ch>
Date: Tue, 30 Nov 2021 14:29:06 +0100
Subject: [PATCH] Adapt GMMMachine to new KMeansMachine.

---
 bob/learn/em/data/gmm_ML.hdf5 | Bin 12920 -> 12920 bytes
 bob/learn/em/mixture/gmm.py   | 34 +++++++++++++++++-----------------
 bob/learn/em/test/test_gmm.py | 28 ++++++++++++++++++++--------
 3 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/bob/learn/em/data/gmm_ML.hdf5 b/bob/learn/em/data/gmm_ML.hdf5
index 0326c186f11ad38387b3a4c4b1e3cc7a66d32b80..20362881b1661a826a8773d1658a8df559099d46 100644
GIT binary patch
delta 186
zcmey7@*`!#9A;*1*=3vOGlw#P7?bBSPn@j4tTcHQTM&r9`9IrIK@bBZvDrdWhY`jB
tCEt2sLGdCg-HC@Y@A%6nKijdRtc;S#-Ahy<9ANH@M5xtLgVJ;9QEvz4OrMV>uH<=
lYu@}{b1n-=*XH?pFxTi6PF7&nn7qNzYVspCDUc+{ApqybL4p7P

delta 194
zcmey7@*`!#9A;+yxl1<BXAWfoF(%Jvo;X>7S!wbrwjdCH^MAIbf*=M+VzY&$4kL^M
RwX43QI#*P3X%A)**LjCGi!2_tP)ruj6X>Z#_ybX;l*SPg+{IoHVj~}!DjPpg;Ms(
s1}tow^)$|b&Ds24b1n<WfX(yuU=GtOoTOL9ID2xno+3zQ@_fBQ05Ake9RL6T

diff --git a/bob/learn/em/mixture/gmm.py b/bob/learn/em/mixture/gmm.py
index d17c7ae..65c8137 100644
--- a/bob/learn/em/mixture/gmm.py
+++ b/bob/learn/em/mixture/gmm.py
@@ -13,7 +13,6 @@ import numpy as np

 from sklearn.base import BaseEstimator

 from bob.learn.em.cluster import KMeansMachine
-from bob.learn.em.cluster import KMeansTrainer

 from h5py import File as HDF5File
@@ -274,7 +273,7 @@ class GMMMachine(BaseEstimator):
         max_fitting_steps: Union[int, None] = 200,
         random_state: Union[int, np.random.RandomState] = 0,
         weights: "Union[np.ndarray[('n_gaussians',), float], None]" = None,
-        k_means_trainer: Union[KMeansTrainer, None] = None,
+        k_means_trainer: Union[KMeansMachine, None] = None,
         update_means: bool = True,
         update_variances: bool = False,
         update_weights: bool = False,
@@ -317,7 +316,7 @@ class GMMMachine(BaseEstimator):
             Ratio for MAP adaptation. Used when `trainer == "map"` and
             `relevance_factor is None`)
         relevance_factor:
-            Factor for the computation of alpha with Reyolds adaptation. (Used when
+            Factor for the computation of alpha with Reynolds adaptation. (Used when
             `trainer == "map"`)
         variance_thresholds:
             The variance flooring thresholds, i.e. the minimum allowed value of variance in each dimension.
@@ -395,14 +394,14 @@ class GMMMachine(BaseEstimator):
     def variances(self, variances: "np.ndarray[('n_gaussians', 'n_features'), float]"):
         self._variances = np.maximum(self.variance_thresholds, variances)
         # Recompute g_norm for each gaussian [array of shape (n_gaussians,)]
-        n_log_2pi = self.variances.shape[-1] * np.log(2 * np.pi)
+        n_log_2pi = self._variances.shape[-1] * np.log(2 * np.pi)
         self._g_norms = np.array(n_log_2pi + np.log(self._variances).sum(axis=-1))

     @property
     def variance_thresholds(self):
         """Threshold below which variances are clamped to prevent precision losses."""
         if self._variance_thresholds is None:
-            raise ValueError("GMMMachine variance thresholds were never set.")
+            return EPSILON
         return self._variance_thresholds

     @variance_thresholds.setter
@@ -411,7 +410,8 @@ class GMMMachine(BaseEstimator):
         threshold: "Union[float, np.ndarray[('n_gaussians', 'n_features'), float]]",
     ):
         self._variance_thresholds = threshold
-        self.variances = np.maximum(threshold, self.variances)
+        if self._variances is not None:
+            self.variances = np.maximum(threshold, self._variances)

     @property
     def g_norms(self):
@@ -542,12 +542,11 @@ class GMMMachine(BaseEstimator):
             if data is None:
                 raise ValueError("Data is required when training with k-means.")
             logger.info("Initializing GMM with k-means.")
-            kmeans_trainer = self.k_means_trainer or KMeansTrainer(
+            kmeans_machine = self.k_means_trainer or KMeansMachine(
+                self.n_gaussians,
                 random_state=self.random_state,
             )
-            kmeans_machine = KMeansMachine(self.n_gaussians).fit(
-                data, trainer=kmeans_trainer
-            )
+            kmeans_machine = kmeans_machine.fit(data)

             (
                 variances,
@@ -706,6 +705,10 @@ class GMMMachine(BaseEstimator):
         else:
             logger.debug("GMM means already set. Initialization was not run!")

+        if self._variances is None:
+            logger.warning("Variances were not defined before fit. Using variance=1")
+            self.variances = np.ones_like(self.means)
+
         average_output = 0
         logger.info("Training GMM...")
         step = 0
@@ -713,11 +716,7 @@ class GMMMachine(BaseEstimator):
             step += 1
             logger.info(
                 f"Iteration {step:3d}"
-                + (
-                    f"/{self.max_fitting_steps:3d}"
-                    if self.max_fitting_steps is not None
-                    else ""
-                )
+                + (f"/{self.max_fitting_steps:3d}" if self.max_fitting_steps else "")
             )

             average_output_previous = average_output
@@ -750,8 +749,9 @@ class GMMMachine(BaseEstimator):
                 and convergence_value <= self.convergence_threshold
             ):
                 logger.info("Reached convergence threshold. Training stopped.")
-                return self
-        logger.info("Reached maximum step. Training stopped without convergence.")
+                break
+        else:
+            logger.info("Reached maximum step. Training stopped without convergence.")

         self.compute()
         return self
diff --git a/bob/learn/em/test/test_gmm.py b/bob/learn/em/test/test_gmm.py
index 43a9449..c11d5e1 100644
--- a/bob/learn/em/test/test_gmm.py
+++ b/bob/learn/em/test/test_gmm.py
@@ -22,7 +22,7 @@ from bob.io.base import load as load_array
 from bob.learn.em.mixture import GMMMachine
 from bob.learn.em.mixture import GMMStats
-from bob.learn.em.cluster import KMeansTrainer
+from bob.learn.em.cluster import KMeansMachine


 def test_GMMStats():
     # Test a GMMStats
@@ -133,11 +133,10 @@ def test_GMMMachine_1():
     # Checks particular varianceThresholds-related methods
     varianceThresholds1D = np.array([0.3, 1, 0.5], "float64")
     gmm.variance_thresholds = varianceThresholds1D
-    np.testing.assert_equal(gmm.variance_thresholds[0,:], varianceThresholds1D)
-    np.testing.assert_equal(gmm.variance_thresholds[1,:], varianceThresholds1D)
+    np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds1D)

     gmm.variance_thresholds = 0.005
-    np.testing.assert_equal(gmm.variance_thresholds, np.full((2,3), 0.005))
+    np.testing.assert_equal(gmm.variance_thresholds, 0.005)

     gmm.means = newMeans
     gmm.variances = newVariances
@@ -251,6 +250,7 @@ def test_GMMStats_2():
     machine = GMMMachine(n_gaussians)
     machine.means = np.array([[0, 0, 0], [8, 8, 8]])
+    machine.variances = np.ones_like(machine.means)

     # Populate the GMMStats
     stats = machine.acc_statistics(data)
@@ -314,6 +314,7 @@ def test_machine_parameters():
     n_features = 2
     machine = GMMMachine(n_gaussians)
     machine.means = np.repeat([[0], [1], [-1]], n_features, 1)
+    machine.variances = np.ones_like(machine.means)
     np.testing.assert_equal(machine.means, np.repeat([[0], [1], [-1]], n_features, 1))
     np.testing.assert_equal(machine.variances, np.ones((n_gaussians, n_features)))

@@ -331,7 +332,10 @@ def test_machine_parameters():

 def test_kmeans_plusplus_init():
     n_gaussians = 3
-    machine = GMMMachine(n_gaussians, k_means_trainer=KMeansTrainer("k-means++"))
+    machine = GMMMachine(
+        n_gaussians,
+        k_means_trainer=KMeansMachine(n_clusters=n_gaussians, init_method="k-means++"),
+    )
     data = np.array([[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]])
     machine = machine.fit(data)
     expected_means = np.array([[2.25, 2.25], [-1.25, 0.25], [1.25, 1.25]])
@@ -342,7 +346,10 @@ def test_kmeans_parallel_init():
     n_gaussians = 3
-    machine = GMMMachine(n_gaussians, k_means_trainer=KMeansTrainer("k-means||"))
+    machine = GMMMachine(
+        n_gaussians,
+        k_means_trainer=KMeansMachine(n_clusters=n_gaussians, init_method="k-means||"),
+    )
     data = np.array([[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]])
     machine = machine.fit(data)
     expected_means = np.array([[1.25, 1.25], [-1.25, 0.25], [2.25, 2.25]])

@@ -356,6 +363,7 @@ def test_likelihood():
     n_gaussians = 3
     machine = GMMMachine(n_gaussians)
     machine.means = np.repeat([[0], [1], [-1]], 3, 1)
+    machine.variances = np.ones_like(machine.means)
     log_likelihood = machine.log_likelihood(data)
     expected_ll = np.array(
         [-3.6519900964986527, -3.83151883210222, -3.83151883210222, -5.344374066745753]
@@ -390,6 +398,7 @@ def test_likelihood_weight():
     n_gaussians = 3
     machine = GMMMachine(n_gaussians)
     machine.means = np.repeat([[0], [1], [-1]], 3, 1)
+    machine.variances = np.ones_like(machine.means)
     machine.weights = [0.6, 0.1, 0.3]
     log_likelihood = machine.log_likelihood(data)
     expected_ll = np.array(
@@ -429,7 +438,7 @@ def test_ml_em():
     machine = GMMMachine(n_gaussians, update_means=True, update_variances=True, update_weights=True)
     machine.means = np.repeat([[2], [8]], n_features, 1)
-    machine.initialize_gaussians(None)
+    machine.variances = np.ones_like(machine.means)

     stats = machine.e_step( data)
     machine.m_step(stats)

@@ -447,6 +456,7 @@ def test_map_em():
     n_gaussians = 2
     prior_machine = GMMMachine(n_gaussians)
     prior_machine.means = np.array([[2, 2, 2], [8, 8, 8]])
+    prior_machine.variances = np.ones_like(prior_machine.means)
     prior_machine.weights = np.array([0.5, 0.5])

     machine = GMMMachine(n_gaussians, trainer="map", ubm=prior_machine, update_means=True, update_variances=True, update_weights=True)
@@ -483,6 +493,7 @@ def test_ml_transformer():
     machine = GMMMachine(n_gaussians, update_means=True, update_variances=True, update_weights=True)
     machine.means = np.array([[2, 2, 2], [8, 8, 8]])
+    machine.variances = np.ones_like(machine.means)

     machine = machine.fit(data)

@@ -514,6 +525,7 @@ def test_map_transformer():
     n_features = 3
     prior_machine = GMMMachine(n_gaussians)
     prior_machine.means = np.array([[2, 2, 2], [8, 8, 8]])
+    prior_machine.variances = np.ones_like(prior_machine.means)
     prior_machine.weights = np.array([0.5, 0.5])

     machine = GMMMachine(n_gaussians, trainer="map", ubm=prior_machine, update_means=True, update_variances=True, update_weights=True)
@@ -586,7 +598,7 @@ def test_gmm_ML_1():


 def test_gmm_ML_2():
-    """Trains a GMMMachine with ML_GMMTrainer; compares to an old reference"""
+    """Trains a GMMMachine with ML_GMMTrainer; compares to a reference"""
     ar = load_array(resource_filename("bob.learn.em", "data/dataNormalized.hdf5"))

     # Initialize GMMMachine
--
GitLab
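
Note for reviewers: a minimal usage sketch of the adapted API, mirroring the patched tests. The toy data and the "k-means++" init method are taken from test_gmm.py; the keyword spelling and the final print are illustrative only, not part of the patch.

    import numpy as np

    from bob.learn.em.cluster import KMeansMachine
    from bob.learn.em.mixture import GMMMachine

    # Toy 2-D data, borrowed from the patched tests.
    data = np.array([[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]])

    # The k_means_trainer argument now receives a ready KMeansMachine; before
    # this patch it took a KMeansTrainer and fit() built the machine itself.
    machine = GMMMachine(
        n_gaussians=3,
        k_means_trainer=KMeansMachine(n_clusters=3, init_method="k-means++"),
    )
    machine = machine.fit(data)
    print(machine.means)  # one row per Gaussian, initialized from k-means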
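
The variance handling also becomes lazy: the variance_thresholds getter falls back to EPSILON (a constant defined elsewhere in the module, not shown in this diff) instead of raising, the threshold setter only floors variances once they exist, and fit() defaults missing variances to 1 with a warning. A sketch of the resulting behavior, assuming the patched module:

    import numpy as np

    from bob.learn.em.mixture import GMMMachine

    machine = GMMMachine(n_gaussians=2)
    machine.means = np.array([[0.0, 0.0, 0.0], [8.0, 8.0, 8.0]])

    # Before this patch, setting thresholds here touched self.variances and
    # failed while variances were still unset; the flooring is now deferred.
    machine.variance_thresholds = 0.005

    # Variances assigned afterwards are floored against the stored threshold.
    machine.variances = np.ones_like(machine.means)

    # Calling fit(data) without setting variances would no longer fail: it
    # logs "Variances were not defined before fit. Using variance=1" instead.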
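
The convergence hunk replaces the early "return self" with "break" plus a while/else: the else suite of a Python loop runs only when the loop finishes without hitting break, so the "no convergence" message is skipped exactly when convergence stopped the loop. A self-contained illustration (the step counts are arbitrary):

    step, max_steps, converged = 0, 3, False
    while step < max_steps:
        step += 1
        if converged:  # in fit(), reaching the convergence threshold breaks here
            break
    else:
        # Runs only because the loop was exhausted without a break.
        print("Reached maximum step. Training stopped without convergence.")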