Skip to content
Snippets Groups Projects
Commit ded8a6e9 authored by Yannick DAYER
Browse files

Adapt GMMMachine to new KMeansMachine.

parent 54fff114
No related branches found
No related tags found
2 merge requests: !42 GMM implementation in Python, !40 Transition to a pure python implementation
Pipeline #56662 passed
No preview for this file type
......@@ -13,7 +13,6 @@ import numpy as np
from sklearn.base import BaseEstimator
from bob.learn.em.cluster import KMeansMachine
from bob.learn.em.cluster import KMeansTrainer
from h5py import File as HDF5File
......@@ -274,7 +273,7 @@ class GMMMachine(BaseEstimator):
max_fitting_steps: Union[int, None] = 200,
random_state: Union[int, np.random.RandomState] = 0,
weights: "Union[np.ndarray[('n_gaussians',), float], None]" = None,
k_means_trainer: Union[KMeansTrainer, None] = None,
k_means_trainer: Union[KMeansMachine, None] = None,
update_means: bool = True,
update_variances: bool = False,
update_weights: bool = False,
......@@ -317,7 +316,7 @@ class GMMMachine(BaseEstimator):
Ratio for MAP adaptation. Used when `trainer == "map"` and
`relevance_factor is None`)
relevance_factor:
Factor for the computation of alpha with Reyolds adaptation. (Used when
Factor for the computation of alpha with Reynolds adaptation. (Used when
`trainer == "map"`)
variance_thresholds:
The variance flooring thresholds, i.e. the minimum allowed value of variance in each dimension.
......@@ -395,14 +394,14 @@ class GMMMachine(BaseEstimator):
def variances(self, variances: "np.ndarray[('n_gaussians', 'n_features'), float]"):
self._variances = np.maximum(self.variance_thresholds, variances)
# Recompute g_norm for each gaussian [array of shape (n_gaussians,)]
n_log_2pi = self.variances.shape[-1] * np.log(2 * np.pi)
n_log_2pi = self._variances.shape[-1] * np.log(2 * np.pi)
self._g_norms = np.array(n_log_2pi + np.log(self._variances).sum(axis=-1))
@property
def variance_thresholds(self):
"""Threshold below which variances are clamped to prevent precision losses."""
if self._variance_thresholds is None:
raise ValueError("GMMMachine variance thresholds were never set.")
return EPSILON
return self._variance_thresholds
@variance_thresholds.setter
......@@ -411,7 +410,8 @@ class GMMMachine(BaseEstimator):
threshold: "Union[float, np.ndarray[('n_gaussians', 'n_features'), float]]",
):
self._variance_thresholds = threshold
self.variances = np.maximum(threshold, self.variances)
if self._variances is not None:
self.variances = np.maximum(threshold, self._variances)
@property
def g_norms(self):
......@@ -542,12 +542,11 @@ class GMMMachine(BaseEstimator):
if data is None:
raise ValueError("Data is required when training with k-means.")
logger.info("Initializing GMM with k-means.")
kmeans_trainer = self.k_means_trainer or KMeansTrainer(
kmeans_machine = self.k_means_trainer or KMeansMachine(
self.n_gaussians,
random_state=self.random_state,
)
kmeans_machine = KMeansMachine(self.n_gaussians).fit(
data, trainer=kmeans_trainer
)
kmeans_machine = kmeans_machine.fit(data)
(
variances,
......@@ -706,6 +705,10 @@ class GMMMachine(BaseEstimator):
else:
logger.debug("GMM means already set. Initialization was not run!")
if self._variances is None:
logger.warning("Variances were not defined before fit. Using variance=1")
self.variances = np.ones_like(self.means)
average_output = 0
logger.info("Training GMM...")
step = 0
......@@ -713,11 +716,7 @@ class GMMMachine(BaseEstimator):
step += 1
logger.info(
f"Iteration {step:3d}"
+ (
f"/{self.max_fitting_steps:3d}"
if self.max_fitting_steps is not None
else ""
)
+ (f"/{self.max_fitting_steps:3d}" if self.max_fitting_steps else "")
)
average_output_previous = average_output
......@@ -750,8 +749,9 @@ class GMMMachine(BaseEstimator):
and convergence_value <= self.convergence_threshold
):
logger.info("Reached convergence threshold. Training stopped.")
return self
logger.info("Reached maximum step. Training stopped without convergence.")
break
else:
logger.info("Reached maximum step. Training stopped without convergence.")
self.compute()
return self
......
......@@ -22,7 +22,7 @@ from bob.io.base import load as load_array
from bob.learn.em.mixture import GMMMachine
from bob.learn.em.mixture import GMMStats
from bob.learn.em.cluster import KMeansTrainer
from bob.learn.em.cluster import KMeansMachine
def test_GMMStats():
# Test a GMMStats
......@@ -133,11 +133,10 @@ def test_GMMMachine_1():
# Checks particular varianceThresholds-related methods
varianceThresholds1D = np.array([0.3, 1, 0.5], "float64")
gmm.variance_thresholds = varianceThresholds1D
np.testing.assert_equal(gmm.variance_thresholds[0,:], varianceThresholds1D)
np.testing.assert_equal(gmm.variance_thresholds[1,:], varianceThresholds1D)
np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds1D)
gmm.variance_thresholds = 0.005
np.testing.assert_equal(gmm.variance_thresholds, np.full((2,3), 0.005))
np.testing.assert_equal(gmm.variance_thresholds, 0.005)
gmm.means = newMeans
gmm.variances = newVariances
......@@ -251,6 +250,7 @@ def test_GMMStats_2():
machine = GMMMachine(n_gaussians)
machine.means = np.array([[0, 0, 0], [8, 8, 8]])
machine.variances = np.ones_like(machine.means)
# Populate the GMMStats
stats = machine.acc_statistics(data)
......@@ -314,6 +314,7 @@ def test_machine_parameters():
n_features = 2
machine = GMMMachine(n_gaussians)
machine.means = np.repeat([[0], [1], [-1]], n_features, 1)
machine.variances = np.ones_like(machine.means)
np.testing.assert_equal(machine.means, np.repeat([[0], [1], [-1]], n_features, 1))
np.testing.assert_equal(machine.variances, np.ones((n_gaussians, n_features)))
......@@ -331,7 +332,10 @@ def test_machine_parameters():
def test_kmeans_plusplus_init():
n_gaussians = 3
machine = GMMMachine(n_gaussians, k_means_trainer=KMeansTrainer("k-means++"))
machine = GMMMachine(
n_gaussians,
k_means_trainer=KMeansMachine(n_clusters=n_gaussians, init_method="k-means++"),
)
data = np.array([[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]])
machine = machine.fit(data)
expected_means = np.array([[2.25, 2.25], [-1.25, 0.25], [1.25, 1.25]])
......@@ -342,7 +346,10 @@ def test_kmeans_plusplus_init():
def test_kmeans_parallel_init():
n_gaussians = 3
machine = GMMMachine(n_gaussians, k_means_trainer=KMeansTrainer("k-means||"))
machine = GMMMachine(
n_gaussians,
k_means_trainer=KMeansMachine(n_clusters=n_gaussians, init_method="k-means||"),
)
data = np.array([[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]])
machine = machine.fit(data)
expected_means = np.array([[1.25, 1.25], [-1.25, 0.25], [2.25, 2.25]])
......@@ -356,6 +363,7 @@ def test_likelihood():
n_gaussians = 3
machine = GMMMachine(n_gaussians)
machine.means = np.repeat([[0], [1], [-1]], 3, 1)
machine.variances = np.ones_like(machine.means)
log_likelihood = machine.log_likelihood(data)
expected_ll = np.array(
[-3.6519900964986527, -3.83151883210222, -3.83151883210222, -5.344374066745753]
......@@ -390,6 +398,7 @@ def test_likelihood_weight():
n_gaussians = 3
machine = GMMMachine(n_gaussians)
machine.means = np.repeat([[0], [1], [-1]], 3, 1)
machine.variances = np.ones_like(machine.means)
machine.weights = [0.6, 0.1, 0.3]
log_likelihood = machine.log_likelihood(data)
expected_ll = np.array(
......@@ -429,7 +438,7 @@ def test_ml_em():
machine = GMMMachine(n_gaussians, update_means=True, update_variances=True, update_weights=True)
machine.means = np.repeat([[2], [8]], n_features, 1)
machine.initialize_gaussians(None)
machine.variances = np.ones_like(machine.means)
stats = machine.e_step( data)
machine.m_step(stats)
......@@ -447,6 +456,7 @@ def test_map_em():
n_gaussians = 2
prior_machine = GMMMachine(n_gaussians)
prior_machine.means = np.array([[2, 2, 2], [8, 8, 8]])
prior_machine.variances = np.ones_like(prior_machine.means)
prior_machine.weights = np.array([0.5, 0.5])
machine = GMMMachine(n_gaussians, trainer="map", ubm=prior_machine, update_means=True, update_variances=True, update_weights=True)
......@@ -483,6 +493,7 @@ def test_ml_transformer():
machine = GMMMachine(n_gaussians, update_means=True, update_variances=True, update_weights=True)
machine.means = np.array([[2, 2, 2], [8, 8, 8]])
machine.variances = np.ones_like(machine.means)
machine = machine.fit(data)
......@@ -514,6 +525,7 @@ def test_map_transformer():
n_features = 3
prior_machine = GMMMachine(n_gaussians)
prior_machine.means = np.array([[2, 2, 2], [8, 8, 8]])
prior_machine.variances = np.ones_like(prior_machine.means)
prior_machine.weights = np.array([0.5, 0.5])
machine = GMMMachine(n_gaussians, trainer="map", ubm=prior_machine, update_means=True, update_variances=True, update_weights=True)
......@@ -586,7 +598,7 @@ def test_gmm_ML_1():
def test_gmm_ML_2():
"""Trains a GMMMachine with ML_GMMTrainer; compares to an old reference"""
"""Trains a GMMMachine with ML_GMMTrainer; compares to a reference"""
ar = load_array(resource_filename("bob.learn.em", "data/dataNormalized.hdf5"))
# Initialize GMMMachine
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment