From fb7b686b144cc62322f5615ed403c4b7c22bc27e Mon Sep 17 00:00:00 2001 From: Yannick DAYER <yannick.dayer@idiap.ch> Date: Tue, 7 Dec 2021 17:44:19 +0100 Subject: [PATCH] Added GMM dask tests, renamed tests --- bob/learn/em/test/test_gmm.py | 186 ++++++++++++++++++++++++---------- 1 file changed, 130 insertions(+), 56 deletions(-) diff --git a/bob/learn/em/test/test_gmm.py b/bob/learn/em/test/test_gmm.py index c11d5e1..785b0ae 100644 --- a/bob/learn/em/test/test_gmm.py +++ b/bob/learn/em/test/test_gmm.py @@ -101,7 +101,7 @@ def test_GMMStats(): # Clean-up os.unlink(filename) -def test_GMMMachine_1(): +def test_GMMMachine(): # Test a GMMMachine basic features weights = np.array([0.5, 0.5], "float64") @@ -182,9 +182,41 @@ def test_GMMMachine_1(): assert (gmm == gmm6) is False assert gmm.is_similar_to(gmm6) is False + # Saving and loading + filename = tempfile.mkstemp(suffix=".hdf5")[1] + gmm.save(HDF5File(filename, "w")) + gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r")) + assert gmm == gmm1 + gmm.save(filename) + gmm1 = GMMMachine.from_hdf5(filename) + assert gmm == gmm1 + os.unlink(filename) + + # Weights + n_gaussians = 5 + machine = GMMMachine(n_gaussians) + + default_weights = np.full(shape=(n_gaussians,), fill_value=1.0 / n_gaussians) + default_log_weights = np.full( + shape=(n_gaussians,), fill_value=np.log(1.0 / n_gaussians) + ) + + # Test weights getting and setting + np.testing.assert_almost_equal(machine.weights, default_weights) + np.testing.assert_almost_equal(machine.log_weights, default_log_weights) -def test_GMMMachine_2(): - # Test a GMMMachine (statistics) + modified_weights = default_weights + modified_weights[: n_gaussians // 2] = (1 / n_gaussians) / 2 + modified_weights[n_gaussians // 2 + n_gaussians % 2 :] = (1 / n_gaussians) * 1.5 + + # Ensure setter works (log_weights is updated correctly) + machine.weights = modified_weights + np.testing.assert_almost_equal(machine.weights, modified_weights) + np.testing.assert_almost_equal(machine.log_weights, np.log(modified_weights)) + + +def test_GMMMachine_stats(): + """Tests a GMMMachine (statistics)""" arrayset = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")) gmm = GMMMachine(n_gaussians=2) @@ -206,8 +238,8 @@ def test_GMMMachine_2(): np.testing.assert_almost_equal(stats.sum_pxx, stats_ref.sum_pxx, decimal=10) -def test_GMMMachine_3(): - # Test a GMMMachine (log-likelihood computation) +def test_GMMMachine_ll_computation(): + """Test a GMMMachine (log-likelihood computation)""" data = load_array(resource_filename("bob.learn.em", "data/data.hdf5")) gmm = GMMMachine(n_gaussians=2) @@ -215,13 +247,12 @@ def test_GMMMachine_3(): gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5")) gmm.variances = load_array(resource_filename("bob.learn.em", "data/variances.hdf5")) - # Compare the log-likelihood with the one obtained using Chris Matlab - # implementation + # Compare the log-likelihood with the one obtained using Chris Matlab implementation matlab_ll_ref = -2.361583051672024e+02 np.testing.assert_almost_equal(gmm.log_likelihood(data), matlab_ll_ref, decimal=10) -def test_GMMMachine_4(): +def test_GMMMachine_single_ll_vs_multiple(): np.random.seed(3) # FIXING A SEED @@ -241,7 +272,7 @@ def test_GMMMachine_4(): assert np.isclose(ll, gmm.log_likelihood(data).mean()) -def test_GMMStats_2(): +def test_GMMStats_operations(): """Test a GMMStats.""" # Initializing a GMMStats data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9]]) @@ -319,18 +350,17 @@ def test_machine_parameters(): np.testing.assert_equal(machine.variances, np.ones((n_gaussians, n_features))) # Setters - new_means = np.repeat([[1], [2], [3]], n_features, axis=1) machine.means = new_means assert machine.means.shape == (n_gaussians, n_features) - np.testing.assert_almost_equal(machine.means, new_means) + np.testing.assert_equal(machine.means, new_means) new_variances = np.repeat([[0.2], [1.1], [1]], n_features, axis=1) machine.variances = new_variances assert machine.variances.shape == (n_gaussians, n_features) - np.testing.assert_almost_equal(machine.variances, new_variances) + np.testing.assert_equal(machine.variances, new_variances) -def test_kmeans_plusplus_init(): +def test_gmm_kmeans_plusplus_init(): n_gaussians = 3 machine = GMMMachine( n_gaussians, @@ -344,7 +374,7 @@ def test_kmeans_plusplus_init(): np.testing.assert_almost_equal(machine.variances, expected_variances) -def test_kmeans_parallel_init(): +def test_gmm_kmeans_parallel_init(): n_gaussians = 3 machine = GMMMachine( n_gaussians, @@ -567,9 +597,6 @@ def loadGMM(): return gmm -def equals(x, y, epsilon): - return (abs(x - y) < epsilon).all() - def test_gmm_ML_1(): """Trains a GMMMachine with ML_GMMTrainer""" ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")) @@ -632,44 +659,6 @@ def test_gmm_ML_2(): np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4) -def test_gmm_ML_parallel(): - """Trains a GMMMachine with ML_GMMTrainer; compares to a reference""" - - ar = da.array(load_array(resource_filename("bob.learn.em", "data/dataNormalized.hdf5"))) - - # Initialize GMMMachine - gmm = GMMMachine(n_gaussians=5) - gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterKMeans.hdf5")).astype("float64") - gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterKMeans.hdf5")).astype("float64") - gmm.weights = np.exp(load_array(resource_filename("bob.learn.em", "data/weightsAfterKMeans.hdf5")).astype("float64")) - - threshold = 0.001 - gmm.variance_thresholds = threshold - - # Initialize ML Trainer - gmm.mean_var_update_threshold = 0.001 - gmm.max_fitting_steps = 25 - gmm.convergence_threshold = 0.00001 - gmm.update_means = True - gmm.update_variances = True - gmm.update_weights = True - - # Run ML - gmm.fit(ar) - - # Test results - # Load torch3vision reference - meansML_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5")) - variancesML_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")) - weightsML_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")) - - # Compare to current results - np.testing.assert_allclose(gmm.means, meansML_ref, atol=3e-3) - np.testing.assert_allclose(gmm.variances, variancesML_ref, atol=3e-3) - np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4) - - - def test_gmm_MAP_1(): """Train a GMMMachine with MAP_GMMTrainer""" ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")) @@ -788,7 +777,7 @@ def test_gmm_MAP_3(): def test_gmm_test(): - """ Tests a GMMMachine by computing scores against a model and comparing to a reference + """Tests a GMMMachine by computing scores against a model and comparing to a reference """ ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5")) @@ -810,3 +799,88 @@ def test_gmm_test(): # Compare current results to torch3vision assert abs(score-score_mean_ref)/score_mean_ref<1e-4 + + +def test_gmm_ML_dask(): + """Trains a GMMMachine with dask array data; compares to a reference""" + + ar = da.array(load_array(resource_filename("bob.learn.em", "data/dataNormalized.hdf5"))) + + # Initialize GMMMachine + gmm = GMMMachine(n_gaussians=5) + gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterKMeans.hdf5")).astype("float64") + gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterKMeans.hdf5")).astype("float64") + gmm.weights = np.exp(load_array(resource_filename("bob.learn.em", "data/weightsAfterKMeans.hdf5")).astype("float64")) + + threshold = 0.001 + gmm.variance_thresholds = threshold + + # Initialize ML Trainer + gmm.mean_var_update_threshold = 0.001 + gmm.max_fitting_steps = 25 + gmm.convergence_threshold = 0.00001 + gmm.update_means = True + gmm.update_variances = True + gmm.update_weights = True + + # Run ML + gmm.fit(ar) + + # Test results + # Load torch3vision reference + meansML_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5")) + variancesML_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")) + weightsML_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")) + + # Compare to current results + np.testing.assert_allclose(gmm.means, meansML_ref, atol=3e-3) + np.testing.assert_allclose(gmm.variances, variancesML_ref, atol=3e-3) + np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4) + +def test_gmm_MAP_dask(): + """Test a GMMMachine for MAP with a dask array as data.""" + ar = da.array(load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))) + + # Initialize GMMMachine + n_gaussians = 5 + prior_gmm = GMMMachine(n_gaussians) + prior_gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5")) + prior_gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")) + prior_gmm.weights = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")) + + threshold = 0.001 + prior_gmm.variance_thresholds = threshold + + # Initialize MAP Trainer + prior = 0.001 + accuracy = 0.00001 + gmm = GMMMachine( + n_gaussians, + trainer="map", + ubm=prior_gmm, + convergence_threshold=prior, + max_fitting_steps=1, + update_means=True, + update_variances=False, + update_weights=False, + mean_var_update_threshold=accuracy, + relevance_factor=None, + ) + gmm.variance_thresholds = threshold + + # Train + gmm = gmm.fit(ar) + + # Test results + # Load torch3vision reference + meansMAP_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterMAP.hdf5")) + variancesMAP_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterMAP.hdf5")) + weightsMAP_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterMAP.hdf5")) + + # Compare to current results + # Gaps are quite large. This might be explained by the fact that there is no + # adaptation of a given Gaussian in torch3 when the corresponding responsibilities + # are below the responsibilities threshold + np.testing.assert_allclose(gmm.means, meansMAP_ref, atol=2e-1) + np.testing.assert_allclose(gmm.variances, variancesMAP_ref, atol=1e-4) + np.testing.assert_allclose(gmm.weights, weightsMAP_ref, atol=1e-4) -- GitLab