Skip to content
Snippets Groups Projects
Commit fb7b686b authored by Yannick DAYER
Browse files

Added GMM dask tests, renamed tests

parent 807ca422
No related branches found
No related tags found
2 merge requests: !42 "GMM implementation in Python", !40 "Transition to a pure python implementation"
...@@ -101,7 +101,7 @@ def test_GMMStats(): ...@@ -101,7 +101,7 @@ def test_GMMStats():
# Clean-up # Clean-up
os.unlink(filename) os.unlink(filename)
def test_GMMMachine_1(): def test_GMMMachine():
# Test a GMMMachine basic features # Test a GMMMachine basic features
weights = np.array([0.5, 0.5], "float64") weights = np.array([0.5, 0.5], "float64")
...@@ -182,9 +182,41 @@ def test_GMMMachine_1(): ...@@ -182,9 +182,41 @@ def test_GMMMachine_1():
assert (gmm == gmm6) is False assert (gmm == gmm6) is False
assert gmm.is_similar_to(gmm6) is False assert gmm.is_similar_to(gmm6) is False
# Saving and loading
filename = tempfile.mkstemp(suffix=".hdf5")[1]
gmm.save(HDF5File(filename, "w"))
gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r"))
assert gmm == gmm1
gmm.save(filename)
gmm1 = GMMMachine.from_hdf5(filename)
assert gmm == gmm1
os.unlink(filename)
# Weights
n_gaussians = 5
machine = GMMMachine(n_gaussians)
default_weights = np.full(shape=(n_gaussians,), fill_value=1.0 / n_gaussians)
default_log_weights = np.full(
shape=(n_gaussians,), fill_value=np.log(1.0 / n_gaussians)
)
# Test weights getting and setting
np.testing.assert_almost_equal(machine.weights, default_weights)
np.testing.assert_almost_equal(machine.log_weights, default_log_weights)
def test_GMMMachine_2(): modified_weights = default_weights
# Test a GMMMachine (statistics) modified_weights[: n_gaussians // 2] = (1 / n_gaussians) / 2
modified_weights[n_gaussians // 2 + n_gaussians % 2 :] = (1 / n_gaussians) * 1.5
# Ensure setter works (log_weights is updated correctly)
machine.weights = modified_weights
np.testing.assert_almost_equal(machine.weights, modified_weights)
np.testing.assert_almost_equal(machine.log_weights, np.log(modified_weights))
def test_GMMMachine_stats():
"""Tests a GMMMachine (statistics)"""
arrayset = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")) arrayset = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
gmm = GMMMachine(n_gaussians=2) gmm = GMMMachine(n_gaussians=2)
...@@ -206,8 +238,8 @@ def test_GMMMachine_2(): ...@@ -206,8 +238,8 @@ def test_GMMMachine_2():
np.testing.assert_almost_equal(stats.sum_pxx, stats_ref.sum_pxx, decimal=10) np.testing.assert_almost_equal(stats.sum_pxx, stats_ref.sum_pxx, decimal=10)
def test_GMMMachine_3(): def test_GMMMachine_ll_computation():
# Test a GMMMachine (log-likelihood computation) """Test a GMMMachine (log-likelihood computation)"""
data = load_array(resource_filename("bob.learn.em", "data/data.hdf5")) data = load_array(resource_filename("bob.learn.em", "data/data.hdf5"))
gmm = GMMMachine(n_gaussians=2) gmm = GMMMachine(n_gaussians=2)
...@@ -215,13 +247,12 @@ def test_GMMMachine_3(): ...@@ -215,13 +247,12 @@ def test_GMMMachine_3():
gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5")) gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5"))
gmm.variances = load_array(resource_filename("bob.learn.em", "data/variances.hdf5")) gmm.variances = load_array(resource_filename("bob.learn.em", "data/variances.hdf5"))
# Compare the log-likelihood with the one obtained using Chris Matlab # Compare the log-likelihood with the one obtained using Chris Matlab implementation
# implementation
matlab_ll_ref = -2.361583051672024e+02 matlab_ll_ref = -2.361583051672024e+02
np.testing.assert_almost_equal(gmm.log_likelihood(data), matlab_ll_ref, decimal=10) np.testing.assert_almost_equal(gmm.log_likelihood(data), matlab_ll_ref, decimal=10)
def test_GMMMachine_4(): def test_GMMMachine_single_ll_vs_multiple():
np.random.seed(3) # FIXING A SEED np.random.seed(3) # FIXING A SEED
...@@ -241,7 +272,7 @@ def test_GMMMachine_4(): ...@@ -241,7 +272,7 @@ def test_GMMMachine_4():
assert np.isclose(ll, gmm.log_likelihood(data).mean()) assert np.isclose(ll, gmm.log_likelihood(data).mean())
def test_GMMStats_2(): def test_GMMStats_operations():
"""Test a GMMStats.""" """Test a GMMStats."""
# Initializing a GMMStats # Initializing a GMMStats
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9]]) data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [7, 8, 9]])
...@@ -319,18 +350,17 @@ def test_machine_parameters(): ...@@ -319,18 +350,17 @@ def test_machine_parameters():
np.testing.assert_equal(machine.variances, np.ones((n_gaussians, n_features))) np.testing.assert_equal(machine.variances, np.ones((n_gaussians, n_features)))
# Setters # Setters
new_means = np.repeat([[1], [2], [3]], n_features, axis=1) new_means = np.repeat([[1], [2], [3]], n_features, axis=1)
machine.means = new_means machine.means = new_means
assert machine.means.shape == (n_gaussians, n_features) assert machine.means.shape == (n_gaussians, n_features)
np.testing.assert_almost_equal(machine.means, new_means) np.testing.assert_equal(machine.means, new_means)
new_variances = np.repeat([[0.2], [1.1], [1]], n_features, axis=1) new_variances = np.repeat([[0.2], [1.1], [1]], n_features, axis=1)
machine.variances = new_variances machine.variances = new_variances
assert machine.variances.shape == (n_gaussians, n_features) assert machine.variances.shape == (n_gaussians, n_features)
np.testing.assert_almost_equal(machine.variances, new_variances) np.testing.assert_equal(machine.variances, new_variances)
def test_kmeans_plusplus_init(): def test_gmm_kmeans_plusplus_init():
n_gaussians = 3 n_gaussians = 3
machine = GMMMachine( machine = GMMMachine(
n_gaussians, n_gaussians,
...@@ -344,7 +374,7 @@ def test_kmeans_plusplus_init(): ...@@ -344,7 +374,7 @@ def test_kmeans_plusplus_init():
np.testing.assert_almost_equal(machine.variances, expected_variances) np.testing.assert_almost_equal(machine.variances, expected_variances)
def test_kmeans_parallel_init(): def test_gmm_kmeans_parallel_init():
n_gaussians = 3 n_gaussians = 3
machine = GMMMachine( machine = GMMMachine(
n_gaussians, n_gaussians,
...@@ -567,9 +597,6 @@ def loadGMM(): ...@@ -567,9 +597,6 @@ def loadGMM():
return gmm return gmm
def equals(x, y, epsilon):
    """Return whether ``x`` and ``y`` differ element-wise by less than ``epsilon``.

    Parameters
    ----------
    x, y
        Values to compare: numpy arrays, sequences or scalars. They are
        converted with ``np.asarray`` so plain Python floats and lists are
        accepted as well (calling ``.all()`` on the result of a comparison
        between Python floats would fail, as ``bool`` has no such method).
    epsilon
        Strict upper bound on the absolute difference of each pair of elements.

    Returns
    -------
    numpy.bool_
        True when every absolute difference is strictly below ``epsilon``.
        Empty inputs compare as equal.
    """
    difference = np.abs(np.asarray(x) - np.asarray(y))
    return np.all(difference < epsilon)
def test_gmm_ML_1(): def test_gmm_ML_1():
"""Trains a GMMMachine with ML_GMMTrainer""" """Trains a GMMMachine with ML_GMMTrainer"""
ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")) ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
...@@ -632,44 +659,6 @@ def test_gmm_ML_2(): ...@@ -632,44 +659,6 @@ def test_gmm_ML_2():
np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4) np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4)
def test_gmm_ML_parallel():
    """Fit a GMMMachine on a dask array and check it against stored references."""

    def _packaged(name):
        # Load one array from the data files shipped with bob.learn.em.
        return load_array(resource_filename("bob.learn.em", name))

    training_data = da.array(_packaged("data/dataNormalized.hdf5"))

    # Machine with 5 Gaussians, initialized from the "AfterKMeans" files.
    gmm = GMMMachine(n_gaussians=5)
    gmm.means = _packaged("data/meansAfterKMeans.hdf5").astype("float64")
    gmm.variances = _packaged("data/variancesAfterKMeans.hdf5").astype("float64")
    # The stored weight values are exponentiated before being assigned.
    gmm.weights = np.exp(_packaged("data/weightsAfterKMeans.hdf5").astype("float64"))
    gmm.variance_thresholds = 0.001

    # Training configuration, applied in the same order as plain assignments.
    training_options = {
        "mean_var_update_threshold": 0.001,
        "max_fitting_steps": 25,
        "convergence_threshold": 0.00001,
        "update_means": True,
        "update_variances": True,
        "update_weights": True,
    }
    for option, value in training_options.items():
        setattr(gmm, option, value)

    # Run the fit on the dask data.
    gmm.fit(training_data)

    # torch3vision reference values.
    expected_means = _packaged("data/meansAfterML.hdf5")
    expected_variances = _packaged("data/variancesAfterML.hdf5")
    expected_weights = _packaged("data/weightsAfterML.hdf5")

    # The fitted parameters must match the references within tolerance.
    np.testing.assert_allclose(gmm.means, expected_means, atol=3e-3)
    np.testing.assert_allclose(gmm.variances, expected_variances, atol=3e-3)
    np.testing.assert_allclose(gmm.weights, expected_weights, atol=1e-4)
def test_gmm_MAP_1(): def test_gmm_MAP_1():
"""Train a GMMMachine with MAP_GMMTrainer""" """Train a GMMMachine with MAP_GMMTrainer"""
ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")) ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5"))
...@@ -788,7 +777,7 @@ def test_gmm_MAP_3(): ...@@ -788,7 +777,7 @@ def test_gmm_MAP_3():
def test_gmm_test(): def test_gmm_test():
""" Tests a GMMMachine by computing scores against a model and comparing to a reference """Tests a GMMMachine by computing scores against a model and comparing to a reference
""" """
ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5")) ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))
...@@ -810,3 +799,88 @@ def test_gmm_test(): ...@@ -810,3 +799,88 @@ def test_gmm_test():
# Compare current results to torch3vision # Compare current results to torch3vision
assert abs(score-score_mean_ref)/score_mean_ref<1e-4 assert abs(score-score_mean_ref)/score_mean_ref<1e-4
def test_gmm_ML_dask():
    """Fit a GMMMachine with dask array data and compare the result to a reference."""
    package = "bob.learn.em"
    samples = da.array(
        load_array(resource_filename(package, "data/dataNormalized.hdf5"))
    )

    # Build the machine and seed it with the parameters stored after k-means.
    gmm = GMMMachine(n_gaussians=5)
    kmeans_means = load_array(resource_filename(package, "data/meansAfterKMeans.hdf5"))
    gmm.means = kmeans_means.astype("float64")
    kmeans_variances = load_array(
        resource_filename(package, "data/variancesAfterKMeans.hdf5")
    )
    gmm.variances = kmeans_variances.astype("float64")
    kmeans_weights = load_array(
        resource_filename(package, "data/weightsAfterKMeans.hdf5")
    )
    # The stored values go through np.exp before being used as weights.
    gmm.weights = np.exp(kmeans_weights.astype("float64"))
    gmm.variance_thresholds = 0.001

    # Training settings: thresholds, step limit, and which parameters to update.
    gmm.mean_var_update_threshold = 0.001
    gmm.max_fitting_steps = 25
    gmm.convergence_threshold = 0.00001
    gmm.update_means = gmm.update_variances = gmm.update_weights = True

    # Train on the dask array.
    gmm.fit(samples)

    # Load the torch3vision references, then compare each fitted parameter.
    references = (
        load_array(resource_filename(package, "data/meansAfterML.hdf5")),
        load_array(resource_filename(package, "data/variancesAfterML.hdf5")),
        load_array(resource_filename(package, "data/weightsAfterML.hdf5")),
    )
    tolerances = (3e-3, 3e-3, 1e-4)
    for attribute, reference, tolerance in zip(
        ("means", "variances", "weights"), references, tolerances
    ):
        np.testing.assert_allclose(getattr(gmm, attribute), reference, atol=tolerance)
def test_gmm_MAP_dask():
    """Check MAP training of a GMMMachine when the data is a dask array."""

    def _packaged(name):
        # Load one array from the data files shipped with bob.learn.em.
        return load_array(resource_filename("bob.learn.em", name))

    adaptation_data = da.array(_packaged("data/dataforMAP.hdf5"))

    n_gaussians = 5
    variance_threshold = 0.001

    # Prior model passed as `ubm`, filled from the "AfterML" files.
    ubm = GMMMachine(n_gaussians)
    ubm.means = _packaged("data/meansAfterML.hdf5")
    ubm.variances = _packaged("data/variancesAfterML.hdf5")
    ubm.weights = _packaged("data/weightsAfterML.hdf5")
    ubm.variance_thresholds = variance_threshold

    # MAP configuration: a single fitting step that only updates the means.
    map_options = dict(
        trainer="map",
        ubm=ubm,
        convergence_threshold=0.001,
        max_fitting_steps=1,
        update_means=True,
        update_variances=False,
        update_weights=False,
        mean_var_update_threshold=0.00001,
        relevance_factor=None,
    )
    gmm = GMMMachine(n_gaussians, **map_options)
    gmm.variance_thresholds = variance_threshold

    # Train; the value returned by fit() is what gets checked below.
    gmm = gmm.fit(adaptation_data)

    # torch3vision reference values.
    expected_means = _packaged("data/meansAfterMAP.hdf5")
    expected_variances = _packaged("data/variancesAfterMAP.hdf5")
    expected_weights = _packaged("data/weightsAfterMAP.hdf5")

    # Gaps are quite large. This might be explained by the fact that there is no
    # adaptation of a given Gaussian in torch3 when the corresponding
    # responsibilities are below the responsibilities threshold.
    np.testing.assert_allclose(gmm.means, expected_means, atol=2e-1)
    np.testing.assert_allclose(gmm.variances, expected_variances, atol=1e-4)
    np.testing.assert_allclose(gmm.weights, expected_weights, atol=1e-4)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment