From 852c3d8f7cd82f766b5824bbab73beffd8388e3f Mon Sep 17 00:00:00 2001 From: Yannick DAYER <yannick.dayer@idiap.ch> Date: Fri, 26 Nov 2021 16:24:44 +0100 Subject: [PATCH] [black][isort] Refactor --- bob/learn/em/__init__.py | 4 +- bob/learn/em/cluster/k_means.py | 8 +- bob/learn/em/test/test_gaussian.py | 184 ++++----- bob/learn/em/test/test_gmm.py | 477 ++++++++++++------------ bob/learn/em/test/test_kmeans.py | 18 +- bob/learn/em/test/test_linearscoring.py | 351 +++++++++++------ bob/learn/em/test/test_picklability.py | 9 +- 7 files changed, 600 insertions(+), 451 deletions(-) diff --git a/bob/learn/em/__init__.py b/bob/learn/em/__init__.py index 99d4c77..472a7d9 100644 --- a/bob/learn/em/__init__.py +++ b/bob/learn/em/__init__.py @@ -1,7 +1,8 @@ +import bob.extension + from . import cluster from . import mixture -import bob.extension def get_config(): """Returns a string containing the configuration information.""" @@ -10,4 +11,3 @@ def get_config(): # gets sphinx autodoc done right - don't remove it __all__ = [_ for _ in dir() if not _.startswith("_")] - diff --git a/bob/learn/em/cluster/k_means.py b/bob/learn/em/cluster/k_means.py index fcf1050..98418b4 100644 --- a/bob/learn/em/cluster/k_means.py +++ b/bob/learn/em/cluster/k_means.py @@ -3,11 +3,11 @@ # @date: Tue 27 Jul 2021 11:04:10 UTC+02 import logging -from typing import Union from typing import Tuple +from typing import Union -import numpy as np import dask.array as da +import numpy as np from dask_ml.cluster.k_means import k_init from sklearn.base import BaseEstimator @@ -152,7 +152,9 @@ class KMeansMachine(BaseEstimator): """ if trainer is None: logger.info("Using default k-means trainer.") - trainer = KMeansTrainer(init_method="k-means||", random_state=self.random_state) + trainer = KMeansTrainer( + init_method="k-means||", random_state=self.random_state + ) logger.debug(f"Initializing trainer.") trainer.initialize( diff --git a/bob/learn/em/test/test_gaussian.py b/bob/learn/em/test/test_gaussian.py index 0e64705..db01890 100644 --- a/bob/learn/em/test/test_gaussian.py +++ b/bob/learn/em/test/test_gaussian.py @@ -9,104 +9,106 @@ """ import os -import numpy import tempfile -import bob.io.base +import numpy +import bob.io.base from bob.learn.em import Gaussian + def equals(x, y, epsilon): - return (abs(x - y) < epsilon) + return abs(x - y) < epsilon + def test_GaussianNormal(): - # Test the likelihood computation of a simple normal Gaussian - gaussian = Gaussian(2) - # By default, initialized with zero mean and unit variance - logLH = gaussian.log_likelihood(numpy.array([0.4, 0.2], 'float64')) - assert equals(logLH, -1.93787706641, 1e-10) + # Test the likelihood computation of a simple normal Gaussian + gaussian = Gaussian(2) + # By default, initialized with zero mean and unit variance + logLH = gaussian.log_likelihood(numpy.array([0.4, 0.2], "float64")) + assert equals(logLH, -1.93787706641, 1e-10) + def test_GaussianMachine(): - # Test a GaussianMachine more thoroughly - - # Initializes a Gaussian with zero mean and unit variance - g = Gaussian(3) - assert (g.mean == 0.0).all() - assert (g.variance == 1.0).all() - assert g.shape == (3,) - - # Set and check mean, variance, variance thresholds - mean = numpy.array([0, 1, 2], 'float64') - variance = numpy.array([3, 2, 1], 'float64') - g.mean = mean - g.variance = variance - g.set_variance_thresholds(0.0005) - assert (g.mean == mean).all() - assert (g.variance == variance).all() - assert (g.variance_thresholds == 0.0005).all() - - # Save and read from file - filename = str(tempfile.mkstemp(".hdf5")[1]) - g.save(bob.io.base.HDF5File(filename, 'w')) - g_loaded = Gaussian(bob.io.base.HDF5File(filename)) - assert g == g_loaded - assert (g != g_loaded ) is False - assert g.is_similar_to(g_loaded) - - # Save and read from file using the keyword argument - filename = str(tempfile.mkstemp(".hdf5")[1]) - g.save(hdf5=bob.io.base.HDF5File(filename, 'w')) - g_loaded = Gaussian(hdf5=bob.io.base.HDF5File(filename)) - assert g == g_loaded - assert (g != g_loaded ) is False - assert g.is_similar_to(g_loaded) - - # Save and loading from file using the keyword argument - filename = str(tempfile.mkstemp(".hdf5")[1]) - g.save(bob.io.base.HDF5File(filename, 'w')) - g_loaded = bob.learn.em.Gaussian() - g_loaded.load(bob.io.base.HDF5File(filename)) - assert g == g_loaded - assert (g != g_loaded ) is False - assert g.is_similar_to(g_loaded) - - # Save and loading from file using the keyword argument - filename = str(tempfile.mkstemp(".hdf5")[1]) - g.save(bob.io.base.HDF5File(filename, 'w')) - g_loaded = bob.learn.em.Gaussian() - g_loaded.load(hdf5=bob.io.base.HDF5File(filename)) - assert g == g_loaded - assert (g != g_loaded ) is False - assert g.is_similar_to(g_loaded) - - - # Make them different - g_loaded.set_variance_thresholds(0.001) - assert (g == g_loaded ) is False - assert g != g_loaded - - # Check likelihood computation - sample1 = numpy.array([0, 1, 2], 'float64') - sample2 = numpy.array([1, 2, 3], 'float64') - sample3 = numpy.array([2, 3, 4], 'float64') - ref1 = -3.652695334228046 - ref2 = -4.569362000894712 - ref3 = -7.319362000894712 - eps = 1e-10 - assert equals(g.log_likelihood(sample1), ref1, eps) - assert equals(g.log_likelihood(sample2), ref2, eps) - assert equals(g.log_likelihood(sample3), ref3, eps) - - # Check resize and assignment - g.resize(5) - assert g.shape == (5,) - g2 = Gaussian() - g2 = g - assert g == g2 - assert (g != g2 ) is False - g3 = Gaussian(g) - assert g == g3 - assert (g != g3 ) is False - - # Clean-up - os.unlink(filename) + # Test a GaussianMachine more thoroughly + + # Initializes a Gaussian with zero mean and unit variance + g = Gaussian(3) + assert (g.mean == 0.0).all() + assert (g.variance == 1.0).all() + assert g.shape == (3,) + + # Set and check mean, variance, variance thresholds + mean = numpy.array([0, 1, 2], "float64") + variance = numpy.array([3, 2, 1], "float64") + g.mean = mean + g.variance = variance + g.set_variance_thresholds(0.0005) + assert (g.mean == mean).all() + assert (g.variance == variance).all() + assert (g.variance_thresholds == 0.0005).all() + + # Save and read from file + filename = str(tempfile.mkstemp(".hdf5")[1]) + g.save(bob.io.base.HDF5File(filename, "w")) + g_loaded = Gaussian(bob.io.base.HDF5File(filename)) + assert g == g_loaded + assert (g != g_loaded) is False + assert g.is_similar_to(g_loaded) + + # Save and read from file using the keyword argument + filename = str(tempfile.mkstemp(".hdf5")[1]) + g.save(hdf5=bob.io.base.HDF5File(filename, "w")) + g_loaded = Gaussian(hdf5=bob.io.base.HDF5File(filename)) + assert g == g_loaded + assert (g != g_loaded) is False + assert g.is_similar_to(g_loaded) + + # Save and loading from file using the keyword argument + filename = str(tempfile.mkstemp(".hdf5")[1]) + g.save(bob.io.base.HDF5File(filename, "w")) + g_loaded = bob.learn.em.Gaussian() + g_loaded.load(bob.io.base.HDF5File(filename)) + assert g == g_loaded + assert (g != g_loaded) is False + assert g.is_similar_to(g_loaded) + + # Save and loading from file using the keyword argument + filename = str(tempfile.mkstemp(".hdf5")[1]) + g.save(bob.io.base.HDF5File(filename, "w")) + g_loaded = bob.learn.em.Gaussian() + g_loaded.load(hdf5=bob.io.base.HDF5File(filename)) + assert g == g_loaded + assert (g != g_loaded) is False + assert g.is_similar_to(g_loaded) + + # Make them different + g_loaded.set_variance_thresholds(0.001) + assert (g == g_loaded) is False + assert g != g_loaded + + # Check likelihood computation + sample1 = numpy.array([0, 1, 2], "float64") + sample2 = numpy.array([1, 2, 3], "float64") + sample3 = numpy.array([2, 3, 4], "float64") + ref1 = -3.652695334228046 + ref2 = -4.569362000894712 + ref3 = -7.319362000894712 + eps = 1e-10 + assert equals(g.log_likelihood(sample1), ref1, eps) + assert equals(g.log_likelihood(sample2), ref2, eps) + assert equals(g.log_likelihood(sample3), ref3, eps) + + # Check resize and assignment + g.resize(5) + assert g.shape == (5,) + g2 = Gaussian() + g2 = g + assert g == g2 + assert (g != g2) is False + g3 = Gaussian(g) + assert g == g3 + assert (g != g3) is False + + # Clean-up + os.unlink(filename) diff --git a/bob/learn/em/test/test_gmm.py b/bob/learn/em/test/test_gmm.py index 1733c66..0d2bd2f 100644 --- a/bob/learn/em/test/test_gmm.py +++ b/bob/learn/em/test/test_gmm.py @@ -9,255 +9,266 @@ """ import os -import numpy import tempfile +import numpy + import bob.io.base from bob.io.base.test_utils import datafile +from bob.learn.em import GMMMachine +from bob.learn.em import GMMStats -from bob.learn.em import GMMStats, GMMMachine def test_GMMStats(): - # Test a GMMStats - # Initializes a GMMStats - gs = GMMStats(2,3) - log_likelihood = -3. - T = 57 - n = numpy.array([4.37, 5.31], 'float64') - sumpx = numpy.array([[1., 2., 3.], [4., 5., 6.]], 'float64') - sumpxx = numpy.array([[10., 20., 30.], [40., 50., 60.]], 'float64') - gs.log_likelihood = log_likelihood - gs.t = T - gs.n = n - gs.sum_px = sumpx - gs.sum_pxx = sumpxx - assert gs.log_likelihood == log_likelihood - assert gs.t == T - assert (gs.n == n).all() - assert (gs.sum_px == sumpx).all() - assert (gs.sum_pxx == sumpxx).all() - assert gs.shape==(2,3) - - # Saves and reads from file - filename = str(tempfile.mkstemp(".hdf5")[1]) - gs.save(bob.io.base.HDF5File(filename, 'w')) - gs_loaded = GMMStats(bob.io.base.HDF5File(filename)) - assert gs == gs_loaded - assert (gs != gs_loaded ) is False - assert gs.is_similar_to(gs_loaded) - - # Saves and reads from file using the keyword argument - filename = str(tempfile.mkstemp(".hdf5")[1]) - gs.save(hdf5=bob.io.base.HDF5File(filename, 'w')) - gs_loaded = GMMStats(bob.io.base.HDF5File(filename)) - assert gs == gs_loaded - assert (gs != gs_loaded ) is False - assert gs.is_similar_to(gs_loaded) - - # Saves and load from file using the keyword argument - filename = str(tempfile.mkstemp(".hdf5")[1]) - gs.save(hdf5=bob.io.base.HDF5File(filename, 'w')) - gs_loaded = GMMStats() - gs_loaded.load(bob.io.base.HDF5File(filename)) - assert gs == gs_loaded - assert (gs != gs_loaded ) is False - assert gs.is_similar_to(gs_loaded) - - # Saves and load from file using the keyword argument - filename = str(tempfile.mkstemp(".hdf5")[1]) - gs.save(hdf5=bob.io.base.HDF5File(filename, 'w')) - gs_loaded = GMMStats() - gs_loaded.load(hdf5=bob.io.base.HDF5File(filename)) - assert gs == gs_loaded - assert (gs != gs_loaded ) is False - assert gs.is_similar_to(gs_loaded) - - - # Makes them different - gs_loaded.t = 58 - assert (gs == gs_loaded ) is False - assert gs != gs_loaded - assert (gs.is_similar_to(gs_loaded)) is False - # Accumulates from another GMMStats - gs2 = GMMStats(2,3) - gs2.log_likelihood = log_likelihood - gs2.t = T - gs2.n = n - gs2.sum_px = sumpx - gs2.sum_pxx = sumpxx - gs2 += gs - eps = 1e-8 - assert gs2.log_likelihood == 2*log_likelihood - assert gs2.t == 2*T - assert numpy.allclose(gs2.n, 2*n, eps) - assert numpy.allclose(gs2.sum_px, 2*sumpx, eps) - assert numpy.allclose(gs2.sum_pxx, 2*sumpxx, eps) - - # Reinit and checks for zeros - gs_loaded.init() - assert gs_loaded.log_likelihood == 0 - assert gs_loaded.t == 0 - assert (gs_loaded.n == 0).all() - assert (gs_loaded.sum_px == 0).all() - assert (gs_loaded.sum_pxx == 0).all() - # Resize and checks size - assert gs_loaded.shape==(2,3) - gs_loaded.resize(4,5) - assert gs_loaded.shape==(4,5) - assert gs_loaded.sum_px.shape[0] == 4 - assert gs_loaded.sum_px.shape[1] == 5 - - # Clean-up - os.unlink(filename) + # Test a GMMStats + # Initializes a GMMStats + gs = GMMStats(2, 3) + log_likelihood = -3.0 + T = 57 + n = numpy.array([4.37, 5.31], "float64") + sumpx = numpy.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64") + sumpxx = numpy.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64") + gs.log_likelihood = log_likelihood + gs.t = T + gs.n = n + gs.sum_px = sumpx + gs.sum_pxx = sumpxx + assert gs.log_likelihood == log_likelihood + assert gs.t == T + assert (gs.n == n).all() + assert (gs.sum_px == sumpx).all() + assert (gs.sum_pxx == sumpxx).all() + assert gs.shape == (2, 3) + + # Saves and reads from file + filename = str(tempfile.mkstemp(".hdf5")[1]) + gs.save(bob.io.base.HDF5File(filename, "w")) + gs_loaded = GMMStats(bob.io.base.HDF5File(filename)) + assert gs == gs_loaded + assert (gs != gs_loaded) is False + assert gs.is_similar_to(gs_loaded) + + # Saves and reads from file using the keyword argument + filename = str(tempfile.mkstemp(".hdf5")[1]) + gs.save(hdf5=bob.io.base.HDF5File(filename, "w")) + gs_loaded = GMMStats(bob.io.base.HDF5File(filename)) + assert gs == gs_loaded + assert (gs != gs_loaded) is False + assert gs.is_similar_to(gs_loaded) + + # Saves and load from file using the keyword argument + filename = str(tempfile.mkstemp(".hdf5")[1]) + gs.save(hdf5=bob.io.base.HDF5File(filename, "w")) + gs_loaded = GMMStats() + gs_loaded.load(bob.io.base.HDF5File(filename)) + assert gs == gs_loaded + assert (gs != gs_loaded) is False + assert gs.is_similar_to(gs_loaded) + + # Saves and load from file using the keyword argument + filename = str(tempfile.mkstemp(".hdf5")[1]) + gs.save(hdf5=bob.io.base.HDF5File(filename, "w")) + gs_loaded = GMMStats() + gs_loaded.load(hdf5=bob.io.base.HDF5File(filename)) + assert gs == gs_loaded + assert (gs != gs_loaded) is False + assert gs.is_similar_to(gs_loaded) + + # Makes them different + gs_loaded.t = 58 + assert (gs == gs_loaded) is False + assert gs != gs_loaded + assert (gs.is_similar_to(gs_loaded)) is False + # Accumulates from another GMMStats + gs2 = GMMStats(2, 3) + gs2.log_likelihood = log_likelihood + gs2.t = T + gs2.n = n + gs2.sum_px = sumpx + gs2.sum_pxx = sumpxx + gs2 += gs + eps = 1e-8 + assert gs2.log_likelihood == 2 * log_likelihood + assert gs2.t == 2 * T + assert numpy.allclose(gs2.n, 2 * n, eps) + assert numpy.allclose(gs2.sum_px, 2 * sumpx, eps) + assert numpy.allclose(gs2.sum_pxx, 2 * sumpxx, eps) + + # Reinit and checks for zeros + gs_loaded.init() + assert gs_loaded.log_likelihood == 0 + assert gs_loaded.t == 0 + assert (gs_loaded.n == 0).all() + assert (gs_loaded.sum_px == 0).all() + assert (gs_loaded.sum_pxx == 0).all() + # Resize and checks size + assert gs_loaded.shape == (2, 3) + gs_loaded.resize(4, 5) + assert gs_loaded.shape == (4, 5) + assert gs_loaded.sum_px.shape[0] == 4 + assert gs_loaded.sum_px.shape[1] == 5 + + # Clean-up + os.unlink(filename) + def test_GMMMachine_1(): - # Test a GMMMachine basic features - - weights = numpy.array([0.5, 0.5], 'float64') - weights2 = numpy.array([0.6, 0.4], 'float64') - means = numpy.array([[3, 70, 0], [4, 72, 0]], 'float64') - means2 = numpy.array([[3, 7, 0], [4, 72, 0]], 'float64') - variances = numpy.array([[1, 10, 1], [2, 5, 2]], 'float64') - variances2 = numpy.array([[10, 10, 1], [2, 5, 2]], 'float64') - varianceThresholds = numpy.array([[0, 0, 0], [0, 0, 0]], 'float64') - varianceThresholds2 = numpy.array([[0.0005, 0.0005, 0.0005], [0, 0, 0]], 'float64') - - # Initializes a GMMMachine - gmm = GMMMachine(2,3) - # Sets the weights, means, variances and varianceThresholds and - # Checks correctness - gmm.weights = weights - gmm.means = means - gmm.variances = variances - gmm.variance_thresholds = varianceThresholds - assert gmm.shape == (2,3) - assert (gmm.weights == weights).all() - assert (gmm.means == means).all() - assert (gmm.variances == variances).all() - assert (gmm.variance_thresholds == varianceThresholds).all() - - # Checks supervector-like accesses - assert (gmm.mean_supervector == means.reshape(means.size)).all() - assert (gmm.variance_supervector == variances.reshape(variances.size)).all() - newMeans = numpy.array([[3, 70, 2], [4, 72, 2]], 'float64') - newVariances = numpy.array([[1, 1, 1], [2, 2, 2]], 'float64') - - - # Checks particular varianceThresholds-related methods - varianceThresholds1D = numpy.array([0.3, 1, 0.5], 'float64') - gmm.set_variance_thresholds(varianceThresholds1D) - assert (gmm.variance_thresholds[0,:] == varianceThresholds1D).all() - assert (gmm.variance_thresholds[1,:] == varianceThresholds1D).all() - - gmm.set_variance_thresholds(0.005) - assert (gmm.variance_thresholds == 0.005).all() - - # Checks Gaussians access - gmm.means = newMeans - gmm.variances = newVariances - assert (gmm.get_gaussian(0).mean == newMeans[0,:]).all() - assert (gmm.get_gaussian(1).mean == newMeans[1,:]).all() - assert (gmm.get_gaussian(0).variance == newVariances[0,:]).all() - assert (gmm.get_gaussian(1).variance == newVariances[1,:]).all() - - # Checks resize - gmm.resize(4,5) - assert gmm.shape == (4,5) - - # Checks comparison - gmm2 = GMMMachine(gmm) - gmm3 = GMMMachine(2,3) - gmm3.weights = weights2 - gmm3.means = means - gmm3.variances = variances - #gmm3.varianceThresholds = varianceThresholds - gmm4 = GMMMachine(2,3) - gmm4.weights = weights - gmm4.means = means2 - gmm4.variances = variances - #gmm4.varianceThresholds = varianceThresholds - gmm5 = GMMMachine(2,3) - gmm5.weights = weights - gmm5.means = means - gmm5.variances = variances2 - #gmm5.varianceThresholds = varianceThresholds - gmm6 = GMMMachine(2,3) - gmm6.weights = weights - gmm6.means = means - gmm6.variances = variances - #gmm6.varianceThresholds = varianceThresholds2 - - assert gmm == gmm2 - assert (gmm != gmm2) is False - assert gmm.is_similar_to(gmm2) - assert gmm != gmm3 - assert (gmm == gmm3) is False - assert gmm.is_similar_to(gmm3) is False - assert gmm != gmm4 - assert (gmm == gmm4) is False - assert gmm.is_similar_to(gmm4) is False - assert gmm != gmm5 - assert (gmm == gmm5) is False - assert gmm.is_similar_to(gmm5) is False - assert gmm != gmm6 - assert (gmm == gmm6) is False - assert gmm.is_similar_to(gmm6) is False + # Test a GMMMachine basic features + + weights = numpy.array([0.5, 0.5], "float64") + weights2 = numpy.array([0.6, 0.4], "float64") + means = numpy.array([[3, 70, 0], [4, 72, 0]], "float64") + means2 = numpy.array([[3, 7, 0], [4, 72, 0]], "float64") + variances = numpy.array([[1, 10, 1], [2, 5, 2]], "float64") + variances2 = numpy.array([[10, 10, 1], [2, 5, 2]], "float64") + varianceThresholds = numpy.array([[0, 0, 0], [0, 0, 0]], "float64") + varianceThresholds2 = numpy.array([[0.0005, 0.0005, 0.0005], [0, 0, 0]], "float64") + + # Initializes a GMMMachine + gmm = GMMMachine(2, 3) + # Sets the weights, means, variances and varianceThresholds and + # Checks correctness + gmm.weights = weights + gmm.means = means + gmm.variances = variances + gmm.variance_thresholds = varianceThresholds + assert gmm.shape == (2, 3) + assert (gmm.weights == weights).all() + assert (gmm.means == means).all() + assert (gmm.variances == variances).all() + assert (gmm.variance_thresholds == varianceThresholds).all() + + # Checks supervector-like accesses + assert (gmm.mean_supervector == means.reshape(means.size)).all() + assert (gmm.variance_supervector == variances.reshape(variances.size)).all() + newMeans = numpy.array([[3, 70, 2], [4, 72, 2]], "float64") + newVariances = numpy.array([[1, 1, 1], [2, 2, 2]], "float64") + + # Checks particular varianceThresholds-related methods + varianceThresholds1D = numpy.array([0.3, 1, 0.5], "float64") + gmm.set_variance_thresholds(varianceThresholds1D) + assert (gmm.variance_thresholds[0, :] == varianceThresholds1D).all() + assert (gmm.variance_thresholds[1, :] == varianceThresholds1D).all() + + gmm.set_variance_thresholds(0.005) + assert (gmm.variance_thresholds == 0.005).all() + + # Checks Gaussians access + gmm.means = newMeans + gmm.variances = newVariances + assert (gmm.get_gaussian(0).mean == newMeans[0, :]).all() + assert (gmm.get_gaussian(1).mean == newMeans[1, :]).all() + assert (gmm.get_gaussian(0).variance == newVariances[0, :]).all() + assert (gmm.get_gaussian(1).variance == newVariances[1, :]).all() + + # Checks resize + gmm.resize(4, 5) + assert gmm.shape == (4, 5) + + # Checks comparison + gmm2 = GMMMachine(gmm) + gmm3 = GMMMachine(2, 3) + gmm3.weights = weights2 + gmm3.means = means + gmm3.variances = variances + # gmm3.varianceThresholds = varianceThresholds + gmm4 = GMMMachine(2, 3) + gmm4.weights = weights + gmm4.means = means2 + gmm4.variances = variances + # gmm4.varianceThresholds = varianceThresholds + gmm5 = GMMMachine(2, 3) + gmm5.weights = weights + gmm5.means = means + gmm5.variances = variances2 + # gmm5.varianceThresholds = varianceThresholds + gmm6 = GMMMachine(2, 3) + gmm6.weights = weights + gmm6.means = means + gmm6.variances = variances + # gmm6.varianceThresholds = varianceThresholds2 + + assert gmm == gmm2 + assert (gmm != gmm2) is False + assert gmm.is_similar_to(gmm2) + assert gmm != gmm3 + assert (gmm == gmm3) is False + assert gmm.is_similar_to(gmm3) is False + assert gmm != gmm4 + assert (gmm == gmm4) is False + assert gmm.is_similar_to(gmm4) is False + assert gmm != gmm5 + assert (gmm == gmm5) is False + assert gmm.is_similar_to(gmm5) is False + assert gmm != gmm6 + assert (gmm == gmm6) is False + assert gmm.is_similar_to(gmm6) is False + def test_GMMMachine_2(): - # Test a GMMMachine (statistics) + # Test a GMMMachine (statistics) + + arrayset = bob.io.base.load( + datafile("faithful.torch3_f64.hdf5", __name__, path="../data/") + ) + gmm = GMMMachine(2, 2) + gmm.weights = numpy.array([0.5, 0.5], "float64") + gmm.means = numpy.array([[3, 70], [4, 72]], "float64") + gmm.variances = numpy.array([[1, 10], [2, 5]], "float64") + gmm.variance_thresholds = numpy.array([[0, 0], [0, 0]], "float64") - arrayset = bob.io.base.load(datafile("faithful.torch3_f64.hdf5", __name__, path="../data/")) - gmm = GMMMachine(2, 2) - gmm.weights = numpy.array([0.5, 0.5], 'float64') - gmm.means = numpy.array([[3, 70], [4, 72]], 'float64') - gmm.variances = numpy.array([[1, 10], [2, 5]], 'float64') - gmm.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64') + stats = GMMStats(2, 2) + gmm.acc_statistics(arrayset, stats) - stats = GMMStats(2, 2) - gmm.acc_statistics(arrayset, stats) + stats_ref = GMMStats( + bob.io.base.HDF5File(datafile("stats.hdf5", __name__, path="../data/")) + ) - stats_ref = GMMStats(bob.io.base.HDF5File(datafile("stats.hdf5",__name__, path="../data/"))) + assert stats.t == stats_ref.t + assert numpy.allclose(stats.n, stats_ref.n, atol=1e-10) + # assert numpy.array_equal(stats.sumPx, stats_ref.sumPx) + # Note AA: precision error above + assert numpy.allclose(stats.sum_px, stats_ref.sum_px, atol=1e-10) + assert numpy.allclose(stats.sum_pxx, stats_ref.sum_pxx, atol=1e-10) - assert stats.t == stats_ref.t - assert numpy.allclose(stats.n, stats_ref.n, atol=1e-10) - #assert numpy.array_equal(stats.sumPx, stats_ref.sumPx) - #Note AA: precision error above - assert numpy.allclose(stats.sum_px, stats_ref.sum_px, atol=1e-10) - assert numpy.allclose(stats.sum_pxx, stats_ref.sum_pxx, atol=1e-10) def test_GMMMachine_3(): - # Test a GMMMachine (log-likelihood computation) - - data = bob.io.base.load(datafile('data.hdf5', __name__, path="../data/")) - gmm = GMMMachine(2, 50) - gmm.weights = bob.io.base.load(datafile('weights.hdf5', __name__, path="../data/")) - gmm.means = bob.io.base.load(datafile('means.hdf5', __name__, path="../data/")) - gmm.variances = bob.io.base.load(datafile('variances.hdf5', __name__, path="../data/")) - - # Compare the log-likelihood with the one obtained using Chris Matlab - # implementation - matlab_ll_ref = -2.361583051672024e+02 - assert abs(gmm(data) - matlab_ll_ref) < 1e-10 - - + # Test a GMMMachine (log-likelihood computation) + + data = bob.io.base.load(datafile("data.hdf5", __name__, path="../data/")) + gmm = GMMMachine(2, 50) + gmm.weights = bob.io.base.load(datafile("weights.hdf5", __name__, path="../data/")) + gmm.means = bob.io.base.load(datafile("means.hdf5", __name__, path="../data/")) + gmm.variances = bob.io.base.load( + datafile("variances.hdf5", __name__, path="../data/") + ) + + # Compare the log-likelihood with the one obtained using Chris Matlab + # implementation + matlab_ll_ref = -2.361583051672024e02 + assert abs(gmm(data) - matlab_ll_ref) < 1e-10 + + def test_GMMMachine_4(): - import numpy - numpy.random.seed(3) # FIXING A SEED - - data = numpy.random.rand(100,50) #Doesn't matter if it is ramdom. The average of 1D array (in python) MUST output the same result for the 2D array (in C++) - - gmm = GMMMachine(2, 50) - gmm.weights = bob.io.base.load(datafile('weights.hdf5', __name__, path="../data/")) - gmm.means = bob.io.base.load(datafile('means.hdf5', __name__, path="../data/")) - gmm.variances = bob.io.base.load(datafile('variances.hdf5', __name__, path="../data/")) - - - ll = 0 - for i in range(data.shape[0]): - ll += gmm(data[i,:]) - ll /= data.shape[0] - - assert ll==gmm(data) - - + import numpy + + numpy.random.seed(3) # FIXING A SEED + + data = numpy.random.rand( + 100, 50 + ) # Doesn't matter if it is ramdom. The average of 1D array (in python) MUST output the same result for the 2D array (in C++) + + gmm = GMMMachine(2, 50) + gmm.weights = bob.io.base.load(datafile("weights.hdf5", __name__, path="../data/")) + gmm.means = bob.io.base.load(datafile("means.hdf5", __name__, path="../data/")) + gmm.variances = bob.io.base.load( + datafile("variances.hdf5", __name__, path="../data/") + ) + + ll = 0 + for i in range(data.shape[0]): + ll += gmm(data[i, :]) + ll /= data.shape[0] + + assert ll == gmm(data) diff --git a/bob/learn/em/test/test_kmeans.py b/bob/learn/em/test/test_kmeans.py index 0cadba3..2bbd1a3 100644 --- a/bob/learn/em/test/test_kmeans.py +++ b/bob/learn/em/test/test_kmeans.py @@ -8,13 +8,12 @@ """Tests the KMeans machine """ +import dask.array as da import numpy as np from bob.learn.em.cluster import KMeansMachine from bob.learn.em.cluster import KMeansTrainer -import dask.array as da - def test_KMeansMachine(): # Test a KMeansMachine @@ -57,7 +56,10 @@ def test_kmeans_fit(): data2 = da.random.normal(loc=-1, size=(2000, 3)) data = da.concatenate([data1, data2], axis=0) machine = KMeansMachine(2, random_state=0).fit(data) - expected = [[1.00426431, 1.00359693, 1.05996704], [-0.99262315, -1.05226141, -1.00525245]] + expected = [ + [1.00426431, 1.00359693, 1.05996704], + [-0.99262315, -1.05226141, -1.00525245], + ] np.testing.assert_almost_equal(machine.centroids_, expected) @@ -68,7 +70,10 @@ def test_kmeans_fit_init_pp(): data = da.concatenate([data1, data2], axis=0) trainer = KMeansTrainer(init_method="k-means++", random_state=0) machine = KMeansMachine(2).fit(data, trainer=trainer) - expected = [[-0.99262315, -1.05226141, -1.00525245], [1.00426431, 1.00359693, 1.05996704]] + expected = [ + [-0.99262315, -1.05226141, -1.00525245], + [1.00426431, 1.00359693, 1.05996704], + ] np.testing.assert_almost_equal(machine.centroids_, expected) @@ -79,5 +84,8 @@ def test_kmeans_fit_init_random(): data = da.concatenate([data1, data2], axis=0) trainer = KMeansTrainer(init_method="random", random_state=0) machine = KMeansMachine(2).fit(data, trainer=trainer) - expected = [[-0.99433738, -1.05561588, -1.01236246], [0.99800688, 0.99873325, 1.05879539]] + expected = [ + [-0.99433738, -1.05561588, -1.01236246], + [0.99800688, 0.99873325, 1.05879539], + ] np.testing.assert_almost_equal(machine.centroids_, expected) diff --git a/bob/learn/em/test/test_linearscoring.py b/bob/learn/em/test/test_linearscoring.py index 9958e20..eb870ad 100644 --- a/bob/learn/em/test/test_linearscoring.py +++ b/bob/learn/em/test/test_linearscoring.py @@ -10,119 +10,244 @@ import numpy -from bob.learn.em import GMMMachine, GMMStats, linear_scoring +from bob.learn.em import GMMMachine +from bob.learn.em import GMMStats +from bob.learn.em import linear_scoring -def test_LinearScoring(): - ubm = GMMMachine(2, 2) - ubm.weights = numpy.array([0.5, 0.5], 'float64') - ubm.means = numpy.array([[3, 70], [4, 72]], 'float64') - ubm.variances = numpy.array([[1, 10], [2, 5]], 'float64') - ubm.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64') - - model1 = GMMMachine(2, 2) - model1.weights = numpy.array([0.5, 0.5], 'float64') - model1.means = numpy.array([[1, 2], [3, 4]], 'float64') - model1.variances = numpy.array([[9, 10], [11, 12]], 'float64') - model1.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64') - - model2 = GMMMachine(2, 2) - model2.weights = numpy.array([0.5, 0.5], 'float64') - model2.means = numpy.array([[5, 6], [7, 8]], 'float64') - model2.variances = numpy.array([[13, 14], [15, 16]], 'float64') - model2.variance_thresholds = numpy.array([[0, 0], [0, 0]], 'float64') - - stats1 = GMMStats(2, 2) - stats1.sum_px = numpy.array([[1, 2], [3, 4]], 'float64') - stats1.n = numpy.array([1, 2], 'float64') - stats1.t = 1+2 - - stats2 = GMMStats(2, 2) - stats2.sum_px = numpy.array([[5, 6], [7, 8]], 'float64') - stats2.n = numpy.array([3, 4], 'float64') - stats2.t = 3+4 - - stats3 = GMMStats(2, 2) - stats3.sum_px = numpy.array([[5, 6], [7, 3]], 'float64') - stats3.n = numpy.array([3, 4], 'float64') - stats3.t = 3+4 - - test_channeloffset = [numpy.array([9, 8, 7, 6], 'float64'), numpy.array([5, 4, 3, 2], 'float64'), numpy.array([1, 0, 1, 2], 'float64')] - - # Reference scores (from Idiap internal matlab implementation) - ref_scores_00 = numpy.array([[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], 'float64') - ref_scores_01 = numpy.array( [[790.9666666666667, 743.9571428571428, 753.6714285714285], [738.5666666666667, 695.4428571428572, 704.5857142857144]], 'float64') - ref_scores_10 = numpy.array([[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], 'float64') - ref_scores_11 = numpy.array([[871.8333333333332, 776.3000000000001, 770.3571428571427], [793.8333333333333, 714.1857142857143, 717.5000000000000]], 'float64') - - - # 1/ Use GMMMachines - # 1/a/ Without test_channelOffset, without frame-length normalisation - scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3]) - assert (abs(scores - ref_scores_00) < 1e-7).all() - - # 1/b/ Without test_channelOffset, with frame-length normalisation - scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], [], True) - assert (abs(scores - ref_scores_01) < 1e-7).all() - #scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], (), True) - #assert (abs(scores - ref_scores_01) < 1e-7).all() - #scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], None, True) - #assert (abs(scores - ref_scores_01) < 1e-7).all() - - # 1/c/ With test_channelOffset, without frame-length normalisation - scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset) - assert (abs(scores - ref_scores_10) < 1e-7).all() - - # 1/d/ With test_channelOffset, with frame-length normalisation - scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset, True) - assert (abs(scores - ref_scores_11) < 1e-7).all() - - - # 2/ Use mean/variance supervectors - # 2/a/ Without test_channelOffset, without frame-length normalisation - scores = linear_scoring([model1.mean_supervector, model2.mean_supervector], ubm.mean_supervector, ubm.variance_supervector, [stats1, stats2, stats3]) - assert (abs(scores - ref_scores_00) < 1e-7).all() - - # 2/b/ Without test_channelOffset, with frame-length normalisation - scores = linear_scoring([model1.mean_supervector, model2.mean_supervector], ubm.mean_supervector, ubm.variance_supervector, [stats1, stats2, stats3], [], True) - assert (abs(scores - ref_scores_01) < 1e-7).all() - - # 2/c/ With test_channelOffset, without frame-length normalisation - scores = linear_scoring([model1.mean_supervector, model2.mean_supervector], ubm.mean_supervector, ubm.variance_supervector, [stats1, stats2, stats3], test_channeloffset) - assert (abs(scores - ref_scores_10) < 1e-7).all() - - # 2/d/ With test_channelOffset, with frame-length normalisation - scores = linear_scoring([model1.mean_supervector, model2.mean_supervector], ubm.mean_supervector, ubm.variance_supervector, [stats1, stats2, stats3], test_channeloffset, True) - assert (abs(scores - ref_scores_11) < 1e-7).all() - - - # 3/ Using single model/sample - # 3/a/ without frame-length normalisation - score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats1, test_channeloffset[0]) - assert abs(score - ref_scores_10[0,0]) < 1e-7 - score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats2, test_channeloffset[1]) - assert abs(score - ref_scores_10[0,1]) < 1e-7 - score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats3, test_channeloffset[2]) - assert abs(score - ref_scores_10[0,2]) < 1e-7 - score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats1, test_channeloffset[0]) - assert abs(score - ref_scores_10[1,0]) < 1e-7 - score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats2, test_channeloffset[1]) - assert abs(score - ref_scores_10[1,1]) < 1e-7 - score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats3, test_channeloffset[2]) - assert abs(score - ref_scores_10[1,2]) < 1e-7 - - - # 3/b/ without frame-length normalisation - score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats1, test_channeloffset[0], True) - assert abs(score - ref_scores_11[0,0]) < 1e-7 - score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats2, test_channeloffset[1], True) - assert abs(score - ref_scores_11[0,1]) < 1e-7 - score = linear_scoring(model1.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats3, test_channeloffset[2], True) - assert abs(score - ref_scores_11[0,2]) < 1e-7 - score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats1, test_channeloffset[0], True) - assert abs(score - ref_scores_11[1,0]) < 1e-7 - score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats2, test_channeloffset[1], True) - assert abs(score - ref_scores_11[1,1]) < 1e-7 - score = linear_scoring(model2.mean_supervector, ubm.mean_supervector, ubm.variance_supervector, stats3, test_channeloffset[2], True) - assert abs(score - ref_scores_11[1,2]) < 1e-7 +def test_LinearScoring(): + ubm = GMMMachine(2, 2) + ubm.weights = numpy.array([0.5, 0.5], "float64") + ubm.means = numpy.array([[3, 70], [4, 72]], "float64") + ubm.variances = numpy.array([[1, 10], [2, 5]], "float64") + ubm.variance_thresholds = numpy.array([[0, 0], [0, 0]], "float64") + + model1 = GMMMachine(2, 2) + model1.weights = numpy.array([0.5, 0.5], "float64") + model1.means = numpy.array([[1, 2], [3, 4]], "float64") + model1.variances = numpy.array([[9, 10], [11, 12]], "float64") + model1.variance_thresholds = numpy.array([[0, 0], [0, 0]], "float64") + + model2 = GMMMachine(2, 2) + model2.weights = numpy.array([0.5, 0.5], "float64") + model2.means = numpy.array([[5, 6], [7, 8]], "float64") + model2.variances = numpy.array([[13, 14], [15, 16]], "float64") + model2.variance_thresholds = numpy.array([[0, 0], [0, 0]], "float64") + + stats1 = GMMStats(2, 2) + stats1.sum_px = numpy.array([[1, 2], [3, 4]], "float64") + stats1.n = numpy.array([1, 2], "float64") + stats1.t = 1 + 2 + + stats2 = GMMStats(2, 2) + stats2.sum_px = numpy.array([[5, 6], [7, 8]], "float64") + stats2.n = numpy.array([3, 4], "float64") + stats2.t = 3 + 4 + + stats3 = GMMStats(2, 2) + stats3.sum_px = numpy.array([[5, 6], [7, 3]], "float64") + stats3.n = numpy.array([3, 4], "float64") + stats3.t = 3 + 4 + + test_channeloffset = [ + numpy.array([9, 8, 7, 6], "float64"), + numpy.array([5, 4, 3, 2], "float64"), + numpy.array([1, 0, 1, 2], "float64"), + ] + + # Reference scores (from Idiap internal matlab implementation) + ref_scores_00 = numpy.array( + [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], "float64" + ) + ref_scores_01 = numpy.array( + [ + [790.9666666666667, 743.9571428571428, 753.6714285714285], + [738.5666666666667, 695.4428571428572, 704.5857142857144], + ], + "float64", + ) + ref_scores_10 = numpy.array( + [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], "float64" + ) + ref_scores_11 = numpy.array( + [ + [871.8333333333332, 776.3000000000001, 770.3571428571427], + [793.8333333333333, 714.1857142857143, 717.5000000000000], + ], + "float64", + ) + + # 1/ Use GMMMachines + # 1/a/ Without test_channelOffset, without frame-length normalisation + scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3]) + assert (abs(scores - ref_scores_00) < 1e-7).all() + + # 1/b/ Without test_channelOffset, with frame-length normalisation + scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], [], True) + assert (abs(scores - ref_scores_01) < 1e-7).all() + # scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], (), True) + # assert (abs(scores - ref_scores_01) < 1e-7).all() + # scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], None, True) + # assert (abs(scores - ref_scores_01) < 1e-7).all() + + # 1/c/ With test_channelOffset, without frame-length normalisation + scores = linear_scoring( + [model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset + ) + assert (abs(scores - ref_scores_10) < 1e-7).all() + + # 1/d/ With test_channelOffset, with frame-length normalisation + scores = linear_scoring( + [model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset, True + ) + assert (abs(scores - ref_scores_11) < 1e-7).all() + + # 2/ Use mean/variance supervectors + # 2/a/ Without test_channelOffset, without frame-length normalisation + scores = linear_scoring( + [model1.mean_supervector, model2.mean_supervector], + ubm.mean_supervector, + ubm.variance_supervector, + [stats1, stats2, stats3], + ) + assert (abs(scores - ref_scores_00) < 1e-7).all() + + # 2/b/ Without test_channelOffset, with frame-length normalisation + scores = linear_scoring( + [model1.mean_supervector, model2.mean_supervector], + ubm.mean_supervector, + ubm.variance_supervector, + [stats1, stats2, stats3], + [], + True, + ) + assert (abs(scores - ref_scores_01) < 1e-7).all() + + # 2/c/ With test_channelOffset, without frame-length normalisation + scores = linear_scoring( + [model1.mean_supervector, model2.mean_supervector], + ubm.mean_supervector, + ubm.variance_supervector, + [stats1, stats2, stats3], + test_channeloffset, + ) + assert (abs(scores - ref_scores_10) < 1e-7).all() + + # 2/d/ With test_channelOffset, with frame-length normalisation + scores = linear_scoring( + [model1.mean_supervector, model2.mean_supervector], + ubm.mean_supervector, + ubm.variance_supervector, + [stats1, stats2, stats3], + test_channeloffset, + True, + ) + assert (abs(scores - ref_scores_11) < 1e-7).all() + + # 3/ Using single model/sample + # 3/a/ without frame-length normalisation + score = linear_scoring( + model1.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats1, + test_channeloffset[0], + ) + assert abs(score - ref_scores_10[0, 0]) < 1e-7 + score = linear_scoring( + model1.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats2, + test_channeloffset[1], + ) + assert abs(score - ref_scores_10[0, 1]) < 1e-7 + score = linear_scoring( + model1.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats3, + test_channeloffset[2], + ) + assert abs(score - ref_scores_10[0, 2]) < 1e-7 + score = linear_scoring( + model2.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats1, + test_channeloffset[0], + ) + assert abs(score - ref_scores_10[1, 0]) < 1e-7 + score = linear_scoring( + model2.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats2, + test_channeloffset[1], + ) + assert abs(score - ref_scores_10[1, 1]) < 1e-7 + score = linear_scoring( + model2.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats3, + test_channeloffset[2], + ) + assert abs(score - ref_scores_10[1, 2]) < 1e-7 + + # 3/b/ without frame-length normalisation + score = linear_scoring( + model1.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats1, + test_channeloffset[0], + True, + ) + assert abs(score - ref_scores_11[0, 0]) < 1e-7 + score = linear_scoring( + model1.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats2, + test_channeloffset[1], + True, + ) + assert abs(score - ref_scores_11[0, 1]) < 1e-7 + score = linear_scoring( + model1.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats3, + test_channeloffset[2], + True, + ) + assert abs(score - ref_scores_11[0, 2]) < 1e-7 + score = linear_scoring( + model2.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats1, + test_channeloffset[0], + True, + ) + assert abs(score - ref_scores_11[1, 0]) < 1e-7 + score = linear_scoring( + model2.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats2, + test_channeloffset[1], + True, + ) + assert abs(score - ref_scores_11[1, 1]) < 1e-7 + score = linear_scoring( + model2.mean_supervector, + ubm.mean_supervector, + ubm.variance_supervector, + stats3, + test_channeloffset[2], + True, + ) + assert abs(score - ref_scores_11[1, 2]) < 1e-7 diff --git a/bob/learn/em/test/test_picklability.py b/bob/learn/em/test/test_picklability.py index 033bfe1..f792a96 100644 --- a/bob/learn/em/test/test_picklability.py +++ b/bob/learn/em/test/test_picklability.py @@ -2,12 +2,13 @@ # vim: set fileencoding=utf-8 : # Tiago de Freitas Pereira <tiago.pereira@idiap.ch> -from bob.learn.em import GMMMachine -from bob.learn.em import KMeansMachine -from bob.learn.em import GMMStats +import pickle import numpy -import pickle + +from bob.learn.em import GMMMachine +from bob.learn.em import GMMStats +from bob.learn.em import KMeansMachine def test_gmm_machine(): -- GitLab