diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000000000000000000000000000000000..2534a45ee555bb5ebefd37210d19399634c7a4ee --- /dev/null +++ b/.flake8 @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 80 +ignore = E501,W503,E302,E402,E203 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1515754e0bef52e755e793e05c3400ecf532c4c7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: + - repo: https://github.com/timothycrosley/isort + rev: 5.9.3 + hooks: + - id: isort + args: [--settings-path, "pyproject.toml"] + - repo: https://github.com/psf/black + rev: 21.7b0 + hooks: + - id: black + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: check-ast + - id: check-case-conflict + - id: trailing-whitespace + - id: end-of-file-fixer + - id: debug-statements + - id: check-added-large-files + - id: check-yaml + exclude: .*/meta.yaml diff --git a/LICENSE b/LICENSE index bd46ce15068f2d3b5a1b23ac6c68a33ec808d95d..0c3160331a4198b350981b4eb39a1b2f851813f6 100644 --- a/LICENSE +++ b/LICENSE @@ -24,4 +24,4 @@ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/bob/__init__.py b/bob/__init__.py index 2ab1e28b150f0549def9963e9e87de3fdd6b2579..edbb4090fca046b19d22d3982711084621bff3be 100644 --- a/bob/__init__.py +++ b/bob/__init__.py @@ -1,3 +1,4 @@ # see https://docs.python.org/3/library/pkgutil.html from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) diff --git a/bob/learn/__init__.py b/bob/learn/__init__.py index 2ab1e28b150f0549def9963e9e87de3fdd6b2579..edbb4090fca046b19d22d3982711084621bff3be 100644 --- a/bob/learn/__init__.py +++ b/bob/learn/__init__.py @@ -1,3 +1,4 @@ # see https://docs.python.org/3/library/pkgutil.html from pkgutil import extend_path + __path__ = extend_path(__path__, __name__) diff --git a/bob/learn/em/__init__.py b/bob/learn/em/__init__.py index 879f33b9d854ad026bea3dad175dd52599a93a3e..6d67218a8049049a3e44c11076f6f8a61a1e09dc 100644 --- a/bob/learn/em/__init__.py +++ b/bob/learn/em/__init__.py @@ -1,5 +1,10 @@ import bob.extension -from .mixture import linear_scoring + +from .gmm import GMMMachine, GMMStats +from .k_means import KMeansMachine +from .linear_scoring import linear_scoring # noqa: F401 +from .wccn import WCCN +from .whitening import Whitening def get_config(): @@ -8,4 +13,26 @@ def get_config(): # gets sphinx autodoc done right - don't remove it +def __appropriate__(*args): + """Says the object was actually declared here, and not in the imported module. 
+ + Parameters: + + *args: An iterable of objects to modify + + Resolves `Sphinx referencing issues + <https://github.com/sphinx-doc/sphinx/issues/3048>` + """ + + for obj in args: + obj.__module__ = __name__ + + +__appropriate__( + KMeansMachine, + GMMMachine, + GMMStats, + WCCN, + Whitening, +) __all__ = [_ for _ in dir() if not _.startswith("_")] diff --git a/bob/learn/em/cluster/__init__.py b/bob/learn/em/cluster/__init__.py deleted file mode 100644 index 535fd51a3f013f15ec9a9077314e038bf3c5c92d..0000000000000000000000000000000000000000 --- a/bob/learn/em/cluster/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .k_means import KMeansMachine diff --git a/bob/learn/em/mixture/gmm.py b/bob/learn/em/gmm.py similarity index 86% rename from bob/learn/em/mixture/gmm.py rename to bob/learn/em/gmm.py index 63171cf48c610a86e0cfaf73aecb0b2468c337a5..ce742f8225228258461e7b0b3fef5a285855fac1 100644 --- a/bob/learn/em/mixture/gmm.py +++ b/bob/learn/em/gmm.py @@ -4,17 +4,18 @@ """This module provides classes and functions for the training and usage of GMM.""" -import logging import copy +import logging + from typing import Union import dask.array as da import numpy as np -from sklearn.base import BaseEstimator - -from bob.learn.em.cluster import KMeansMachine from h5py import File as HDF5File +from sklearn.base import BaseEstimator + +from .k_means import KMeansMachine logger = logging.getLogger(__name__) @@ -44,10 +45,16 @@ class GMMStats: self.log_likelihood = 0 self.t = 0 self.n = np.zeros(shape=(self.n_gaussians,), dtype=float) - self.sum_px = np.zeros(shape=(self.n_gaussians, self.n_features), dtype=float) - self.sum_pxx = np.zeros(shape=(self.n_gaussians, self.n_features), dtype=float) + self.sum_px = np.zeros( + shape=(self.n_gaussians, self.n_features), dtype=float + ) + self.sum_pxx = np.zeros( + shape=(self.n_gaussians, self.n_features), dtype=float + ) - def init_fields(self, log_likelihood=0.0, t=0, n=None, sum_px=None, sum_pxx=None): + def init_fields( + self, log_likelihood=0.0, t=0, n=None, sum_px=None, sum_pxx=None + ): """Initializes the statistics values to a defined value, or zero by default.""" # The accumulated log likelihood of all samples self.log_likelihood = log_likelihood @@ -55,7 +62,9 @@ class GMMStats: self.t = t # For each Gaussian, the accumulated sum of responsibilities, i.e. 
the sum of # P(gaussian_i|x) - self.n = np.zeros(shape=(self.n_gaussians,), dtype=float) if n is None else n + self.n = ( + np.zeros(shape=(self.n_gaussians,), dtype=float) if n is None else n + ) # For each Gaussian, the accumulated sum of responsibility times the sample self.sum_px = ( np.zeros(shape=(self.n_gaussians, self.n_features), dtype=float) @@ -90,7 +99,8 @@ class GMMStats: if hdf5.attrs["writer_class"] != str(cls): logger.warning(f"{hdf5.attrs['writer_class']} is not {cls}.") self = cls( - n_gaussians=hdf5["n_gaussians"][()], n_features=hdf5["n_features"][()] + n_gaussians=hdf5["n_gaussians"][()], + n_features=hdf5["n_features"][()], ) self.log_likelihood = hdf5["log_likelihood"][()] self.t = hdf5["T"][()] @@ -139,8 +149,13 @@ class GMMStats: ) def __add__(self, other): - if self.n_gaussians != other.n_gaussians or self.n_features != other.n_features: - raise ValueError("Statistics could not be added together (shape mismatch)") + if ( + self.n_gaussians != other.n_gaussians + or self.n_features != other.n_features + ): + raise ValueError( + "Statistics could not be added together (shape mismatch)" + ) new_stats = GMMStats(self.n_gaussians, self.n_features) new_stats.log_likelihood = self.log_likelihood + other.log_likelihood new_stats.t = self.t + other.t @@ -150,8 +165,13 @@ class GMMStats: return new_stats def __iadd__(self, other): - if self.n_gaussians != other.n_gaussians or self.n_features != other.n_features: - raise ValueError("Statistics could not be added together (shape mismatch)") + if ( + self.n_gaussians != other.n_gaussians + or self.n_features != other.n_features + ): + raise ValueError( + "Statistics could not be added together (shape mismatch)" + ) self.log_likelihood += other.log_likelihood self.t += other.t self.n += other.n @@ -171,7 +191,9 @@ class GMMStats: def is_similar_to(self, other, rtol=1e-5, atol=1e-8): """Returns True if `other` has the same values (within a tolerance).""" return ( - np.isclose(self.log_likelihood, other.log_likelihood, rtol=rtol, atol=atol) + np.isclose( + self.log_likelihood, other.log_likelihood, rtol=rtol, atol=atol + ) and np.isclose(self.t, other.t, rtol=rtol, atol=atol) and np.allclose(self.n, other.n, rtol=rtol, atol=atol) and np.allclose(self.sum_px, other.sum_px, rtol=rtol, atol=atol) @@ -209,14 +231,14 @@ class GMMMachine(BaseEstimator): Two types of training are available MLE and MAP, chosen with `trainer`. - Maximum Likelihood Estimation (:ref:`MLE <mle>`, ML) - --------------------------------------- - The mixtures are initialized (with k-means by default). - The means, variances, and weights of the mixtures are then trained on the data to + * Maximum Likelihood Estimation (:ref:`MLE <mle>`, ML) + + The mixtures are initialized (with k-means by default). The means, + variances, and weights of the mixtures are then trained on the data to increase the likelihood value. (:ref:`MLE <mle>`) - Maximum a Posteriori (:ref:`MAP <map>`) - -------------------------- + * Maximum a Posteriori (:ref:`MAP <map>`) + The MAP machine takes another GMM machine as prior, called Universal Background Model (UBM). The means, variances, and weights of the MAP mixtures are then trained on the data @@ -230,38 +252,10 @@ class GMMMachine(BaseEstimator): When setting manually any of the means, variances or variance thresholds, the k-means initialization will be skipped in `fit`. 
- Usage - ----- - Maximum likelihood: - >>> data = np.array([[0,0,0],[1,1,1]]) - >>> ml_machine = GMMMachine(n_gaussians=2, trainer="ml") - >>> ml_machine = ml_machine.fit(data) - >>> print(ml_machine.means) - [[1. 1. 1.] - [0. 0. 0.]] - - Maximum a Posteriori: - >>> post_data = np.array([[0.5, 0.5, 0],[1.5, 1, 1.5]]) - >>> map_machine = GMMMachine(n_gaussians=2, trainer="map", ubm=ml_machine) - >>> map_machine = map_machine.fit(post_data) - >>> print(map_machine.means) - [[1.1 1. 1.1] - [0.1 0.1 0. ]] - - Partial fitting: - >>> machine = GMMMachine(n_gaussians=2, trainer="ml") - >>> for step in range(5): - ... machine = machine.fit_partial(data) - >>> print(machine.means) - [[1. 1. 1.] - [0. 0. 0.]] - Attributes ---------- means, variances, variance_thresholds Gaussians parameters. - weights - Gaussians weights. """ def __init__( @@ -272,7 +266,7 @@ class GMMMachine(BaseEstimator): convergence_threshold: float = 1e-5, max_fitting_steps: Union[int, None] = 200, random_state: Union[int, np.random.RandomState] = 0, - weights: "Union[np.ndarray[('n_gaussians',), float], None]" = None, + weights: "Union[np.ndarray[('n_gaussians',), float], None]" = None, # noqa: F821 k_means_trainer: Union[KMeansMachine, None] = None, update_means: bool = True, update_variances: bool = False, @@ -325,11 +319,15 @@ class GMMMachine(BaseEstimator): self.n_gaussians = n_gaussians self.trainer = trainer if trainer in ["ml", "map"] else "ml" - self.m_step_func = map_gmm_m_step if self.trainer == "map" else ml_gmm_m_step + self.m_step_func = ( + map_gmm_m_step if self.trainer == "map" else ml_gmm_m_step + ) if self.trainer == "map" and ubm is None: raise ValueError("A UBM is required for MAP GMM.") if ubm is not None and ubm.n_gaussians != self.n_gaussians: - raise ValueError("The UBM machine is not compatible with this machine.") + raise ValueError( + "The UBM machine is not compatible with this machine." 
+ ) self.ubm = ubm if max_fitting_steps is None and convergence_threshold is None: raise ValueError( @@ -351,11 +349,15 @@ class GMMMachine(BaseEstimator): if self.ubm is not None: self.means = copy.deepcopy(self.ubm.means) self.variances = copy.deepcopy(self.ubm.variances) - self.variance_thresholds = copy.deepcopy(self.ubm.variance_thresholds) + self.variance_thresholds = copy.deepcopy( + self.ubm.variance_thresholds + ) self.weights = copy.deepcopy(self.ubm.weights) else: self.weights = np.full( - (self.n_gaussians,), fill_value=(1 / self.n_gaussians), dtype=float + (self.n_gaussians,), + fill_value=(1 / self.n_gaussians), + dtype=float, ) if weights is not None: self.weights = weights @@ -368,7 +370,9 @@ class GMMMachine(BaseEstimator): return self._weights @weights.setter - def weights(self, weights: "np.ndarray[('n_gaussians',), float]"): + def weights( + self, weights: "np.ndarray[('n_gaussians',), float]" # noqa: F821 + ): # noqa: F821 self._weights = weights self._log_weights = np.log(self._weights) @@ -380,7 +384,10 @@ class GMMMachine(BaseEstimator): return self._means @means.setter - def means(self, means: "np.ndarray[('n_gaussians', 'n_features'), float]"): + def means( + self, + means: "np.ndarray[('n_gaussians', 'n_features'), float]", # noqa: F821 + ): # noqa: F821 self._means = means @property @@ -391,11 +398,16 @@ class GMMMachine(BaseEstimator): return self._variances @variances.setter - def variances(self, variances: "np.ndarray[('n_gaussians', 'n_features'), float]"): + def variances( + self, + variances: "np.ndarray[('n_gaussians', 'n_features'), float]", # noqa: F821 + ): self._variances = np.maximum(self.variance_thresholds, variances) # Recompute g_norm for each gaussian [array of shape (n_gaussians,)] n_log_2pi = self._variances.shape[-1] * np.log(2 * np.pi) - self._g_norms = np.array(n_log_2pi + np.log(self._variances).sum(axis=-1)) + self._g_norms = np.array( + n_log_2pi + np.log(self._variances).sum(axis=-1) + ) @property def variance_thresholds(self): @@ -407,7 +419,7 @@ class GMMMachine(BaseEstimator): @variance_thresholds.setter def variance_thresholds( self, - threshold: "Union[float, np.ndarray[('n_gaussians', 'n_features'), float]]", + threshold: "Union[float, np.ndarray[('n_gaussians', 'n_features'), float]]", # noqa: F821 ): self._variance_thresholds = threshold if self._variances is not None: @@ -448,7 +460,9 @@ class GMMMachine(BaseEstimator): if hdf5.attrs["writer_class"] != str(cls): logger.warning(f"{hdf5.attrs['writer_class']} is not {cls}.") if hdf5["trainer"] == "map" and ubm is None: - raise ValueError("The UBM is needed when loading a MAP machine.") + raise ValueError( + "The UBM is needed when loading a MAP machine." + ) self = cls( n_gaussians=hdf5["n_gaussians"][()], trainer=hdf5["trainer"][()], @@ -464,7 +478,9 @@ class GMMMachine(BaseEstimator): gaussians_group = hdf5["gaussians"] self.means = gaussians_group["means"][...] self.variances = gaussians_group["variances"][...] - self.variance_thresholds = gaussians_group["variance_thresholds"][...] + self.variance_thresholds = gaussians_group["variance_thresholds"][ + ... 
+ ] else: # Legacy file version logger.info("Loading a legacy HDF5 machine file.") n_gaussians = hdf5["m_n_gaussians"][()] @@ -487,6 +503,11 @@ class GMMMachine(BaseEstimator): ) return self + def load(self, hdf5): + """Overwrites the current state with those in an `HDF5File` object.""" + new_self = self.from_hdf5(hdf5) + self.__dict__.update(new_self.__dict__) + def save(self, hdf5): """Saves the current statistics in an `HDF5File` object.""" if isinstance(hdf5, str): @@ -510,7 +531,9 @@ class GMMMachine(BaseEstimator): return ( np.array_equal(self.means, other.means) and np.array_equal(self.variances, other.variances) - and np.array_equal(self.variance_thresholds, other.variance_thresholds) + and np.array_equal( + self.variance_thresholds, other.variance_thresholds + ) and np.array_equal(self.weights, other.weights) ) @@ -518,7 +541,9 @@ class GMMMachine(BaseEstimator): """Returns True if `other` has the same gaussians (within a tolerance).""" return ( np.allclose(self.means, other.means, rtol=rtol, atol=atol) - and np.allclose(self.variances, other.variances, rtol=rtol, atol=atol) + and np.allclose( + self.variances, other.variances, rtol=rtol, atol=atol + ) and np.allclose( self.variance_thresholds, other.variance_thresholds, @@ -529,13 +554,16 @@ class GMMMachine(BaseEstimator): ) def initialize_gaussians( - self, data: "Union[np.ndarray[('n_samples', 'n_features'), float], None]" = None + self, + data: "Union[np.ndarray[('n_samples', 'n_features'), float], None]" = None, # noqa: F821 ): """Populates gaussians parameters with either k-means or the UBM values.""" if self.trainer == "map": self.means = copy.deepcopy(self.ubm.means) self.variances = copy.deepcopy(self.ubm.variances) - self.variance_thresholds = copy.deepcopy(self.ubm.variance_thresholds) + self.variance_thresholds = copy.deepcopy( + self.ubm.variance_thresholds + ) self.weights = copy.deepcopy(self.ubm.weights) else: logger.debug("GMM means was never set. Initializing with k-means.") @@ -559,7 +587,8 @@ class GMMMachine(BaseEstimator): self.weights = np.array(copy.deepcopy(weights)) def log_weighted_likelihood( - self, data: "np.ndarray[('n_samples', 'n_features'), float]" + self, + data: "np.ndarray[('n_samples', 'n_features'), float]", # noqa: F821 ): """Returns the weighted log likelihood for each Gaussian for a set of data. @@ -578,11 +607,14 @@ class GMMMachine(BaseEstimator): (data[None, ..., :] - self.means[..., None, :]) ** 2 / self.variances[..., None, :] ).sum(axis=-1) - l = -0.5 * (self.g_norms[:, None] + z) - log_weighted_likelihood = self.log_weights[:, None] + l + ll = -0.5 * (self.g_norms[:, None] + z) + log_weighted_likelihood = self.log_weights[:, None] + ll return log_weighted_likelihood - def log_likelihood(self, data: "np.ndarray[('n_samples', 'n_features'), float]"): + def log_likelihood( + self, + data: "np.ndarray[('n_samples', 'n_features'), float]", # noqa: F821 + ): """Returns the current log likelihood for a set of data in this Machine. Parameters @@ -624,7 +656,7 @@ class GMMMachine(BaseEstimator): def acc_statistics( self, - data: "np.ndarray[('n_samples', 'n_features'), float]", + data: "np.ndarray[('n_samples', 'n_features'), float]", # noqa: F821 statistics: Union[GMMStats, None] = None, ): """Accumulates the statistics of GMMStats for a set of data. 
@@ -655,7 +687,9 @@ class GMMMachine(BaseEstimator): # Log likelihood [array of shape (n_samples,)] log_likelihood = self.log_likelihood(data) # Responsibility P [array of shape (n_gaussians, n_samples)] - responsibility = np.exp(log_weighted_likelihoods - log_likelihood[None, :]) + responsibility = np.exp( + log_weighted_likelihoods - log_likelihood[None, :] + ) # Accumulate @@ -675,7 +709,10 @@ class GMMMachine(BaseEstimator): return statistics - def e_step(self, data: "np.ndarray[('n_samples', 'n_features'), float]"): + def e_step( + self, + data: "np.ndarray[('n_samples', 'n_features'), float]", # noqa: F821 + ): # noqa: F821 """Expectation step of the e-m algorithm.""" return self.acc_statistics(data) @@ -706,7 +743,9 @@ class GMMMachine(BaseEstimator): logger.debug("GMM means already set. Initialization was not run!") if self._variances is None: - logger.warning("Variances were not defined before fit. Using variance=1") + logger.warning( + "Variances were not defined before fit. Using variance=1" + ) self.variances = np.ones_like(self.means) average_output = 0 @@ -716,7 +755,11 @@ class GMMMachine(BaseEstimator): step += 1 logger.info( f"Iteration {step:3d}" - + (f"/{self.max_fitting_steps:3d}" if self.max_fitting_steps else "") + + ( + f"/{self.max_fitting_steps:3d}" + if self.max_fitting_steps + else "" + ) ) average_output_previous = average_output @@ -739,7 +782,8 @@ class GMMMachine(BaseEstimator): if step > 1: convergence_value = abs( - (average_output_previous - average_output) / average_output_previous + (average_output_previous - average_output) + / average_output_previous ) logger.debug(f"convergence val = {convergence_value}") @@ -748,10 +792,14 @@ class GMMMachine(BaseEstimator): self.convergence_threshold is not None and convergence_value <= self.convergence_threshold ): - logger.info("Reached convergence threshold. Training stopped.") + logger.info( + "Reached convergence threshold. Training stopped." + ) break else: - logger.info("Reached maximum step. Training stopped without convergence.") + logger.info( + "Reached maximum step. Training stopped without convergence." 
+ ) self.compute() return self @@ -816,9 +864,9 @@ def ml_gmm_m_step( # = 1/n * sum (Pxx) - mean^2 if update_variances: logger.debug("Update variances.") - machine.variances = statistics.sum_pxx / thresholded_n[:, None] - np.power( - machine.means, 2 - ) + machine.variances = statistics.sum_pxx / thresholded_n[ + :, None + ] - np.power(machine.means, 2) def map_gmm_m_step( @@ -890,9 +938,9 @@ def map_gmm_m_step( # Gaussian Mixture Models", Digital Signal Processing, 2000 if update_variances: # Calculate new variances (equation 13) - prior_norm_variances = (machine.ubm.variances + machine.ubm.means) - np.power( - machine.means, 2 - ) + prior_norm_variances = ( + machine.ubm.variances + machine.ubm.means + ) - np.power(machine.means, 2) new_variances = ( alpha[:, None] * statistics.sum_pxx / statistics.n[:, None] + (1 - alpha[:, None]) * (machine.ubm.variances + machine.ubm.means) diff --git a/bob/learn/em/cluster/k_means.py b/bob/learn/em/k_means.py similarity index 91% rename from bob/learn/em/cluster/k_means.py rename to bob/learn/em/k_means.py index abdf6a69e746efb17782f50a50a8c99bda2c357e..830b4e4da6cb613a7b8b846281e6558b72402100 100644 --- a/bob/learn/em/cluster/k_means.py +++ b/bob/learn/em/k_means.py @@ -3,11 +3,12 @@ # @date: Tue 27 Jul 2021 11:04:10 UTC+02 import logging -from typing import Tuple -from typing import Union + +from typing import Tuple, Union import dask.array as da import numpy as np + from dask_ml.cluster.k_means import k_init from sklearn.base import BaseEstimator @@ -32,14 +33,6 @@ class KMeansMachine(BaseEstimator): ---------- centroids_: ndarray of shape (n_clusters, n_features) The current clusters centroids. Available after fitting. - - Example - ------- - >>> data = dask.array.array([[0,-1,0],[-1,1,1],[3,2,1],[2,2,1],[1,0,2]]) - >>> machine = KMeansMachine(2).fit(data) - >>> machine.centroids_.compute() - ... array([[0. , 0. , 1. ], - ... [2.5, 2. , 1. ]]) """ def __init__( @@ -80,6 +73,15 @@ class KMeansMachine(BaseEstimator): self.first_order_statistics = None self.centroids_ = None + @property + def means(self) -> np.ndarray: + """An alias for `centroids_`.""" + return self.centroids_ + + @means.setter + def means(self, value: np.ndarray): + self.centroids_ = value + def get_centroids_distance(self, x: np.ndarray) -> np.ndarray: """Returns the distance values between x and each cluster's centroid. 
@@ -154,7 +156,9 @@ class KMeansMachine(BaseEstimator): """ n_cluster = self.n_clusters closest_centroid_indices = self.get_closest_centroid_index(data) - weights_count = np.bincount(closest_centroid_indices, minlength=n_cluster) + weights_count = np.bincount( + closest_centroid_indices, minlength=n_cluster + ) weights = weights_count / weights_count.sum() # FIX for `too many indices for array` error if using `np.eye(n_cluster)` alone: @@ -162,7 +166,8 @@ class KMeansMachine(BaseEstimator): # Accumulate means_sum = np.sum( - dask_compatible_eye[closest_centroid_indices][:, :, None] * data[:, None], + dask_compatible_eye[closest_centroid_indices][:, :, None] + * data[:, None], axis=0, ) variances_sum = np.sum( @@ -198,7 +203,8 @@ class KMeansMachine(BaseEstimator): ) # Sum of data points coordinates in each cluster self.first_order_statistics = np.sum( - np.eye(self.n_clusters)[closest_k_indices][:, :, None] * data[:, None], + np.eye(self.n_clusters)[closest_k_indices][:, :, None] + * data[:, None], axis=0, ) self.average_min_distance = self.get_min_distance(data).mean() @@ -210,7 +216,7 @@ class KMeansMachine(BaseEstimator): def fit(self, X, y=None): """Fits this machine on data samples.""" - logger.debug(f"Initializing trainer.") + logger.debug("Initializing trainer.") self.initialize(data=X) logger.info("Training k-means.") @@ -219,7 +225,8 @@ class KMeansMachine(BaseEstimator): while self.max_iter is None or step < self.max_iter: step += 1 logger.info( - f"Iteration {step:3d}" + (f"/{self.max_iter}" if self.max_iter else "") + f"Iteration {step:3d}" + + (f"/{self.max_iter}" if self.max_iter else "") ) distance_previous = distance self.e_step(data=X) @@ -234,7 +241,9 @@ class KMeansMachine(BaseEstimator): distance = float(self.average_min_distance) - logger.info(f"Average minimal squared Euclidean distance = {distance}") + logger.info( + f"Average minimal squared Euclidean distance = {distance}" + ) if step > 1: convergence_value = abs( @@ -247,10 +256,14 @@ class KMeansMachine(BaseEstimator): self.convergence_threshold is not None and convergence_value <= self.convergence_threshold ): - logger.info("Reached convergence threshold. Training stopped.") + logger.info( + "Reached convergence threshold. Training stopped." + ) break else: - logger.info("Reached maximum step. Training stopped without convergence.") + logger.info( + "Reached maximum step. Training stopped without convergence." 
+ ) self.compute() return self diff --git a/bob/learn/em/linear/__init__.py b/bob/learn/em/linear/__init__.py deleted file mode 100644 index 051c3932f575f1eb9ef1a4623f7c76e5c9d11fa6..0000000000000000000000000000000000000000 --- a/bob/learn/em/linear/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .wccn import WCCN -from .whitening import Whitening diff --git a/bob/learn/em/mixture/linear_scoring.py b/bob/learn/em/linear_scoring.py similarity index 93% rename from bob/learn/em/mixture/linear_scoring.py rename to bob/learn/em/linear_scoring.py index d7aac713791d95e07d4dbf2ae29a6f3afa56eb3a..46024c020048323efd9f4a9dd3f864cbffd2732c 100644 --- a/bob/learn/em/mixture/linear_scoring.py +++ b/bob/learn/em/linear_scoring.py @@ -5,12 +5,12 @@ """Implements the linear scoring function.""" import logging + from typing import Union import numpy as np -from bob.learn.em.mixture import GMMMachine -from bob.learn.em.mixture import GMMStats +from .gmm import GMMMachine, GMMStats logger = logging.getLogger(__name__) @@ -18,12 +18,12 @@ EPSILON = np.finfo(float).eps def linear_scoring( - models_means: "Union[list[GMMMachine], np.ndarray[('n_models', 'n_gaussians', 'n_features'), float]]", + models_means: "Union[list[GMMMachine], np.ndarray[('n_models', 'n_gaussians', 'n_features'), float]]", # noqa: F821 ubm: GMMMachine, test_stats: Union["list[GMMStats]", GMMStats], - test_channel_offsets: "np.ndarray[('n_test_stats', 'n_gaussians'), float]" = 0, + test_channel_offsets: "np.ndarray[('n_test_stats', 'n_gaussians'), float]" = 0, # noqa: F821 frame_length_normalization: bool = False, -) -> "np.ndarray[('n_models', 'n_test_stats'), float]": +) -> "np.ndarray[('n_models', 'n_test_stats'), float]": # noqa: F821 """Estimation of the LLR between a target model and the UBM for a test instance. The Linear scoring is an approximation to the log-likelihood ratio (LLR) that was diff --git a/bob/learn/em/mixture/__init__.py b/bob/learn/em/mixture/__init__.py deleted file mode 100644 index 229f13e224bae240869b1965f580bccf591fc073..0000000000000000000000000000000000000000 --- a/bob/learn/em/mixture/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .gmm import GMMMachine -from .gmm import GMMStats -from .linear_scoring import linear_scoring diff --git a/bob/learn/em/test/test_gmm.py b/bob/learn/em/test/test_gmm.py index e99e1601fb5cc0a9e1ef3b89b6ce1c3c0bcd5d3a..51a8bed00cc29adb3278be5f9dbdb2ff26fdd8a6 100644 --- a/bob/learn/em/test/test_gmm.py +++ b/bob/learn/em/test/test_gmm.py @@ -8,280 +8,313 @@ """Tests the GMM machine and the GMMStats container """ -import numpy as np -import dask.array as da - import os import tempfile + from copy import deepcopy + +import dask.array as da +import numpy as np + from h5py import File as HDF5File from pkg_resources import resource_filename -from bob.io.base import load as load_array +from bob.learn.em import GMMMachine, GMMStats, KMeansMachine + -from bob.learn.em.mixture import GMMMachine -from bob.learn.em.mixture import GMMStats +def load_array(filename): + with HDF5File(filename, "r") as f: + array = np.array(f[list(f.keys())[0]]) + # remove dimensions with size 1 + return np.squeeze(array) -from bob.learn.em.cluster import KMeansMachine def test_GMMStats(): - # Test a GMMStats - # Initializes a GMMStats - n_gaussians = 2 - n_features = 3 - gs = GMMStats(n_gaussians,n_features) - log_likelihood = -3. 
- T = 57 - n = np.array([4.37, 5.31], "float64") - sumpx = np.array([[1., 2., 3.], [4., 5., 6.]], "float64") - sumpxx = np.array([[10., 20., 30.], [40., 50., 60.]], "float64") - gs.log_likelihood = log_likelihood - gs.t = T - gs.n = n - gs.sum_px = sumpx - gs.sum_pxx = sumpxx - np.testing.assert_equal(gs.log_likelihood, log_likelihood) - np.testing.assert_equal(gs.t, T) - np.testing.assert_equal(gs.n, n) - np.testing.assert_equal(gs.sum_px, sumpx) - np.testing.assert_equal(gs.sum_pxx, sumpxx) - np.testing.assert_equal(gs.shape, (n_gaussians, n_features)) - - # Saves and reads from file using `from_hdf5` - filename = str(tempfile.mkstemp(".hdf5")[1]) - gs.save(HDF5File(filename, "w")) - gs_loaded = GMMStats.from_hdf5(HDF5File(filename, "r")) - assert gs == gs_loaded - assert (gs != gs_loaded ) is False - assert gs.is_similar_to(gs_loaded) - - assert type(gs_loaded.n_gaussians) is np.int64 - assert type(gs_loaded.n_features) is np.int64 - assert type(gs_loaded.log_likelihood) is np.float64 - - # Saves and load from file using `load` - filename = str(tempfile.mkstemp(".hdf5")[1]) - gs.save(hdf5=HDF5File(filename, "w")) - gs_loaded = GMMStats(n_gaussians, n_features) - gs_loaded.load(HDF5File(filename, "r")) - assert gs == gs_loaded - assert (gs != gs_loaded ) is False - assert gs.is_similar_to(gs_loaded) - - # Makes them different - gs_loaded.t = 58 - assert (gs == gs_loaded ) is False - assert gs != gs_loaded - assert not (gs.is_similar_to(gs_loaded)) - - # Accumulates from another GMMStats - gs2 = GMMStats(n_gaussians, n_features) - gs2.log_likelihood = log_likelihood - gs2.t = T - gs2.n = n.copy() - gs2.sum_px = sumpx.copy() - gs2.sum_pxx = sumpxx.copy() - gs2 += gs - np.testing.assert_equal(gs2.log_likelihood, 2*log_likelihood) - np.testing.assert_equal(gs2.t, 2*T) - np.testing.assert_almost_equal(gs2.n, 2*n, decimal=8) - np.testing.assert_almost_equal(gs2.sum_px, 2*sumpx, decimal=8) - np.testing.assert_almost_equal(gs2.sum_pxx, 2*sumpxx, decimal=8) - - # Re-init and checks for zeros - gs_loaded.init_fields() - np.testing.assert_equal(gs_loaded.log_likelihood, 0) - np.testing.assert_equal(gs_loaded.t, 0) - np.testing.assert_equal(gs_loaded.n, np.zeros((n_gaussians,))) - np.testing.assert_equal(gs_loaded.sum_px, np.zeros((n_gaussians, n_features))) - np.testing.assert_equal(gs_loaded.sum_pxx, np.zeros((n_gaussians, n_features))) - # Resize and checks size - assert gs_loaded.shape==(n_gaussians, n_features) - gs_loaded.resize(4,5) - assert gs_loaded.shape == (4,5) - assert gs_loaded.sum_px.shape[0] == 4 - assert gs_loaded.sum_px.shape[1] == 5 - - # Clean-up - os.unlink(filename) + # Test a GMMStats + # Initializes a GMMStats + n_gaussians = 2 + n_features = 3 + gs = GMMStats(n_gaussians, n_features) + log_likelihood = -3.0 + T = 57 + n = np.array([4.37, 5.31], "float64") + sumpx = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], "float64") + sumpxx = np.array([[10.0, 20.0, 30.0], [40.0, 50.0, 60.0]], "float64") + gs.log_likelihood = log_likelihood + gs.t = T + gs.n = n + gs.sum_px = sumpx + gs.sum_pxx = sumpxx + np.testing.assert_equal(gs.log_likelihood, log_likelihood) + np.testing.assert_equal(gs.t, T) + np.testing.assert_equal(gs.n, n) + np.testing.assert_equal(gs.sum_px, sumpx) + np.testing.assert_equal(gs.sum_pxx, sumpxx) + np.testing.assert_equal(gs.shape, (n_gaussians, n_features)) + + # Saves and reads from file using `from_hdf5` + filename = str(tempfile.mkstemp(".hdf5")[1]) + gs.save(HDF5File(filename, "w")) + gs_loaded = GMMStats.from_hdf5(HDF5File(filename, "r")) + assert gs == 
gs_loaded + assert (gs != gs_loaded) is False + assert gs.is_similar_to(gs_loaded) + + assert type(gs_loaded.n_gaussians) is np.int64 + assert type(gs_loaded.n_features) is np.int64 + assert type(gs_loaded.log_likelihood) is np.float64 + + # Saves and load from file using `load` + filename = str(tempfile.mkstemp(".hdf5")[1]) + gs.save(hdf5=HDF5File(filename, "w")) + gs_loaded = GMMStats(n_gaussians, n_features) + gs_loaded.load(HDF5File(filename, "r")) + assert gs == gs_loaded + assert (gs != gs_loaded) is False + assert gs.is_similar_to(gs_loaded) + + # Makes them different + gs_loaded.t = 58 + assert (gs == gs_loaded) is False + assert gs != gs_loaded + assert not (gs.is_similar_to(gs_loaded)) + + # Accumulates from another GMMStats + gs2 = GMMStats(n_gaussians, n_features) + gs2.log_likelihood = log_likelihood + gs2.t = T + gs2.n = n.copy() + gs2.sum_px = sumpx.copy() + gs2.sum_pxx = sumpxx.copy() + gs2 += gs + np.testing.assert_equal(gs2.log_likelihood, 2 * log_likelihood) + np.testing.assert_equal(gs2.t, 2 * T) + np.testing.assert_almost_equal(gs2.n, 2 * n, decimal=8) + np.testing.assert_almost_equal(gs2.sum_px, 2 * sumpx, decimal=8) + np.testing.assert_almost_equal(gs2.sum_pxx, 2 * sumpxx, decimal=8) + + # Re-init and checks for zeros + gs_loaded.init_fields() + np.testing.assert_equal(gs_loaded.log_likelihood, 0) + np.testing.assert_equal(gs_loaded.t, 0) + np.testing.assert_equal(gs_loaded.n, np.zeros((n_gaussians,))) + np.testing.assert_equal( + gs_loaded.sum_px, np.zeros((n_gaussians, n_features)) + ) + np.testing.assert_equal( + gs_loaded.sum_pxx, np.zeros((n_gaussians, n_features)) + ) + # Resize and checks size + assert gs_loaded.shape == (n_gaussians, n_features) + gs_loaded.resize(4, 5) + assert gs_loaded.shape == (4, 5) + assert gs_loaded.sum_px.shape[0] == 4 + assert gs_loaded.sum_px.shape[1] == 5 + + # Clean-up + os.unlink(filename) + def test_GMMMachine(): - # Test a GMMMachine basic features - - weights = np.array([0.5, 0.5], "float64") - weights2 = np.array([0.6, 0.4], "float64") - means = np.array([[3, 70, 0], [4, 72, 0]], "float64") - means2 = np.array([[3, 7, 0], [4, 72, 0]], "float64") - variances = np.array([[1, 10, 1], [2, 5, 2]], "float64") - variances2 = np.array([[10, 10, 1], [2, 5, 2]], "float64") - varianceThresholds = np.array([[0, 0, 0], [0, 0, 0]], "float64") - varianceThresholds2 = np.array([[0.0005, 0.0005, 0.0005], [0, 0, 0]], "float64") - - # Initializes a GMMMachine - gmm = GMMMachine(n_gaussians=2) - # Sets the weights, means, variances and varianceThresholds and - # Checks correctness - gmm.weights = weights - gmm.means = means - gmm.variances = variances - gmm.variance_thresholds = varianceThresholds - assert gmm.shape == (2,3) - np.testing.assert_equal(gmm.weights, weights) - np.testing.assert_equal(gmm.means, means) - np.testing.assert_equal(gmm.variances, variances) - np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds) - - newMeans = np.array([[3, 70, 2], [4, 72, 2]], "float64") - newVariances = np.array([[1, 1, 1], [2, 2, 2]], "float64") - - # Checks particular varianceThresholds-related methods - varianceThresholds1D = np.array([0.3, 1, 0.5], "float64") - gmm.variance_thresholds = varianceThresholds1D - np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds1D) - - gmm.variance_thresholds = 0.005 - np.testing.assert_equal(gmm.variance_thresholds, 0.005) - - gmm.means = newMeans - gmm.variances = newVariances - np.testing.assert_equal(gmm.means, newMeans) - np.testing.assert_equal(gmm.variances, newVariances) - - # 
Checks comparison - gmm2 = deepcopy(gmm) - gmm3 = GMMMachine(n_gaussians=2) - gmm3.weights = weights2 - gmm3.means = means - gmm3.variances = variances - gmm3.variance_thresholds = varianceThresholds - gmm4 = GMMMachine(n_gaussians=2) - gmm4.weights = weights - gmm4.means = means2 - gmm4.variances = variances - gmm4.variance_thresholds = varianceThresholds - gmm5 = GMMMachine(n_gaussians=2) - gmm5.weights = weights - gmm5.means = means - gmm5.variances = variances2 - gmm5.variance_thresholds = varianceThresholds - gmm6 = GMMMachine(n_gaussians=2) - gmm6.weights = weights - gmm6.means = means - gmm6.variances = variances - gmm6.variance_thresholds = varianceThresholds2 - - assert gmm == gmm2 - assert (gmm != gmm2) is False - assert gmm.is_similar_to(gmm2) - assert gmm != gmm3 - assert (gmm == gmm3) is False - assert gmm.is_similar_to(gmm3) is False - assert gmm != gmm4 - assert (gmm == gmm4) is False - assert gmm.is_similar_to(gmm4) is False - assert gmm != gmm5 - assert (gmm == gmm5) is False - assert gmm.is_similar_to(gmm5) is False - assert gmm != gmm6 - assert (gmm == gmm6) is False - assert gmm.is_similar_to(gmm6) is False - - # Saving and loading - with tempfile.NamedTemporaryFile(suffix=".hdf5") as f: - filename = f.name - gmm.save(HDF5File(filename, "w")) - gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r")) - assert type(gmm1.n_gaussians) is np.int64 - assert type(gmm1.update_means) is np.bool_ - assert type(gmm1.update_variances) is np.bool_ - assert type(gmm1.update_weights) is np.bool_ - assert type(gmm1.trainer) is str - assert gmm1.ubm is None - assert gmm == gmm1 - with tempfile.NamedTemporaryFile(suffix=".hdf5") as f: - filename = f.name - gmm.save(filename) - gmm1 = GMMMachine.from_hdf5(filename) - assert gmm == gmm1 - - # Weights - n_gaussians = 5 - machine = GMMMachine(n_gaussians) - - default_weights = np.full(shape=(n_gaussians,), fill_value=1.0 / n_gaussians) - default_log_weights = np.full( - shape=(n_gaussians,), fill_value=np.log(1.0 / n_gaussians) - ) - - # Test weights getting and setting - np.testing.assert_almost_equal(machine.weights, default_weights) - np.testing.assert_almost_equal(machine.log_weights, default_log_weights) - - modified_weights = default_weights - modified_weights[: n_gaussians // 2] = (1 / n_gaussians) / 2 - modified_weights[n_gaussians // 2 + n_gaussians % 2 :] = (1 / n_gaussians) * 1.5 - - # Ensure setter works (log_weights is updated correctly) - machine.weights = modified_weights - np.testing.assert_almost_equal(machine.weights, modified_weights) - np.testing.assert_almost_equal(machine.log_weights, np.log(modified_weights)) + # Test a GMMMachine basic features + + weights = np.array([0.5, 0.5], "float64") + weights2 = np.array([0.6, 0.4], "float64") + means = np.array([[3, 70, 0], [4, 72, 0]], "float64") + means2 = np.array([[3, 7, 0], [4, 72, 0]], "float64") + variances = np.array([[1, 10, 1], [2, 5, 2]], "float64") + variances2 = np.array([[10, 10, 1], [2, 5, 2]], "float64") + varianceThresholds = np.array([[0, 0, 0], [0, 0, 0]], "float64") + varianceThresholds2 = np.array( + [[0.0005, 0.0005, 0.0005], [0, 0, 0]], "float64" + ) + + # Initializes a GMMMachine + gmm = GMMMachine(n_gaussians=2) + # Sets the weights, means, variances and varianceThresholds and + # Checks correctness + gmm.weights = weights + gmm.means = means + gmm.variances = variances + gmm.variance_thresholds = varianceThresholds + assert gmm.shape == (2, 3) + np.testing.assert_equal(gmm.weights, weights) + np.testing.assert_equal(gmm.means, means) + 
np.testing.assert_equal(gmm.variances, variances) + np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds) + + newMeans = np.array([[3, 70, 2], [4, 72, 2]], "float64") + newVariances = np.array([[1, 1, 1], [2, 2, 2]], "float64") + + # Checks particular varianceThresholds-related methods + varianceThresholds1D = np.array([0.3, 1, 0.5], "float64") + gmm.variance_thresholds = varianceThresholds1D + np.testing.assert_equal(gmm.variance_thresholds, varianceThresholds1D) + + gmm.variance_thresholds = 0.005 + np.testing.assert_equal(gmm.variance_thresholds, 0.005) + + gmm.means = newMeans + gmm.variances = newVariances + np.testing.assert_equal(gmm.means, newMeans) + np.testing.assert_equal(gmm.variances, newVariances) + + # Checks comparison + gmm2 = deepcopy(gmm) + gmm3 = GMMMachine(n_gaussians=2) + gmm3.weights = weights2 + gmm3.means = means + gmm3.variances = variances + gmm3.variance_thresholds = varianceThresholds + gmm4 = GMMMachine(n_gaussians=2) + gmm4.weights = weights + gmm4.means = means2 + gmm4.variances = variances + gmm4.variance_thresholds = varianceThresholds + gmm5 = GMMMachine(n_gaussians=2) + gmm5.weights = weights + gmm5.means = means + gmm5.variances = variances2 + gmm5.variance_thresholds = varianceThresholds + gmm6 = GMMMachine(n_gaussians=2) + gmm6.weights = weights + gmm6.means = means + gmm6.variances = variances + gmm6.variance_thresholds = varianceThresholds2 + + assert gmm == gmm2 + assert (gmm != gmm2) is False + assert gmm.is_similar_to(gmm2) + assert gmm != gmm3 + assert (gmm == gmm3) is False + assert gmm.is_similar_to(gmm3) is False + assert gmm != gmm4 + assert (gmm == gmm4) is False + assert gmm.is_similar_to(gmm4) is False + assert gmm != gmm5 + assert (gmm == gmm5) is False + assert gmm.is_similar_to(gmm5) is False + assert gmm != gmm6 + assert (gmm == gmm6) is False + assert gmm.is_similar_to(gmm6) is False + + # Saving and loading + with tempfile.NamedTemporaryFile(suffix=".hdf5") as f: + filename = f.name + gmm.save(HDF5File(filename, "w")) + gmm1 = GMMMachine.from_hdf5(HDF5File(filename, "r")) + assert type(gmm1.n_gaussians) is np.int64 + assert type(gmm1.update_means) is np.bool_ + assert type(gmm1.update_variances) is np.bool_ + assert type(gmm1.update_weights) is np.bool_ + assert type(gmm1.trainer) is str + assert gmm1.ubm is None + assert gmm == gmm1 + with tempfile.NamedTemporaryFile(suffix=".hdf5") as f: + filename = f.name + gmm.save(filename) + gmm1 = GMMMachine.from_hdf5(filename) + assert gmm == gmm1 + + # Weights + n_gaussians = 5 + machine = GMMMachine(n_gaussians) + + default_weights = np.full( + shape=(n_gaussians,), fill_value=1.0 / n_gaussians + ) + default_log_weights = np.full( + shape=(n_gaussians,), fill_value=np.log(1.0 / n_gaussians) + ) + + # Test weights getting and setting + np.testing.assert_almost_equal(machine.weights, default_weights) + np.testing.assert_almost_equal(machine.log_weights, default_log_weights) + + modified_weights = default_weights + modified_weights[: n_gaussians // 2] = (1 / n_gaussians) / 2 + modified_weights[n_gaussians // 2 + n_gaussians % 2 :] = ( + 1 / n_gaussians + ) * 1.5 + + # Ensure setter works (log_weights is updated correctly) + machine.weights = modified_weights + np.testing.assert_almost_equal(machine.weights, modified_weights) + np.testing.assert_almost_equal( + machine.log_weights, np.log(modified_weights) + ) def test_GMMMachine_stats(): - """Tests a GMMMachine (statistics)""" + """Tests a GMMMachine (statistics)""" - arrayset = load_array(resource_filename("bob.learn.em", 
"data/faithful.torch3_f64.hdf5")) - gmm = GMMMachine(n_gaussians=2) - gmm.weights = np.array([0.5, 0.5], "float64") - gmm.means = np.array([[3, 70], [4, 72]], "float64") - gmm.variances = np.array([[1, 10], [2, 5]], "float64") - gmm.variance_thresholds = np.array([[0, 0], [0, 0]], "float64") + arrayset = load_array( + resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5") + ) + gmm = GMMMachine(n_gaussians=2) + gmm.weights = np.array([0.5, 0.5], "float64") + gmm.means = np.array([[3, 70], [4, 72]], "float64") + gmm.variances = np.array([[1, 10], [2, 5]], "float64") + gmm.variance_thresholds = np.array([[0, 0], [0, 0]], "float64") - stats = gmm.acc_statistics(arrayset) + stats = gmm.acc_statistics(arrayset) - stats_ref = GMMStats(n_gaussians=2, n_features=2) - stats_ref.load(HDF5File(resource_filename("bob.learn.em", "data/stats.hdf5"), "r")) + stats_ref = GMMStats(n_gaussians=2, n_features=2) + stats_ref.load( + HDF5File(resource_filename("bob.learn.em", "data/stats.hdf5"), "r") + ) - np.testing.assert_equal(stats.t, stats_ref.t) - np.testing.assert_almost_equal(stats.n, stats_ref.n, decimal=10) - # np.testing.assert_equal(stats.sum_px, stats_ref.sum_px) - # Note AA: precision error above - np.testing.assert_almost_equal(stats.sum_px, stats_ref.sum_px, decimal=10) - np.testing.assert_almost_equal(stats.sum_pxx, stats_ref.sum_pxx, decimal=10) + np.testing.assert_equal(stats.t, stats_ref.t) + np.testing.assert_almost_equal(stats.n, stats_ref.n, decimal=10) + # np.testing.assert_equal(stats.sum_px, stats_ref.sum_px) + # Note AA: precision error above + np.testing.assert_almost_equal(stats.sum_px, stats_ref.sum_px, decimal=10) + np.testing.assert_almost_equal(stats.sum_pxx, stats_ref.sum_pxx, decimal=10) def test_GMMMachine_ll_computation(): - """Test a GMMMachine (log-likelihood computation)""" + """Test a GMMMachine (log-likelihood computation)""" - data = load_array(resource_filename("bob.learn.em", "data/data.hdf5")) - gmm = GMMMachine(n_gaussians=2) - gmm.weights = load_array(resource_filename("bob.learn.em", "data/weights.hdf5")) - gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5")) - gmm.variances = load_array(resource_filename("bob.learn.em", "data/variances.hdf5")) + data = load_array(resource_filename("bob.learn.em", "data/data.hdf5")) + gmm = GMMMachine(n_gaussians=2) + gmm.weights = load_array( + resource_filename("bob.learn.em", "data/weights.hdf5") + ) + gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5")) + gmm.variances = load_array( + resource_filename("bob.learn.em", "data/variances.hdf5") + ) - # Compare the log-likelihood with the one obtained using Chris Matlab implementation - matlab_ll_ref = -2.361583051672024e+02 - np.testing.assert_almost_equal(gmm.log_likelihood(data), matlab_ll_ref, decimal=10) + # Compare the log-likelihood with the one obtained using Chris Matlab implementation + matlab_ll_ref = -2.361583051672024e02 + np.testing.assert_almost_equal( + gmm.log_likelihood(data), matlab_ll_ref, decimal=10 + ) def test_GMMMachine_single_ll_vs_multiple(): - np.random.seed(3) # FIXING A SEED - - data = np.random.rand(100,50) # Doesn't matter if it is random. 
The average of 1D array (in python) MUST output the same result for the 2D array (in C++) + np.random.seed(3) # FIXING A SEED - gmm = GMMMachine(n_gaussians=2) - gmm.weights = load_array(resource_filename("bob.learn.em", "data/weights.hdf5")) - gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5")) - gmm.variances = load_array(resource_filename("bob.learn.em", "data/variances.hdf5")) + data = np.random.rand( + 100, 50 + ) # Doesn't matter if it is random. The average of 1D array (in python) MUST output the same result for the 2D array (in C++) + gmm = GMMMachine(n_gaussians=2) + gmm.weights = load_array( + resource_filename("bob.learn.em", "data/weights.hdf5") + ) + gmm.means = load_array(resource_filename("bob.learn.em", "data/means.hdf5")) + gmm.variances = load_array( + resource_filename("bob.learn.em", "data/variances.hdf5") + ) - ll = 0 - for i in range(data.shape[0]): - ll += gmm.log_likelihood(data[i,:]) - ll /= data.shape[0] + ll = 0 + for i in range(data.shape[0]): + ll += gmm.log_likelihood(data[i, :]) + ll /= data.shape[0] - assert np.isclose(ll, gmm.log_likelihood(data).mean()) + assert np.isclose(ll, gmm.log_likelihood(data).mean()) def test_GMMStats_operations(): @@ -324,8 +357,14 @@ def test_GMMStats_operations(): new_expected_sumPxx = expected_sumPxx * 2 assert new_stats.n.shape == (n_gaussians,), new_stats.n.shape - assert new_stats.sum_px.shape == (n_gaussians, n_features), new_stats.sum_px.shape - assert new_stats.sum_pxx.shape == (n_gaussians, n_features), new_stats.sum_pxx.shape + assert new_stats.sum_px.shape == ( + n_gaussians, + n_features, + ), new_stats.sum_px.shape + assert new_stats.sum_pxx.shape == ( + n_gaussians, + n_features, + ), new_stats.sum_pxx.shape np.testing.assert_almost_equal(new_stats.log_likelihood, new_expected_ll) assert new_stats.t == data.shape[0] * 2 @@ -342,8 +381,14 @@ def test_GMMStats_operations(): new_expected_sumPxx += expected_sumPxx assert new_stats.n.shape == (n_gaussians,), new_stats.n.shape - assert new_stats.sum_px.shape == (n_gaussians, n_features), new_stats.sum_px.shape - assert new_stats.sum_pxx.shape == (n_gaussians, n_features), new_stats.sum_pxx.shape + assert new_stats.sum_px.shape == ( + n_gaussians, + n_features, + ), new_stats.sum_px.shape + assert new_stats.sum_pxx.shape == ( + n_gaussians, + n_features, + ), new_stats.sum_pxx.shape np.testing.assert_almost_equal(new_stats.log_likelihood, new_expected_ll) assert new_stats.t == data.shape[0] * 3 @@ -358,8 +403,12 @@ def test_machine_parameters(): machine = GMMMachine(n_gaussians) machine.means = np.repeat([[0], [1], [-1]], n_features, 1) machine.variances = np.ones_like(machine.means) - np.testing.assert_equal(machine.means, np.repeat([[0], [1], [-1]], n_features, 1)) - np.testing.assert_equal(machine.variances, np.ones((n_gaussians, n_features))) + np.testing.assert_equal( + machine.means, np.repeat([[0], [1], [-1]], n_features, 1) + ) + np.testing.assert_equal( + machine.variances, np.ones((n_gaussians, n_features)) + ) # Setters new_means = np.repeat([[1], [2], [3]], n_features, axis=1) @@ -376,12 +425,18 @@ def test_gmm_kmeans_plusplus_init(): n_gaussians = 3 machine = GMMMachine( n_gaussians, - k_means_trainer=KMeansMachine(n_clusters=n_gaussians, init_method="k-means++"), + k_means_trainer=KMeansMachine( + n_clusters=n_gaussians, init_method="k-means++" + ), + ) + data = np.array( + [[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]] ) - data = np.array([[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]]) machine = 
machine.fit(data) expected_means = np.array([[2.25, 2.25], [-1.25, 0.25], [1.25, 1.25]]) - expected_variances = np.array([[1/16, 1/16], [1/16, 1/16], [1/16, 1/16]]) + expected_variances = np.array( + [[1 / 16, 1 / 16], [1 / 16, 1 / 16], [1 / 16, 1 / 16]] + ) np.testing.assert_almost_equal(machine.means, expected_means, decimal=3) np.testing.assert_almost_equal(machine.variances, expected_variances) @@ -390,12 +445,18 @@ def test_gmm_kmeans_parallel_init(): n_gaussians = 3 machine = GMMMachine( n_gaussians, - k_means_trainer=KMeansMachine(n_clusters=n_gaussians, init_method="k-means||"), + k_means_trainer=KMeansMachine( + n_clusters=n_gaussians, init_method="k-means||" + ), + ) + data = np.array( + [[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]] ) - data = np.array([[1.5, 1], [1, 1.5], [-1, 0.5], [-1.5, 0], [2, 2], [2.5, 2.5]]) machine = machine.fit(data) expected_means = np.array([[1.25, 1.25], [-1.25, 0.25], [2.25, 2.25]]) - expected_variances = np.array([[1/16, 1/16], [1/16, 1/16], [1/16, 1/16]]) + expected_variances = np.array( + [[1 / 16, 1 / 16], [1 / 16, 1 / 16], [1 / 16, 1 / 16]] + ) np.testing.assert_almost_equal(machine.means, expected_means, decimal=3) np.testing.assert_almost_equal(machine.variances, expected_variances) @@ -408,7 +469,12 @@ def test_likelihood(): machine.variances = np.ones_like(machine.means) log_likelihood = machine.log_likelihood(data) expected_ll = np.array( - [-3.6519900964986527, -3.83151883210222, -3.83151883210222, -5.344374066745753] + [ + -3.6519900964986527, + -3.83151883210222, + -3.83151883210222, + -5.344374066745753, + ] ) np.testing.assert_almost_equal(log_likelihood, expected_ll) @@ -418,11 +484,13 @@ def test_likelihood_variance(): n_gaussians = 3 machine = GMMMachine(n_gaussians) machine.means = np.repeat([[0], [1], [-1]], 3, 1) - machine.variances = np.array([ - [1.1, 1.2, 0.8], - [0.2, 0.4, 0.5], - [1, 1, 1], - ]) + machine.variances = np.array( + [ + [1.1, 1.2, 0.8], + [0.2, 0.4, 0.5], + [1, 1, 1], + ] + ) log_likelihood = machine.log_likelihood(data) expected_ll = np.array( [ @@ -444,7 +512,12 @@ def test_likelihood_weight(): machine.weights = [0.6, 0.1, 0.3] log_likelihood = machine.log_likelihood(data) expected_ll = np.array( - [-4.206596356117164, -3.492325679996329, -3.634745457950943, -6.49485678536014] + [ + -4.206596356117164, + -3.492325679996329, + -3.634745457950943, + -6.49485678536014, + ] ) np.testing.assert_almost_equal(log_likelihood, expected_ll) @@ -453,7 +526,9 @@ def test_GMMMachine_object(): n_gaussians = 5 machine = GMMMachine(n_gaussians) - default_weights = np.full(shape=(n_gaussians,), fill_value=1.0 / n_gaussians) + default_weights = np.full( + shape=(n_gaussians,), fill_value=1.0 / n_gaussians + ) default_log_weights = np.full( shape=(n_gaussians,), fill_value=np.log(1.0 / n_gaussians) ) @@ -464,12 +539,16 @@ def test_GMMMachine_object(): modified_weights = default_weights modified_weights[: n_gaussians // 2] = (1 / n_gaussians) / 2 - modified_weights[n_gaussians // 2 + n_gaussians % 2 :] = (1 / n_gaussians) * 1.5 + modified_weights[n_gaussians // 2 + n_gaussians % 2 :] = ( + 1 / n_gaussians + ) * 1.5 # Ensure setter works (log_weights is updated correctly) machine.weights = modified_weights np.testing.assert_almost_equal(machine.weights, modified_weights) - np.testing.assert_almost_equal(machine.log_weights, np.log(modified_weights)) + np.testing.assert_almost_equal( + machine.log_weights, np.log(modified_weights) + ) def test_ml_em(): @@ -478,19 +557,24 @@ def test_ml_em(): n_gaussians = 2 
n_features = data.shape[-1] - machine = GMMMachine(n_gaussians, update_means=True, update_variances=True, update_weights=True) + machine = GMMMachine( + n_gaussians, + update_means=True, + update_variances=True, + update_weights=True, + ) machine.means = np.repeat([[2], [8]], n_features, 1) machine.variances = np.ones_like(machine.means) - stats = machine.e_step( data) + stats = machine.e_step(data) machine.m_step(stats) expected_means = np.array([[1.5, 1.5, 2.0], [7.0, 8.0, 8.0]]) np.testing.assert_almost_equal(machine.means, expected_means) - expected_weights = np.array([2/5, 3/5]) + expected_weights = np.array([2 / 5, 3 / 5]) np.testing.assert_almost_equal(machine.weights, expected_weights) eps = np.finfo(float).eps - expected_variances = np.array([[1/4, 1/4, eps], [eps, 2/3, 2/3]]) + expected_variances = np.array([[1 / 4, 1 / 4, eps], [eps, 2 / 3, 2 / 3]]) np.testing.assert_almost_equal(machine.variances, expected_variances) @@ -501,9 +585,18 @@ def test_map_em(): prior_machine.variances = np.ones_like(prior_machine.means) prior_machine.weights = np.array([0.5, 0.5]) - machine = GMMMachine(n_gaussians, trainer="map", ubm=prior_machine, update_means=True, update_variances=True, update_weights=True) + machine = GMMMachine( + n_gaussians, + trainer="map", + ubm=prior_machine, + update_means=True, + update_variances=True, + update_weights=True, + ) - post_data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]]) + post_data = np.array( + [[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]] + ) machine.initialize_gaussians(None) @@ -515,10 +608,9 @@ def test_map_em(): stats = machine.e_step(post_data) machine.m_step(stats) - expected_means = np.array([ - [1.83333333, 1.83333333, 2.], - [7.57142857, 8, 8] - ]) + expected_means = np.array( + [[1.83333333, 1.83333333, 2.0], [7.57142857, 8, 8]] + ) np.testing.assert_almost_equal(machine.means, expected_means) eps = np.finfo(float).eps expected_vars = np.array([[eps, eps, eps], [eps, eps, eps]]) @@ -533,7 +625,12 @@ def test_ml_transformer(): n_gaussians = 2 n_features = 3 - machine = GMMMachine(n_gaussians, update_means=True, update_variances=True, update_weights=True) + machine = GMMMachine( + n_gaussians, + update_means=True, + update_variances=True, + update_weights=True, + ) machine.means = np.array([[2, 2, 2], [8, 8, 8]]) machine.variances = np.ones_like(machine.means) @@ -541,10 +638,10 @@ def test_ml_transformer(): expected_means = np.array([[1.5, 1.5, 2.0], [7.0, 8.0, 8.0]]) np.testing.assert_almost_equal(machine.means, expected_means) - expected_weights = np.array([2/5, 3/5]) + expected_weights = np.array([2 / 5, 3 / 5]) np.testing.assert_almost_equal(machine.weights, expected_weights) eps = np.finfo(float).eps - expected_variances = np.array([[1/4, 1/4, eps], [eps, 2/3, 2/3]]) + expected_variances = np.array([[1 / 4, 1 / 4, eps], [eps, 2 / 3, 2 / 3]]) np.testing.assert_almost_equal(machine.variances, expected_variances) stats = machine.transform(test_data) @@ -561,7 +658,9 @@ def test_ml_transformer(): def test_map_transformer(): - post_data = np.array([[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]]) + post_data = np.array( + [[1, 2, 2], [2, 1, 2], [7, 8, 9], [7, 7, 8], [7, 9, 7]] + ) test_data = np.array([[1, 1, 1], [1, 1, 2], [8, 9, 9], [8, 8, 8]]) n_gaussians = 2 n_features = 3 @@ -570,14 +669,20 @@ def test_map_transformer(): prior_machine.variances = np.ones_like(prior_machine.means) prior_machine.weights = np.array([0.5, 0.5]) - machine = GMMMachine(n_gaussians, trainer="map", ubm=prior_machine, 
update_means=True, update_variances=True, update_weights=True) + machine = GMMMachine( + n_gaussians, + trainer="map", + ubm=prior_machine, + update_means=True, + update_variances=True, + update_weights=True, + ) machine = machine.fit(post_data) - expected_means = np.array([ - [1.83333333, 1.83333333, 2.], - [7.57142857, 8, 8] - ]) + expected_means = np.array( + [[1.83333333, 1.83333333, 2.0], [7.57142857, 8, 8]] + ) np.testing.assert_almost_equal(machine.means, expected_means) eps = np.finfo(float).eps expected_vars = np.array([[eps, eps, eps], [eps, eps, eps]]) @@ -589,7 +694,7 @@ def test_map_transformer(): expected_stats = GMMStats(n_gaussians, n_features) expected_stats.init_fields( - log_likelihood=-1.3837590691807108e+16, + log_likelihood=-1.3837590691807108e16, t=test_data.shape[0], n=np.array([2, 2], dtype=float), sum_px=np.array([[2, 2, 3], [16, 17, 17]], dtype=float), @@ -598,20 +703,30 @@ def test_map_transformer(): assert stats.is_similar_to(expected_stats) -## Tests from `test_em.py` +# Tests from `test_em.py` + def loadGMM(): gmm = GMMMachine(n_gaussians=2) - gmm.weights = load_array(resource_filename("bob.learn.em", "data/gmm.init_weights.hdf5")) - gmm.means = load_array(resource_filename("bob.learn.em", "data/gmm.init_means.hdf5")) - gmm.variances = load_array(resource_filename("bob.learn.em", "data/gmm.init_variances.hdf5")) + gmm.weights = load_array( + resource_filename("bob.learn.em", "data/gmm.init_weights.hdf5") + ) + gmm.means = load_array( + resource_filename("bob.learn.em", "data/gmm.init_means.hdf5") + ) + gmm.variances = load_array( + resource_filename("bob.learn.em", "data/gmm.init_variances.hdf5") + ) return gmm + def test_gmm_ML_1(): """Trains a GMMMachine with ML_GMMTrainer""" - ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")) + ar = load_array( + resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5") + ) gmm = loadGMM() # test rng handling @@ -632,19 +747,31 @@ def test_gmm_ML_1(): # Generate reference # gmm.save(HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "w")) - gmm_ref = GMMMachine.from_hdf5(HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r")) + gmm_ref = GMMMachine.from_hdf5( + HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r") + ) assert gmm == gmm_ref def test_gmm_ML_2(): """Trains a GMMMachine with ML_GMMTrainer; compares to a reference""" - ar = load_array(resource_filename("bob.learn.em", "data/dataNormalized.hdf5")) + ar = load_array( + resource_filename("bob.learn.em", "data/dataNormalized.hdf5") + ) # Initialize GMMMachine gmm = GMMMachine(n_gaussians=5) - gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterKMeans.hdf5")).astype("float64") - gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterKMeans.hdf5")).astype("float64") - gmm.weights = np.exp(load_array(resource_filename("bob.learn.em", "data/weightsAfterKMeans.hdf5")).astype("float64")) + gmm.means = load_array( + resource_filename("bob.learn.em", "data/meansAfterKMeans.hdf5") + ).astype("float64") + gmm.variances = load_array( + resource_filename("bob.learn.em", "data/variancesAfterKMeans.hdf5") + ).astype("float64") + gmm.weights = np.exp( + load_array( + resource_filename("bob.learn.em", "data/weightsAfterKMeans.hdf5") + ).astype("float64") + ) threshold = 0.001 gmm.variance_thresholds = threshold @@ -661,9 +788,15 @@ def test_gmm_ML_2(): gmm = gmm.fit(ar) # Test results # Load torch3vision reference - meansML_ref = 
load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5")) - variancesML_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")) - weightsML_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")) + meansML_ref = load_array( + resource_filename("bob.learn.em", "data/meansAfterML.hdf5") + ) + variancesML_ref = load_array( + resource_filename("bob.learn.em", "data/variancesAfterML.hdf5") + ) + weightsML_ref = load_array( + resource_filename("bob.learn.em", "data/weightsAfterML.hdf5") + ) # Compare to current results np.testing.assert_allclose(gmm.means, meansML_ref, atol=3e-3) @@ -673,11 +806,18 @@ def test_gmm_ML_2(): def test_gmm_MAP_1(): """Train a GMMMachine with MAP_GMMTrainer""" - ar = load_array(resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5")) + ar = load_array( + resource_filename("bob.learn.em", "data/faithful.torch3_f64.hdf5") + ) # test with rng - gmmprior = GMMMachine.from_hdf5(HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r")) - gmm = GMMMachine.from_hdf5(HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"), ubm = gmmprior) + gmmprior = GMMMachine.from_hdf5( + HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r") + ) + gmm = GMMMachine.from_hdf5( + HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"), + ubm=gmmprior, + ) gmm.update_means = True gmm.update_variances = False gmm.update_weights = False @@ -685,8 +825,13 @@ def test_gmm_MAP_1(): gmm.random_state = rng gmm = gmm.fit(ar) - gmmprior = GMMMachine.from_hdf5(HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r")) - gmm = GMMMachine.from_hdf5(HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"), ubm = gmmprior) + gmmprior = GMMMachine.from_hdf5( + HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r") + ) + gmm = GMMMachine.from_hdf5( + HDF5File(resource_filename("bob.learn.em", "data/gmm_ML.hdf5"), "r"), + ubm=gmmprior, + ) gmm.update_means = True gmm.update_variances = False gmm.update_weights = False @@ -696,7 +841,9 @@ def test_gmm_MAP_1(): # Generate reference # gmm.save(HDF5File(resource_filename("bob.learn.em", "data/gmm_MAP.hdf5"), "w")) - gmm_ref = GMMMachine.from_hdf5(HDF5File(resource_filename("bob.learn.em", "data/gmm_MAP.hdf5"), "r")) + gmm_ref = GMMMachine.from_hdf5( + HDF5File(resource_filename("bob.learn.em", "data/gmm_MAP.hdf5"), "r") + ) np.testing.assert_almost_equal(gmm.means, gmm_ref.means, decimal=3) np.testing.assert_almost_equal(gmm.variances, gmm_ref.variances, decimal=3) @@ -709,7 +856,9 @@ def test_gmm_MAP_2(): data = load_array(resource_filename("bob.learn.em", "data/data.hdf5")) data = data.reshape((1, -1)) # make a 2D array out of it means = load_array(resource_filename("bob.learn.em", "data/means.hdf5")) - variances = load_array(resource_filename("bob.learn.em", "data/variances.hdf5")) + variances = load_array( + resource_filename("bob.learn.em", "data/variances.hdf5") + ) weights = load_array(resource_filename("bob.learn.em", "data/weights.hdf5")) gmm = GMMMachine(n_gaussians=2) @@ -725,7 +874,7 @@ def test_gmm_MAP_2(): update_means=True, update_variances=False, update_weights=False, - mean_var_update_threshold=0., + mean_var_update_threshold=0.0, ) gmm_adapted.means = means gmm_adapted.variances = variances @@ -733,7 +882,9 @@ def test_gmm_MAP_2(): gmm_adapted = gmm_adapted.fit(data) - new_means = load_array(resource_filename("bob.learn.em", "data/new_adapted_mean.hdf5")) + new_means = load_array( + 
resource_filename("bob.learn.em", "data/new_adapted_mean.hdf5") + ) # Compare to matlab reference np.testing.assert_allclose(new_means.T, gmm_adapted.means, rtol=1e-4) @@ -746,9 +897,15 @@ def test_gmm_MAP_3(): # Initialize GMMMachine n_gaussians = 5 prior_gmm = GMMMachine(n_gaussians) - prior_gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5")) - prior_gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")) - prior_gmm.weights = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")) + prior_gmm.means = load_array( + resource_filename("bob.learn.em", "data/meansAfterML.hdf5") + ) + prior_gmm.variances = load_array( + resource_filename("bob.learn.em", "data/variancesAfterML.hdf5") + ) + prior_gmm.weights = load_array( + resource_filename("bob.learn.em", "data/weightsAfterML.hdf5") + ) threshold = 0.001 prior_gmm.variance_thresholds = threshold @@ -775,9 +932,15 @@ def test_gmm_MAP_3(): # Test results # Load torch3vision reference - meansMAP_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterMAP.hdf5")) - variancesMAP_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterMAP.hdf5")) - weightsMAP_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterMAP.hdf5")) + meansMAP_ref = load_array( + resource_filename("bob.learn.em", "data/meansAfterMAP.hdf5") + ) + variancesMAP_ref = load_array( + resource_filename("bob.learn.em", "data/variancesAfterMAP.hdf5") + ) + weightsMAP_ref = load_array( + resource_filename("bob.learn.em", "data/weightsAfterMAP.hdf5") + ) # Compare to current results # Gaps are quite large. This might be explained by the fact that there is no @@ -789,40 +952,57 @@ def test_gmm_MAP_3(): def test_gmm_test(): - """Tests a GMMMachine by computing scores against a model and comparing to a reference - """ + """Tests a GMMMachine by computing scores against a model and comparing to a reference""" ar = load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5")) # Initialize GMMMachine n_gaussians = 5 gmm = GMMMachine(n_gaussians) - gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5")) - gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")) - gmm.weights = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")) + gmm.means = load_array( + resource_filename("bob.learn.em", "data/meansAfterML.hdf5") + ) + gmm.variances = load_array( + resource_filename("bob.learn.em", "data/variancesAfterML.hdf5") + ) + gmm.weights = load_array( + resource_filename("bob.learn.em", "data/weightsAfterML.hdf5") + ) threshold = 0.001 gmm.variance_thresholds = threshold # Test against the model - score_mean_ref = -1.50379e+06 + score_mean_ref = -1.50379e06 score = gmm.log_likelihood(ar).sum() score /= len(ar) # Compare current results to torch3vision - assert abs(score-score_mean_ref)/score_mean_ref<1e-4 + assert abs(score - score_mean_ref) / score_mean_ref < 1e-4 def test_gmm_ML_dask(): """Trains a GMMMachine with dask array data; compares to a reference""" - ar = da.array(load_array(resource_filename("bob.learn.em", "data/dataNormalized.hdf5"))) + ar = da.array( + load_array( + resource_filename("bob.learn.em", "data/dataNormalized.hdf5") + ) + ) # Initialize GMMMachine gmm = GMMMachine(n_gaussians=5) - gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterKMeans.hdf5")).astype("float64") - gmm.variances = load_array(resource_filename("bob.learn.em", 
"data/variancesAfterKMeans.hdf5")).astype("float64") - gmm.weights = np.exp(load_array(resource_filename("bob.learn.em", "data/weightsAfterKMeans.hdf5")).astype("float64")) + gmm.means = load_array( + resource_filename("bob.learn.em", "data/meansAfterKMeans.hdf5") + ).astype("float64") + gmm.variances = load_array( + resource_filename("bob.learn.em", "data/variancesAfterKMeans.hdf5") + ).astype("float64") + gmm.weights = np.exp( + load_array( + resource_filename("bob.learn.em", "data/weightsAfterKMeans.hdf5") + ).astype("float64") + ) threshold = 0.001 gmm.variance_thresholds = threshold @@ -840,25 +1020,40 @@ def test_gmm_ML_dask(): # Test results # Load torch3vision reference - meansML_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5")) - variancesML_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")) - weightsML_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")) + meansML_ref = load_array( + resource_filename("bob.learn.em", "data/meansAfterML.hdf5") + ) + variancesML_ref = load_array( + resource_filename("bob.learn.em", "data/variancesAfterML.hdf5") + ) + weightsML_ref = load_array( + resource_filename("bob.learn.em", "data/weightsAfterML.hdf5") + ) # Compare to current results np.testing.assert_allclose(gmm.means, meansML_ref, atol=3e-3) np.testing.assert_allclose(gmm.variances, variancesML_ref, atol=3e-3) np.testing.assert_allclose(gmm.weights, weightsML_ref, atol=1e-4) + def test_gmm_MAP_dask(): """Test a GMMMachine for MAP with a dask array as data.""" - ar = da.array(load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5"))) + ar = da.array( + load_array(resource_filename("bob.learn.em", "data/dataforMAP.hdf5")) + ) # Initialize GMMMachine n_gaussians = 5 prior_gmm = GMMMachine(n_gaussians) - prior_gmm.means = load_array(resource_filename("bob.learn.em", "data/meansAfterML.hdf5")) - prior_gmm.variances = load_array(resource_filename("bob.learn.em", "data/variancesAfterML.hdf5")) - prior_gmm.weights = load_array(resource_filename("bob.learn.em", "data/weightsAfterML.hdf5")) + prior_gmm.means = load_array( + resource_filename("bob.learn.em", "data/meansAfterML.hdf5") + ) + prior_gmm.variances = load_array( + resource_filename("bob.learn.em", "data/variancesAfterML.hdf5") + ) + prior_gmm.weights = load_array( + resource_filename("bob.learn.em", "data/weightsAfterML.hdf5") + ) threshold = 0.001 prior_gmm.variance_thresholds = threshold @@ -885,9 +1080,15 @@ def test_gmm_MAP_dask(): # Test results # Load torch3vision reference - meansMAP_ref = load_array(resource_filename("bob.learn.em", "data/meansAfterMAP.hdf5")) - variancesMAP_ref = load_array(resource_filename("bob.learn.em", "data/variancesAfterMAP.hdf5")) - weightsMAP_ref = load_array(resource_filename("bob.learn.em", "data/weightsAfterMAP.hdf5")) + meansMAP_ref = load_array( + resource_filename("bob.learn.em", "data/meansAfterMAP.hdf5") + ) + variancesMAP_ref = load_array( + resource_filename("bob.learn.em", "data/variancesAfterMAP.hdf5") + ) + weightsMAP_ref = load_array( + resource_filename("bob.learn.em", "data/weightsAfterMAP.hdf5") + ) # Compare to current results # Gaps are quite large. 
This might be explained by the fact that there is no diff --git a/bob/learn/em/test/test_kmeans.py b/bob/learn/em/test/test_kmeans.py index cbf5f0954e67d5f59b8387c51da66231e65a4a20..df066c89c945c18c2a860e20f9c441d2236e873c 100644 --- a/bob/learn/em/test/test_kmeans.py +++ b/bob/learn/em/test/test_kmeans.py @@ -15,7 +15,7 @@ import copy import dask.array as da import numpy as np -from bob.learn.em.cluster import KMeansMachine +from bob.learn.em import KMeansMachine def to_numpy(*args): @@ -41,7 +41,7 @@ def test_KMeansMachine(): means = np.array([[3, 70, 0], [4, 72, 0]], "float64") test_val = np.array([3, 70, 1], "float64") - test_arr = np.array([[3, 70, 1],[5, 72, 0]], "float64") + test_arr = np.array([[3, 70, 1], [5, 72, 0]], "float64") for transform in (to_numpy, to_dask_array): means, test_val, test_arr = transform(means, test_val, test_arr) @@ -59,17 +59,17 @@ def test_KMeansMachine(): np.testing.assert_equal(dist, np.array([[1.0]])) (indices, dists) = km.get_closest_centroid(test_arr) - np.testing.assert_equal(indices, np.array([0,1])) - np.testing.assert_equal(dists, np.array([[1,8],[6,1]])) + np.testing.assert_equal(indices, np.array([0, 1])) + np.testing.assert_equal(dists, np.array([[1, 8], [6, 1]])) index = km.predict(test_val) assert index == 0 indices = km.predict(test_arr) - np.testing.assert_equal(indices, np.array([0,1])) + np.testing.assert_equal(indices, np.array([0, 1])) np.testing.assert_equal(km.get_min_distance(test_val), np.array([1])) - np.testing.assert_equal(km.get_min_distance(test_arr), np.array([1,1])) + np.testing.assert_equal(km.get_min_distance(test_arr), np.array([1, 1])) # Check __eq__ and is_similar_to km2 = KMeansMachine(2) @@ -78,7 +78,7 @@ def test_KMeansMachine(): km2 = copy.deepcopy(km) assert km == km2 assert km.is_similar_to(km2) - km2.centroids_[0,0] += 1 + km2.centroids_[0, 0] += 1 assert km != km2 assert not km.is_similar_to(km2) @@ -90,7 +90,9 @@ def test_KMeansMachine_var_and_weight(): data = np.array([[1.0, 1], [1.2, 3], [0, 0], [0.3, 0.2], [0.2, 0]]) data = transform(data) - variances, weights = kmeans.get_variances_and_weights_for_each_cluster(data) + variances, weights = kmeans.get_variances_and_weights_for_each_cluster( + data + ) variances_result = np.array([[0.01, 1.0], [0.01555556, 0.00888889]]) weights_result = np.array([0.4, 0.6]) @@ -149,7 +151,9 @@ def test_kmeans_fit_init_pp(): for transform in (to_numpy, to_dask_array): data = transform(data) - machine = KMeansMachine(2, init_method="k-means++", random_state=0).fit(data) + machine = KMeansMachine(2, init_method="k-means++", random_state=0).fit( + data + ) centroids = machine.centroids_[np.argsort(machine.centroids_[:, 0])] expected = [ [-1.07173464, -1.06200356, -1.00724920], @@ -165,7 +169,9 @@ def test_kmeans_fit_init_random(): data = np.concatenate([data1, data2], axis=0) for transform in (to_numpy, to_dask_array): data = transform(data) - machine = KMeansMachine(2, init_method="random", random_state=0).fit(data) + machine = KMeansMachine(2, init_method="random", random_state=0).fit( + data + ) centroids = machine.centroids_[np.argsort(machine.centroids_[:, 0])] expected = [ [-1.07329460, -1.06207104, -1.00714365], @@ -173,6 +179,7 @@ def test_kmeans_fit_init_random(): ] np.testing.assert_almost_equal(centroids, expected, decimal=7) + def test_kmeans_parameters(): np.random.seed(0) data1 = np.random.normal(loc=1, size=(2000, 3)) diff --git a/bob/learn/em/test/test_linear.py b/bob/learn/em/test/test_linear.py index 
98ad1fba33c151d5e3e9dd739ca583b26395f145..8a6c73b6d08329f00b506e8940f66e3139cdcd70 100644 --- a/bob/learn/em/test/test_linear.py +++ b/bob/learn/em/test/test_linear.py @@ -9,13 +9,9 @@ """Tests on the machine infrastructure. """ -import dask.array as da import numpy as np -from bob.learn.em.linear import ( - Whitening, - WCCN, -) +from bob.learn.em import WCCN, Whitening def run_whitening(with_dask): @@ -110,7 +106,9 @@ def run_wccn(with_dask): [18.76762201, -2.19719292, 2.1505817], ] ) - sample_wccn_ref = numerical_module.array([50.55905765, -0.83273618, 6.45174511]) + sample_wccn_ref = numerical_module.array( + [50.55905765, -0.83273618, 6.45174511] + ) # Runs WCCN (first method) t = WCCN() @@ -124,7 +122,7 @@ def run_wccn(with_dask): assert np.allclose(s, sample_wccn_ref, eps, eps) # Runs WCCN (second method) - m2 = t.fit(X, y) + t.fit(X, y) s2 = t.transform(sample) # Makes sure results are good diff --git a/bob/learn/em/test/test_linearscoring.py b/bob/learn/em/test/test_linearscoring.py index 55aa3f4d759f4050c8a73e4d57a5eacafcd5241d..0c0de7de20b7e5771aade717f148e24879357609 100644 --- a/bob/learn/em/test/test_linearscoring.py +++ b/bob/learn/em/test/test_linearscoring.py @@ -10,119 +10,178 @@ import numpy as np -from bob.learn.em.mixture import GMMMachine -from bob.learn.em.mixture import GMMStats -from bob.learn.em.mixture import linear_scoring +from bob.learn.em import GMMMachine, GMMStats, linear_scoring -def test_LinearScoring(): - ubm = GMMMachine(n_gaussians=2) - ubm.weights = np.array([0.5, 0.5], 'float64') - ubm.means = np.array([[3, 70], [4, 72]], 'float64') - ubm.variances = np.array([[1, 10], [2, 5]], 'float64') - ubm.variance_thresholds = np.array([[0, 0], [0, 0]], 'float64') - - model1 = GMMMachine(n_gaussians=2) - model1.weights = np.array([0.5, 0.5], 'float64') - model1.means = np.array([[1, 2], [3, 4]], 'float64') - model1.variances = np.array([[9, 10], [11, 12]], 'float64') - model1.variance_thresholds = np.array([[0, 0], [0, 0]], 'float64') - - model2 = GMMMachine(n_gaussians=2) - model2.weights = np.array([0.5, 0.5], 'float64') - model2.means = np.array([[5, 6], [7, 8]], 'float64') - model2.variances = np.array([[13, 14], [15, 16]], 'float64') - model2.variance_thresholds = np.array([[0, 0], [0, 0]], 'float64') - - stats1 = GMMStats(2, 2) - stats1.sum_px = np.array([[1, 2], [3, 4]], 'float64') - stats1.n = np.array([1, 2], 'float64') - stats1.t = 1+2 - - stats2 = GMMStats(2, 2) - stats2.sum_px = np.array([[5, 6], [7, 8]], 'float64') - stats2.n = np.array([3, 4], 'float64') - stats2.t = 3+4 - - stats3 = GMMStats(2, 2) - stats3.sum_px = np.array([[5, 6], [7, 3]], 'float64') - stats3.n = np.array([3, 4], 'float64') - stats3.t = 3+4 - - test_channeloffset = [np.array([[9, 8], [7, 6]], 'float64'), np.array([[5, 4], [3, 2]], 'float64'), np.array([[1, 0], [1, 2]], 'float64')] - - # Reference scores (from Idiap internal matlab implementation) - ref_scores_00 = np.array([[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], 'float64') - ref_scores_01 = np.array( [[790.9666666666667, 743.9571428571428, 753.6714285714285], [738.5666666666667, 695.4428571428572, 704.5857142857144]], 'float64') - ref_scores_10 = np.array([[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], 'float64') - ref_scores_11 = np.array([[871.8333333333332, 776.3000000000001, 770.3571428571427], [793.8333333333333, 714.1857142857143, 717.5000000000000]], 'float64') - - - # 1/ Use GMMMachines - # 1/a/ Without test_channelOffset, without frame-length normalisation - scores = linear_scoring([model1, model2], 
ubm, [stats1, stats2, stats3]) - np.testing.assert_almost_equal(scores, ref_scores_00, decimal=7) - - # 1/b/ Without test_channelOffset, with frame-length normalisation - scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], frame_length_normalization=True) - np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7) - scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], 0, True) - np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7) - - # 1/c/ With test_channelOffset, without frame-length normalisation - scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset) - np.testing.assert_almost_equal(scores, ref_scores_10, decimal=7) - - # 1/d/ With test_channelOffset, with frame-length normalisation - scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset, frame_length_normalization=True) - np.testing.assert_almost_equal(scores, ref_scores_11, decimal=7) - - - # 2/ Use means instead of models - # 2/a/ Without test_channelOffset, without frame-length normalisation - scores = linear_scoring([model1.means, model2.means], ubm, [stats1, stats2, stats3]) - assert (abs(scores - ref_scores_00) < 1e-7).all() - - # 2/b/ Without test_channelOffset, with frame-length normalisation - scores = linear_scoring([model1.means, model2.means], ubm, [stats1, stats2, stats3], frame_length_normalization=True) - assert (abs(scores - ref_scores_01) < 1e-7).all() - - # 2/c/ With test_channelOffset, without frame-length normalisation - scores = linear_scoring([model1.means, model2.means], ubm, [stats1, stats2, stats3], test_channeloffset) - assert (abs(scores - ref_scores_10) < 1e-7).all() - - # 2/d/ With test_channelOffset, with frame-length normalisation - scores = linear_scoring([model1.means, model2.means], ubm, [stats1, stats2, stats3], test_channeloffset, frame_length_normalization=True) - assert (abs(scores - ref_scores_11) < 1e-7).all() - - - # 3/ Using single model/sample - # 3/a/ without frame-length normalisation - score = linear_scoring(model1.means, ubm, stats1, test_channeloffset[0]) - np.testing.assert_almost_equal(score, ref_scores_10[0,0], decimal=7) - score = linear_scoring(model1.means, ubm, stats2, test_channeloffset[1]) - np.testing.assert_almost_equal(score, ref_scores_10[0,1], decimal=7) - score = linear_scoring(model1.means, ubm, stats3, test_channeloffset[2]) - np.testing.assert_almost_equal(score, ref_scores_10[0,2], decimal=7) - score = linear_scoring(model2.means, ubm, stats1, test_channeloffset[0]) - np.testing.assert_almost_equal(score, ref_scores_10[1,0], decimal=7) - score = linear_scoring(model2.means, ubm, stats2, test_channeloffset[1]) - np.testing.assert_almost_equal(score, ref_scores_10[1,1], decimal=7) - score = linear_scoring(model2.means, ubm, stats3, test_channeloffset[2]) - np.testing.assert_almost_equal(score, ref_scores_10[1,2], decimal=7) - - - # 3/b/ with frame-length normalisation - score = linear_scoring(model1.means, ubm, stats1, test_channeloffset[0], True) - np.testing.assert_almost_equal(score, ref_scores_11[0,0], decimal=7) - score = linear_scoring(model1.means, ubm, stats2, test_channeloffset[1], True) - np.testing.assert_almost_equal(score, ref_scores_11[0,1], decimal=7) - score = linear_scoring(model1.means, ubm, stats3, test_channeloffset[2], True) - np.testing.assert_almost_equal(score, ref_scores_11[0,2], decimal=7) - score = linear_scoring(model2.means, ubm, stats1, test_channeloffset[0], True) - np.testing.assert_almost_equal(score, 
ref_scores_11[1,0], decimal=7) - score = linear_scoring(model2.means, ubm, stats2, test_channeloffset[1], True) - np.testing.assert_almost_equal(score, ref_scores_11[1,1], decimal=7) - score = linear_scoring(model2.means, ubm, stats3, test_channeloffset[2], True) - np.testing.assert_almost_equal(score, ref_scores_11[1,2], decimal=7) +def test_LinearScoring(): + ubm = GMMMachine(n_gaussians=2) + ubm.weights = np.array([0.5, 0.5], "float64") + ubm.means = np.array([[3, 70], [4, 72]], "float64") + ubm.variances = np.array([[1, 10], [2, 5]], "float64") + ubm.variance_thresholds = np.array([[0, 0], [0, 0]], "float64") + + model1 = GMMMachine(n_gaussians=2) + model1.weights = np.array([0.5, 0.5], "float64") + model1.means = np.array([[1, 2], [3, 4]], "float64") + model1.variances = np.array([[9, 10], [11, 12]], "float64") + model1.variance_thresholds = np.array([[0, 0], [0, 0]], "float64") + + model2 = GMMMachine(n_gaussians=2) + model2.weights = np.array([0.5, 0.5], "float64") + model2.means = np.array([[5, 6], [7, 8]], "float64") + model2.variances = np.array([[13, 14], [15, 16]], "float64") + model2.variance_thresholds = np.array([[0, 0], [0, 0]], "float64") + + stats1 = GMMStats(2, 2) + stats1.sum_px = np.array([[1, 2], [3, 4]], "float64") + stats1.n = np.array([1, 2], "float64") + stats1.t = 1 + 2 + + stats2 = GMMStats(2, 2) + stats2.sum_px = np.array([[5, 6], [7, 8]], "float64") + stats2.n = np.array([3, 4], "float64") + stats2.t = 3 + 4 + + stats3 = GMMStats(2, 2) + stats3.sum_px = np.array([[5, 6], [7, 3]], "float64") + stats3.n = np.array([3, 4], "float64") + stats3.t = 3 + 4 + + test_channeloffset = [ + np.array([[9, 8], [7, 6]], "float64"), + np.array([[5, 4], [3, 2]], "float64"), + np.array([[1, 0], [1, 2]], "float64"), + ] + + # Reference scores (from Idiap internal matlab implementation) + ref_scores_00 = np.array( + [[2372.9, 5207.7, 5275.7], [2215.7, 4868.1, 4932.1]], "float64" + ) + ref_scores_01 = np.array( + [ + [790.9666666666667, 743.9571428571428, 753.6714285714285], + [738.5666666666667, 695.4428571428572, 704.5857142857144], + ], + "float64", + ) + ref_scores_10 = np.array( + [[2615.5, 5434.1, 5392.5], [2381.5, 4999.3, 5022.5]], "float64" + ) + ref_scores_11 = np.array( + [ + [871.8333333333332, 776.3000000000001, 770.3571428571427], + [793.8333333333333, 714.1857142857143, 717.5000000000000], + ], + "float64", + ) + + # 1/ Use GMMMachines + # 1/a/ Without test_channelOffset, without frame-length normalisation + scores = linear_scoring([model1, model2], ubm, [stats1, stats2, stats3]) + np.testing.assert_almost_equal(scores, ref_scores_00, decimal=7) + + # 1/b/ Without test_channelOffset, with frame-length normalisation + scores = linear_scoring( + [model1, model2], + ubm, + [stats1, stats2, stats3], + frame_length_normalization=True, + ) + np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7) + scores = linear_scoring( + [model1, model2], ubm, [stats1, stats2, stats3], 0, True + ) + np.testing.assert_almost_equal(scores, ref_scores_01, decimal=7) + + # 1/c/ With test_channelOffset, without frame-length normalisation + scores = linear_scoring( + [model1, model2], ubm, [stats1, stats2, stats3], test_channeloffset + ) + np.testing.assert_almost_equal(scores, ref_scores_10, decimal=7) + + # 1/d/ With test_channelOffset, with frame-length normalisation + scores = linear_scoring( + [model1, model2], + ubm, + [stats1, stats2, stats3], + test_channeloffset, + frame_length_normalization=True, + ) + np.testing.assert_almost_equal(scores, ref_scores_11, decimal=7) + + # 
2/ Use means instead of models + # 2/a/ Without test_channelOffset, without frame-length normalisation + scores = linear_scoring( + [model1.means, model2.means], ubm, [stats1, stats2, stats3] + ) + assert (abs(scores - ref_scores_00) < 1e-7).all() + + # 2/b/ Without test_channelOffset, with frame-length normalisation + scores = linear_scoring( + [model1.means, model2.means], + ubm, + [stats1, stats2, stats3], + frame_length_normalization=True, + ) + assert (abs(scores - ref_scores_01) < 1e-7).all() + + # 2/c/ With test_channelOffset, without frame-length normalisation + scores = linear_scoring( + [model1.means, model2.means], + ubm, + [stats1, stats2, stats3], + test_channeloffset, + ) + assert (abs(scores - ref_scores_10) < 1e-7).all() + + # 2/d/ With test_channelOffset, with frame-length normalisation + scores = linear_scoring( + [model1.means, model2.means], + ubm, + [stats1, stats2, stats3], + test_channeloffset, + frame_length_normalization=True, + ) + assert (abs(scores - ref_scores_11) < 1e-7).all() + + # 3/ Using single model/sample + # 3/a/ without frame-length normalisation + score = linear_scoring(model1.means, ubm, stats1, test_channeloffset[0]) + np.testing.assert_almost_equal(score, ref_scores_10[0, 0], decimal=7) + score = linear_scoring(model1.means, ubm, stats2, test_channeloffset[1]) + np.testing.assert_almost_equal(score, ref_scores_10[0, 1], decimal=7) + score = linear_scoring(model1.means, ubm, stats3, test_channeloffset[2]) + np.testing.assert_almost_equal(score, ref_scores_10[0, 2], decimal=7) + score = linear_scoring(model2.means, ubm, stats1, test_channeloffset[0]) + np.testing.assert_almost_equal(score, ref_scores_10[1, 0], decimal=7) + score = linear_scoring(model2.means, ubm, stats2, test_channeloffset[1]) + np.testing.assert_almost_equal(score, ref_scores_10[1, 1], decimal=7) + score = linear_scoring(model2.means, ubm, stats3, test_channeloffset[2]) + np.testing.assert_almost_equal(score, ref_scores_10[1, 2], decimal=7) + + # 3/b/ with frame-length normalisation + score = linear_scoring( + model1.means, ubm, stats1, test_channeloffset[0], True + ) + np.testing.assert_almost_equal(score, ref_scores_11[0, 0], decimal=7) + score = linear_scoring( + model1.means, ubm, stats2, test_channeloffset[1], True + ) + np.testing.assert_almost_equal(score, ref_scores_11[0, 1], decimal=7) + score = linear_scoring( + model1.means, ubm, stats3, test_channeloffset[2], True + ) + np.testing.assert_almost_equal(score, ref_scores_11[0, 2], decimal=7) + score = linear_scoring( + model2.means, ubm, stats1, test_channeloffset[0], True + ) + np.testing.assert_almost_equal(score, ref_scores_11[1, 0], decimal=7) + score = linear_scoring( + model2.means, ubm, stats2, test_channeloffset[1], True + ) + np.testing.assert_almost_equal(score, ref_scores_11[1, 1], decimal=7) + score = linear_scoring( + model2.means, ubm, stats3, test_channeloffset[2], True + ) + np.testing.assert_almost_equal(score, ref_scores_11[1, 2], decimal=7) diff --git a/bob/learn/em/test/test_picklability.py b/bob/learn/em/test/test_picklability.py index f83c8c9541dce817b84a9300496301141dc7e1be..e4862a5ed2ea0596a4d842c2b4b5fb617985d138 100644 --- a/bob/learn/em/test/test_picklability.py +++ b/bob/learn/em/test/test_picklability.py @@ -6,13 +6,13 @@ import pickle import numpy -from bob.learn.em.cluster import KMeansMachine +from bob.learn.em import KMeansMachine + def test_kmeans_machine(): # Test a KMeansMachine means = numpy.array([[3, 70, 0], [4, 72, 0]], "float64") - mean = numpy.array([3, 70, 1], "float64") # 
Initializes a KMeansMachine kmeans_machine = KMeansMachine(2, 3) diff --git a/bob/learn/em/linear/wccn.py b/bob/learn/em/wccn.py similarity index 83% rename from bob/learn/em/linear/wccn.py rename to bob/learn/em/wccn.py index 5d0dbaa1e92d820258b6ea69d4a584a41fdeb131..7c2c8246c4f70c80f3ae5499ce70db65c8976f65 100644 --- a/bob/learn/em/linear/wccn.py +++ b/bob/learn/em/wccn.py @@ -1,11 +1,8 @@ -from sklearn.base import BaseEstimator -from sklearn.base import TransformerMixin - - import dask # Dask doesn't have an implementation for `pinv` from scipy.linalg import pinv +from sklearn.base import BaseEstimator, TransformerMixin class WCCN(TransformerMixin, BaseEstimator): @@ -42,10 +39,12 @@ class WCCN(TransformerMixin, BaseEstimator): # CHECKING THE TYPES if isinstance(X, dask.array.Array): import dask.array as numerical_module - from dask.array.linalg import inv, cholesky + + from dask.array.linalg import cholesky, inv else: import numpy as numerical_module - from scipy.linalg import inv, cholesky + + from scipy.linalg import cholesky, inv possible_labels = set(y) y_ = numerical_module.array(y) @@ -55,17 +54,19 @@ class WCCN(TransformerMixin, BaseEstimator): # 1. compute the means for each label mu_l = numerical_module.array( [ - numerical_module.mean(X[numerical_module.where(y_ == l)[0]], axis=0) - for l in possible_labels + numerical_module.mean( + X[numerical_module.where(y_ == label)[0]], axis=0 + ) + for label in possible_labels ] ) # 2. Compute Sw Sw = numerical_module.zeros((X.shape[1], X.shape[1]), dtype=float) - for l in possible_labels: - indexes = numerical_module.where(y_ == l)[0] - X_l_mu_l = X[indexes] - mu_l[l] + for label in possible_labels: + indexes = numerical_module.where(y_ == label)[0] + X_l_mu_l = X[indexes] - mu_l[label] Sw += X_l_mu_l.T @ X_l_mu_l diff --git a/bob/learn/em/linear/whitening.py b/bob/learn/em/whitening.py similarity index 91% rename from bob/learn/em/linear/whitening.py rename to bob/learn/em/whitening.py index dd1046069f23ab33fc2e2f50640e0b8a026a919b..3565f4fb2397bb7aadcd5e81727e79562056270e 100644 --- a/bob/learn/em/linear/whitening.py +++ b/bob/learn/em/whitening.py @@ -1,9 +1,8 @@ -from sklearn.base import BaseEstimator -from sklearn.base import TransformerMixin -import numpy as np -from scipy.linalg import pinv import dask +from scipy.linalg import pinv +from sklearn.base import BaseEstimator, TransformerMixin + class Whitening(TransformerMixin, BaseEstimator): """ @@ -46,11 +45,13 @@ class Whitening(TransformerMixin, BaseEstimator): # CHECKING THE TYPES if isinstance(X, dask.array.Array): import dask.array as numerical_module - from dask.array.linalg import inv, cholesky + + from dask.array.linalg import cholesky, inv else: import numpy as numerical_module - from scipy.linalg import inv, cholesky + + from scipy.linalg import cholesky, inv # 1. 
Computes the mean vector and the covariance matrix of the training set mu = numerical_module.mean(X, axis=0) diff --git a/buildout.cfg b/buildout.cfg index 3b7a446fdd70c7e5b29dfdfd214a9fafd0ff6f06..d3c5cee7a448e29f08c7627e5a14591ec2e73122 100644 --- a/buildout.cfg +++ b/buildout.cfg @@ -10,4 +10,4 @@ newest = false verbose = true [scripts] -recipe = bob.buildout:scripts \ No newline at end of file +recipe = bob.buildout:scripts diff --git a/conda/meta.yaml b/conda/meta.yaml index 8ba3b142709c9ef386eddd185f79ca6442567161..319c7e6c80577d197daa9e6bb23869b86fe3e7ea 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -26,11 +26,6 @@ requirements: - setuptools {{ setuptools }} - pip {{ pip }} - bob.extension - - bob.blitz - - bob.io.base - - bob.sp - - bob.learn.activation - - bob.learn.linear - numpy {{ numpy }} - dask {{ dask }} - dask-ml {{ dask_ml }} diff --git a/develop.cfg b/develop.cfg index 8058224ef600e2139e69947bd2f99842e5fb1e75..d1af433667412a7b00b81e1fd07f229cd723d970 100644 --- a/develop.cfg +++ b/develop.cfg @@ -10,10 +10,6 @@ extensions = bob.buildout auto-checkout = * develop = src/bob.extension - src/bob.io.base - src/bob.sp - src/bob.learn.activation - src/bob.learn.linear . ; options for bob.buildout extension @@ -23,10 +19,6 @@ newest = false [sources] bob.extension = git https://gitlab.idiap.ch/bob/bob.extension -bob.io.base = git https://gitlab.idiap.ch/bob/bob.io.base -bob.sp = git https://gitlab.idiap.ch/bob/bob.sp -bob.learn.activation = git https://gitlab.idiap.ch/bob/bob.learn.activation -bob.learn.linear = git https://gitlab.idiap.ch/bob/bob.learn.linear [scripts] recipe = bob.buildout:scripts diff --git a/doc/conf.py b/doc/conf.py index 5075261b4e6c5bdbf5df7333bb2d8ec07a857a27..dd19c99be128f833ccb7c093637a6547e7e14c94 100755 --- a/doc/conf.py +++ b/doc/conf.py @@ -2,32 +2,30 @@ # vim: set fileencoding=utf-8 : import os -import sys -import glob -import pkg_resources +import pkg_resources # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -needs_sphinx = '1.3' +needs_sphinx = "1.3" # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
extensions = [ - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.ifconfig', - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.graphviz', - 'sphinx.ext.intersphinx', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx.ext.mathjax', - 'matplotlib.sphinxext.plot_directive' - ] + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.ifconfig", + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.graphviz", + "sphinx.ext.intersphinx", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.mathjax", + "matplotlib.sphinxext.plot_directive", +] # Be picky about warnings nitpicky = False @@ -36,13 +34,13 @@ nitpicky = False nitpick_ignore = [] # Allows the user to override warnings from a separate file -if os.path.exists('nitpick-exceptions.txt'): - for line in open('nitpick-exceptions.txt'): +if os.path.exists("nitpick-exceptions.txt"): + for line in open("nitpick-exceptions.txt"): if line.strip() == "" or line.startswith("#"): continue dtype, target = line.split(None, 1) target = target.strip() - try: # python 2.x + try: # python 2.x target = unicode(target) except NameError: pass @@ -58,25 +56,27 @@ autosummary_generate = True numfig = True # If we are on OSX, the 'dvipng' path maybe different -dvipng_osx = '/opt/local/libexec/texlive/binaries/dvipng' -if os.path.exists(dvipng_osx): pngmath_dvipng = dvipng_osx +dvipng_osx = "/opt/local/libexec/texlive/binaries/dvipng" +if os.path.exists(dvipng_osx): + pngmath_dvipng = dvipng_osx # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'bob.learn.em' +project = u"bob.learn.em" import time -copyright = u'%s, Idiap Research Institute' % time.strftime('%Y') + +copyright = u"%s, Idiap Research Institute" % time.strftime("%Y") # Grab the setup entry distribution = pkg_resources.require(project)[0] @@ -92,42 +92,42 @@ release = distribution.version # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['links.rst'] +exclude_patterns = ["links.rst"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. 
-#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # Some variables which are useful for generated material -project_variable = project.replace('.', '_') -short_description = u'Core utilities required on all Bob modules' -owner = [u'Idiap Research Institute'] +project_variable = project.replace(".", "_") +short_description = u"Core utilities required on all Bob modules" +owner = [u"Idiap Research Institute"] # -- Options for HTML output --------------------------------------------------- @@ -135,80 +135,81 @@ owner = [u'Idiap Research Institute'] # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. import sphinx_rtd_theme -html_theme = 'sphinx_rtd_theme' + +html_theme = "sphinx_rtd_theme" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # The name for this set of Sphinx documents. If None, it defaults to # "<project> v<release> documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = project_variable +# html_short_title = project_variable # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = 'img/logo.png' +html_logo = "img/logo.png" # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -html_favicon = 'img/favicon.ico' +html_favicon = "img/favicon.ico" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -#html_static_path = ['_static'] +# html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." 
is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a <link> tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = project_variable + u'_doc' +htmlhelp_basename = project_variable + u"_doc" # -- Post configuration -------------------------------------------------------- @@ -218,26 +219,27 @@ rst_epilog = """ .. |project| replace:: Bob .. |version| replace:: %s .. |current-year| date:: %%Y -""" % (version,) +""" % ( + version, +) # Default processing flags for sphinx -autoclass_content = 'class' -autodoc_member_order = 'bysource' +autoclass_content = "class" +autodoc_member_order = "bysource" autodoc_default_options = { - "members": True, - "undoc-members": True, - "show-inheritance": True, + "members": True, + "undoc-members": True, + "show-inheritance": True, } # For inter-documentation mapping: from bob.extension.utils import link_documentation, load_requirements + sphinx_requirements = "extra-intersphinx.txt" if os.path.exists(sphinx_requirements): intersphinx_mapping = link_documentation( - additional_packages=['python', 'numpy'] + \ - load_requirements(sphinx_requirements) + additional_packages=["python", "numpy"] + + load_requirements(sphinx_requirements) ) else: intersphinx_mapping = link_documentation() - - diff --git a/doc/guide.rst b/doc/guide.rst index b91540e614bcc586b529f0a0abfc5a6472b4e05b..82d0b95ac172b6e14aa8ccb8220fc2b6405632ce 100644 --- a/doc/guide.rst +++ b/doc/guide.rst @@ -49,16 +49,16 @@ between :math:`Js` of successive iterations are lower than a convergence threshold. In this implementation, the training consists in the definition of the -statistical model, called machine, (:py:class:`bob.learn.em.cluster.KMeansMachine`) and +statistical model, called machine, (:py:class:`bob.learn.em.KMeansMachine`) and this statistical model is learned by executing the -:py:meth:`~bob.learn.em.cluster.KMeansMachine.fit` method. +:py:meth:`~bob.learn.em.KMeansMachine.fit` method. Follow bellow an snippet on how to train a KMeans using Bob_. .. doctest:: :options: +NORMALIZE_WHITESPACE - >>> from bob.learn.em.cluster import KMeansMachine + >>> from bob.learn.em import KMeansMachine >>> import numpy >>> data = numpy.array( ... [[3,-3,100], @@ -718,5 +718,3 @@ computed, which is defined in more formal way by: .. [8] http://en.wikipedia.org/wiki/Expectation-maximization_algorithm .. [9] http://en.wikipedia.org/wiki/Maximum_likelihood .. 
[10] http://en.wikipedia.org/wiki/Maximum_a_posteriori_estimation - - diff --git a/doc/nitpick-exceptions.txt b/doc/nitpick-exceptions.txt index c8f669868414e81396f5889679db8b7bcdbf8ea5..3497bd4629af39b5d007af077e42c51ea11d609a 100644 --- a/doc/nitpick-exceptions.txt +++ b/doc/nitpick-exceptions.txt @@ -1,3 +1,3 @@ py:class bob.learn.em.GMMStats.n py:class bob.learn.em.GMMStats.sum_px -py:class bob.learn.em.GMMStats.sum_pxx \ No newline at end of file +py:class bob.learn.em.GMMStats.sum_pxx diff --git a/doc/plot/plot_ML.py b/doc/plot/plot_ML.py index dd6857ca93927beef4b198e53ab2d81eb666bbb9..4717249139eef6dce906d41341768fd0fe41ab80 100644 --- a/doc/plot/plot_ML.py +++ b/doc/plot/plot_ML.py @@ -1,12 +1,14 @@ -from bob.learn.em.mixture import GMMMachine -import bob.db.iris -import numpy +import logging + import matplotlib.pyplot as plt +import numpy -from matplotlib.patches import Ellipse from matplotlib.lines import Line2D +from matplotlib.patches import Ellipse -import logging +import bob.db.iris + +from bob.learn.em import GMMMachine logger = logging.getLogger("bob.learn.em") logger.setLevel("DEBUG") @@ -41,7 +43,9 @@ machine = machine.fit(data) # Plotting figure, ax = plt.subplots() ax.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa") -ax.scatter(versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor") +ax.scatter( + versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor" +) ax.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica") ax.scatter( machine.means[:, 0], @@ -57,7 +61,9 @@ for mean, variance in zip(machine.means, machine.variances): eigvals, eigvecs = numpy.linalg.eig(numpy.diag(variance)) axis = numpy.sqrt(eigvals) * numpy.sqrt(5.991) angle = 180.0 * numpy.arctan(eigvecs[1][0] / eigvecs[1][1]) / numpy.pi - ax.add_patch(Ellipse(mean, *axis, angle=angle, linewidth=1, fill=False, zorder=2)) + ax.add_patch( + Ellipse(mean, *axis, angle=angle, linewidth=1, fill=False, zorder=2) + ) # Plot details (legend, axis labels) plt.legend( diff --git a/doc/plot/plot_kmeans.py b/doc/plot/plot_kmeans.py index ccafc146e6cda674c2dcad3249eb0770318f5976..af565c119fb164cc22cf2f57e32f5390ac3276cc 100644 --- a/doc/plot/plot_kmeans.py +++ b/doc/plot/plot_kmeans.py @@ -1,15 +1,20 @@ -from bob.learn.em.cluster import KMeansMachine -import bob.db.iris -import numpy import matplotlib.pyplot as plt +import numpy + +import bob.db.iris + +from bob.learn.em import KMeansMachine data_per_class = bob.db.iris.data() setosa = numpy.column_stack( - (data_per_class['setosa'][:, 0], data_per_class['setosa'][:, 3])) + (data_per_class["setosa"][:, 0], data_per_class["setosa"][:, 3]) +) versicolor = numpy.column_stack( - (data_per_class['versicolor'][:, 0], data_per_class['versicolor'][:, 3])) + (data_per_class["versicolor"][:, 0], data_per_class["versicolor"][:, 3]) +) virginica = numpy.column_stack( - (data_per_class['virginica'][:, 0], data_per_class['virginica'][:, 3])) + (data_per_class["virginica"][:, 0], data_per_class["virginica"][:, 3]) +) data = numpy.vstack((setosa, versicolor, virginica)) @@ -21,15 +26,19 @@ predictions = machine.predict(data) # Plotting figure, ax = plt.subplots() -plt.scatter(setosa[:, 0], - setosa[:, 1], c="darkcyan", label="setosa") -plt.scatter(versicolor[:, 0], - versicolor[:, 1], c="goldenrod", label="versicolor") -plt.scatter(virginica[:, 0], - virginica[:, 1], c="dimgrey", label="virginica") -plt.scatter(machine.centroids_[:, 0], - machine.centroids_[:, 1], c="blue", marker="x", label="centroids", - s=60) 
+plt.scatter(setosa[:, 0], setosa[:, 1], c="darkcyan", label="setosa") +plt.scatter( + versicolor[:, 0], versicolor[:, 1], c="goldenrod", label="versicolor" +) +plt.scatter(virginica[:, 0], virginica[:, 1], c="dimgrey", label="virginica") +plt.scatter( + machine.centroids_[:, 0], + machine.centroids_[:, 1], + c="blue", + marker="x", + label="centroids", + s=60, +) plt.legend() plt.xticks([], []) plt.yticks([], []) diff --git a/doc/py_api.rst b/doc/py_api.rst index 54a64cff4a51149eeef8c18fab4e66bd951e6f87..c6059c7785bb159578f5c5549d2f457113074454 100644 --- a/doc/py_api.rst +++ b/doc/py_api.rst @@ -12,32 +12,12 @@ This section includes information for using the pure Python API of Classes ------- - -Trainers -........ - -.. autosummary:: - -.. - TODO uncomment when implemented - bob.learn.em.ML_GMMTrainer - bob.learn.em.MAP_GMMTrainer - bob.learn.em.ISVTrainer - bob.learn.em.JFATrainer - bob.learn.em.IVectorTrainer - bob.learn.em.PLDATrainer - bob.learn.em.EMPCATrainer - -Machines -........ - .. autosummary:: - - bob.learn.em.cluster.KMeansMachine - bob.learn.em.mixture.GMMStats - bob.learn.em.mixture.GMMMachine - bob.learn.em.linear.WCCN - bob.learn.em.linear.Whitening + bob.learn.em.KMeansMachine + bob.learn.em.GMMStats + bob.learn.em.GMMMachine + bob.learn.em.WCCN + bob.learn.em.Whitening .. TODO uncomment when implemented @@ -51,14 +31,11 @@ Machines Functions --------- -.. - TODO uncomment when implemented - .. autosummary:: + .. autosummary:: bob.learn.em.linear_scoring Detailed Information -------------------- .. automodule:: bob.learn.em - diff --git a/pyproject.toml b/pyproject.toml index b3b27618f5454f6355ffe4732d8ead72ee630de7..b738dc847ff9705c5769673db7415f2eb9a75f4d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,12 @@ [build-system] -requires = ["setuptools", "wheel", "bob.extension", "bob.blitz", "bob.core", "bob.io.base", "bob.sp", "bob.math", "bob.learn.activation", "bob.learn.linear"] -build-backend = "setuptools.build_meta" + requires = ["setuptools", "wheel", "bob.extension"] + build-backend = "setuptools.build_meta" + +[tool.isort] + profile = "black" + line_length = 80 + order_by_type = true + lines_between_types = 1 + +[tool.black] + line-length = 80 diff --git a/requirements.txt b/requirements.txt index 0a01048ef7e4c6adf624b636f594fd27df5ac9d0..ece1416436592dde4f830ad1e350a694bf142230 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,5 @@ setuptools bob.extension -bob.io.base -bob.sp -bob.learn.activation -bob.learn.linear dask dask-ml h5py >= 3 diff --git a/setup.py b/setup.py index 732b0d949db969ed12a8ff6676e3d5115385ff5f..21a202ff96ece87c8c563c8ff2d2d1322a04a7d2 100644 --- a/setup.py +++ b/setup.py @@ -1,54 +1,37 @@ #!/usr/bin/env python # vim: set fileencoding=utf-8 : -# Andre Anjos <andre.anjos@idiap.ch> -# Mon 16 Apr 08:18:08 2012 CEST -from setuptools import dist -from setuptools import setup +from setuptools import dist, setup -from bob.extension.utils import find_packages -from bob.extension.utils import load_requirements - -bob_packages = ['bob.core', 'bob.io.base', 'bob.sp', 'bob.math', 'bob.learn.activation', 'bob.learn.linear'] -dist.Distribution(dict(setup_requires=['bob.extension>=2.0.7'] + bob_packages)) +dist.Distribution(dict(setup_requires=["bob.extension"])) +from bob.extension.utils import find_packages, load_requirements install_requires = load_requirements() -# Define package version -version = open("version.txt").read().rstrip() - -packages = ['boost'] -boost_modules = ['system'] setup( - - name='bob.learn.em', - 
version=version, - description='Bindings for EM machines and trainers of Bob', - url='http://gitlab.idiap.ch/bob/bob.learn.em', - license='BSD', - author='Andre Anjos', - author_email='andre.anjos@idiap.ch', + name="bob.learn.em", + version=open("version.txt").read().rstrip(), + description="Bindings for EM machines and trainers of Bob", + url="http://gitlab.idiap.ch/bob/bob.learn.em", + license="BSD", + author="Andre Anjos", + author_email="andre.anjos@idiap.ch", keywords="bob, em, expectation-maximization", - - long_description=open('README.rst').read(), - + long_description=open("README.rst").read(), packages=find_packages(), include_package_data=True, zip_safe=False, - - install_requires = install_requires, - + install_requires=install_requires, classifiers=[ - 'Framework :: Bob', - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: BSD License', - 'Natural Language :: English', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Topic :: Software Development :: Libraries :: Python Modules', + "Framework :: Bob", + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Topic :: Software Development :: Libraries :: Python Modules", ], - ) diff --git a/test-requirements.txt b/test-requirements.txt index a72389c3271b9d0c859caa1118af99c3dd21cd3c..6f36688509fc95b8754520e56b4920804355c3d1 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1 +1 @@ -bob.db.iris \ No newline at end of file +bob.db.iris
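
For downstream users, the net effect of this patch is that the former cluster, mixture and linear sub-packages disappear and everything becomes importable from the top-level bob.learn.em namespace. Below is a minimal usage sketch of the new layout; the import paths and calls mirror those exercised in the updated tests and docs above, while the toy data, seed and variable names are invented purely for illustration.

import numpy as np

from bob.learn.em import GMMMachine, KMeansMachine

np.random.seed(0)
# Two well-separated toy clusters with 3 features each (illustrative only).
data = np.concatenate(
    [
        np.random.normal(loc=0, size=(50, 3)),
        np.random.normal(loc=5, size=(50, 3)),
    ]
)

# Was: from bob.learn.em.cluster import KMeansMachine
kmeans = KMeansMachine(2).fit(data)
labels = kmeans.predict(data)

# Was: from bob.learn.em.mixture import GMMMachine
gmm = GMMMachine(
    n_gaussians=2,
    update_means=True,
    update_variances=True,
    update_weights=True,
).fit(data)
log_likelihoods = gmm.log_likelihood(data)

WCCN, Whitening and linear_scoring move the same way (from bob.learn.em import WCCN, Whitening, linear_scoring), as shown in the updated test_linear.py and test_linearscoring.py hunks.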