diff --git a/bob/bio/gmm/algorithm/GMM.py b/bob/bio/gmm/algorithm/GMM.py index 747d45bb5b6410800a5010b8b2c5aee5daec5df8..3d9d23eadbe0bd8c4902db8b973a880ecc44a692 100644 --- a/bob/bio/gmm/algorithm/GMM.py +++ b/bob/bio/gmm/algorithm/GMM.py @@ -2,75 +2,115 @@ # vim: set fileencoding=utf-8 : # Manuel Guenther <Manuel.Guenther@idiap.ch> +"""Interface between the lower level GMM classes and the Algorithm Transformer. +Implements the enroll and score methods using the low level GMM implementation. + +This adds the notions of models, probes, enrollment, and scores to GMM. +""" + + +import copy import logging -from multiprocessing.pool import ThreadPool +from typing import Callable + +import dask +import dask.array as da +import numpy as np + +from h5py import File as HDF5File +from sklearn.base import BaseEstimator + +from bob.bio.base.pipelines.vanilla_biometrics.abstract_classes import BioAlgorithm +from bob.learn.em.cluster import KMeansMachine +from bob.learn.em.mixture import GMMMachine +from bob.learn.em.mixture import GMMStats +from bob.learn.em.mixture import linear_scoring + +logger = logging.getLogger(__name__) -import numpy -import bob.core -import bob.io.base -import bob.learn.em +class GMM(BioAlgorithm, BaseEstimator): + """Algorithm for computing UBM and Gaussian Mixture Models of the features. -from bob.bio.base.algorithm import Algorithm + Features must be normalized to zero mean and unit standard deviation. -logger = logging.getLogger("bob.bio.gmm") + Models are MAP GMM machines trained from a UBM on the enrollment feature set. + The UBM is a ML GMM machine trained on the training feature set. -class GMM(Algorithm): - """Algorithm for computing Universal Background Models and Gaussian Mixture Models of the features. - Features must be normalized to zero mean and unit standard deviation.""" + Probes are GMM statistics of features projected on the UBM. 
+ """ def __init__( self, # parameters for the GMM - number_of_gaussians, + number_of_gaussians: int, # parameters of UBM training - kmeans_training_iterations=25, # Maximum number of iterations for K-Means - gmm_training_iterations=25, # Maximum number of iterations for ML GMM Training - training_threshold=5e-4, # Threshold to end the ML training - variance_threshold=5e-4, # Minimum value that a variance can reach - update_weights=True, - update_means=True, - update_variances=True, + kmeans_training_iterations: int = 25, # Maximum number of iterations for K-Means + ubm_training_iterations: int = 25, # Maximum number of iterations for GMM Training + training_threshold: float = 5e-4, # Threshold to end the ML training + variance_threshold: float = 5e-4, # Minimum value that a variance can reach + update_means: bool = True, + update_variances: bool = True, + update_weights: bool = True, # parameters of the GMM enrollment - relevance_factor=4, # Relevance factor as described in Reynolds paper - gmm_enroll_iterations=1, # Number of iterations for the enrollment phase - responsibility_threshold=0, # If set, the weight of a particular Gaussian will at least be greater than this threshold. In the case the real weight is lower, the prior mean value will be used to estimate the current mean and variance. - INIT_SEED=5489, + gmm_enroll_iterations: int = 1, # Number of iterations for the enrollment phase + enroll_update_means: bool = True, + enroll_update_variances: bool = False, + enroll_update_weights: bool = False, + relevance_factor: float = 4, # Relevance factor as described in Reynolds paper + responsibility_threshold: float = 0, # If set, the weight of a particular Gaussian will at least be greater than this threshold. In the case the real weight is lower, the prior mean value will be used to estimate the current mean and variance. 
# scoring - scoring_function=bob.learn.em.linear_scoring, - n_threads=None, + scoring_function: Callable = linear_scoring, + # RNG + init_seed: int = 5489, ): - """Initializes the local UBM-GMM tool chain with the given file selector object""" - - # call base class constructor and register that this tool performs projection - Algorithm.__init__( - self, - performs_projection=True, - use_projected_features_for_enrollment=False, - number_of_gaussians=number_of_gaussians, - kmeans_training_iterations=kmeans_training_iterations, - gmm_training_iterations=gmm_training_iterations, - training_threshold=training_threshold, - variance_threshold=variance_threshold, - update_weights=update_weights, - update_means=update_means, - update_variances=update_variances, - relevance_factor=relevance_factor, - gmm_enroll_iterations=gmm_enroll_iterations, - responsibility_threshold=responsibility_threshold, - INIT_SEED=INIT_SEED, - scoring_function=str(scoring_function), - multiple_model_scoring=None, - multiple_probe_scoring="average", - ) - - # copy parameters - self.gaussians = number_of_gaussians + """Initializes the local UBM-GMM tool chain. + + Parameters + ---------- + number_of_gaussians + The number of Gaussians used in the UBM and the models. + kmeans_training_iterations + Number of e-m iterations to train k-means initializing the UBM. + ubm_training_iterations + Number of e-m iterations for training the UBM. + training_threshold + Convergence threshold to halt the GMM training early. + variance_threshold + Minimum value a variance of the Gaussians can reach. + update_weights + Decides whether the weights of the Gaussians are updated while training. + update_means + Decides whether the means of the Gaussians are updated while training. + update_variances + Decides whether the variances of the Gaussians are updated while training. + gmm_enroll_iterations + Number of iterations for the MAP GMM used for enrollment.
+ enroll_update_weights + Decides whether the weights of the Gaussians are updated while enrolling. + enroll_update_means + Decides whether the means of the Gaussians are updated while enrolling. + enroll_update_variances + Decides whether the variances of the Gaussians are updated while enrolling. + relevance_factor + Relevance factor as described in Reynolds paper. + responsibility_threshold + If set, the weight of a particular Gaussian will at least be greater than + this threshold. In the case where the real weight is lower, the prior mean + value will be used to estimate the current mean and variance. + init_seed + Seed for the random number generation. + scoring_function + Function returning a score from a model, a UBM, and a probe. + """ + + # Copy parameters + self.number_of_gaussians = number_of_gaussians self.kmeans_training_iterations = kmeans_training_iterations - self.gmm_training_iterations = gmm_training_iterations + self.ubm_training_iterations = ubm_training_iterations self.training_threshold = training_threshold self.variance_threshold = variance_threshold self.update_weights = update_weights @@ -78,261 +118,210 @@ class GMM(Algorithm): self.update_variances = update_variances self.relevance_factor = relevance_factor self.gmm_enroll_iterations = gmm_enroll_iterations - self.init_seed = INIT_SEED - self.rng = bob.core.random.mt19937(self.init_seed) + self.enroll_update_means = enroll_update_means + self.enroll_update_weights = enroll_update_weights + self.enroll_update_variances = enroll_update_variances + self.init_seed = init_seed + self.rng = self.init_seed self.responsibility_threshold = responsibility_threshold + self.scoring_function = scoring_function - self.n_threads = n_threads - self.pool = None self.ubm = None - self.kmeans_trainer = bob.learn.em.KMeansTrainer() - self.ubm_trainer = bob.learn.em.ML_GMMTrainer( - self.update_means, - self.update_variances, - self.update_weights, - self.responsibility_threshold, - ) + + super().__init__()
def _check_feature(self, feature): """Checks that the features are appropriate""" if ( - not isinstance(feature, numpy.ndarray) + not isinstance(feature, np.ndarray) or feature.ndim != 2 - or feature.dtype != numpy.float64 + or feature.dtype != np.float64 ): - raise ValueError("The given feature is not appropriate") + raise ValueError(f"The given feature is not appropriate: \n{feature}") if self.ubm is not None and feature.shape[1] != self.ubm.shape[1]: raise ValueError( "The given feature is expected to have %d elements, but it has %d" % (self.ubm.shape[1], feature.shape[1]) ) - ####################################################### - # UBM training # - - def train_ubm(self, array): - - logger.debug(" .... Training with %d feature vectors", array.shape[0]) - if self.n_threads is not None: - self.pool = ThreadPool(self.n_threads) - - # Computes input size - input_size = array.shape[1] - - # Creates the machines (KMeans and GMM) - logger.debug(" .... Creating machines") - kmeans = bob.learn.em.KMeansMachine(self.gaussians, input_size) - self.ubm = bob.learn.em.GMMMachine(self.gaussians, input_size) + def save_model(self, ubm_file): + """Saves the projector (UBM) to file.""" + # Saves the UBM to file + logger.debug("Saving model to file '%s'", ubm_file) - # Trains using the KMeansTrainer - logger.info(" -> Training K-Means") + hdf5 = ubm_file if isinstance(ubm_file, HDF5File) else HDF5File(ubm_file, "w") + self.ubm.save(hdf5) - # Reseting the pseudo random number generator so we can have the same initialization for serial and parallel execution. 
- self.rng = bob.core.random.mt19937(self.init_seed) - bob.learn.em.train( - self.kmeans_trainer, - kmeans, - array, - self.kmeans_training_iterations, - self.training_threshold, - rng=self.rng, - pool=self.pool, - ) + def load_model(self, ubm_file): + """Loads the projector (UBM) from a file.""" + hdf5file = HDF5File(ubm_file, "r") + logger.debug("Loading model from file '%s'", ubm_file) + # Read the UBM + self.ubm = GMMMachine.from_hdf5(hdf5file) + self.ubm.variance_thresholds = self.variance_threshold + + def project(self, array): + """Computes GMM statistics against a UBM, given a 2D array of feature vectors + + This is applied to the probes before scoring. + """ + self._check_feature(array) + logger.debug("Projecting %d feature vectors", array.shape[0]) + # Accumulates statistics + gmm_stats = self.ubm.transform(array) + gmm_stats.compute() - variances, weights = kmeans.get_variances_and_weights_for_each_cluster(array) - means = kmeans.means - - # Initializes the GMM - self.ubm.means = means - self.ubm.variances = variances - self.ubm.weights = weights - self.ubm.set_variance_thresholds(self.variance_threshold) - - # Trains the GMM - logger.info(" -> Training GMM") - # Reseting the pseudo random number generator so we can have the same initialization for serial and parallel execution. - self.rng = bob.core.random.mt19937(self.init_seed) - bob.learn.em.train( - self.ubm_trainer, - self.ubm, - array, - self.gmm_training_iterations, - self.training_threshold, - rng=self.rng, - pool=self.pool, - ) + # Return the resulting statistics + return gmm_stats - def save_ubm(self, projector_file): - """Save projector to file""" - # Saves the UBM to file - logger.debug(" .... 
Saving model to file '%s'", projector_file) - hdf5 = ( - projector_file - if isinstance(projector_file, bob.io.base.HDF5File) - else bob.io.base.HDF5File(projector_file, "w") - ) - self.ubm.save(hdf5) + def enroll(self, data): + """Enrolls a GMM using MAP adaptation given a reference's feature vectors - def train_projector(self, train_features, projector_file): - """Computes the Universal Background Model from the training ("world") data""" - [self._check_feature(feature) for feature in train_features] + Returns a GMMMachine tuned from the UBM with MAP on a biometric reference data. + """ - logger.info( - " -> Training UBM model with %d training files", len(train_features) - ) + [self._check_feature(feature) for feature in data] + array = da.vstack(data) + # Use the array to train a GMM and return it + logger.info("Enrolling with %d feature vectors", array.shape[0]) + + # TODO accept responsibility_threshold in bob.learn.em + with dask.config.set(scheduler="threads"): + gmm = GMMMachine( + n_gaussians=self.number_of_gaussians, + trainer="map", + ubm=copy.deepcopy(self.ubm), + convergence_threshold=self.training_threshold, + max_fitting_steps=self.gmm_enroll_iterations, + random_state=self.rng, + update_means=self.enroll_update_means, + update_variances=self.enroll_update_variances, + update_weights=self.enroll_update_weights, + mean_var_update_threshold=self.variance_threshold, + ) + gmm.fit(array) + return gmm - # Loads the data into an array - array = numpy.vstack(train_features) + def read_biometric_reference(self, model_file): + """Reads an enrolled reference model, which is a MAP GMMMachine.""" + if self.ubm is None: + raise ValueError( + "You must load a UBM before reading a biometric reference." 
+ ) + return GMMMachine.from_hdf5(HDF5File(model_file, "r"), ubm=self.ubm) - self.train_ubm(array) + def write_biometric_reference(self, model: GMMMachine, model_file): + """Write the enrolled reference (MAP GMMMachine) into a file.""" + return model.save(model_file) - self.save_ubm(projector_file) + def score(self, biometric_reference: GMMMachine, probe): + """Computes the score for the given model and the given probe. - ####################################################### - # GMM training using UBM # + Uses the scoring function passed during initialization. - def load_ubm(self, ubm_file): - hdf5file = bob.io.base.HDF5File(ubm_file) - # read UBM - self.ubm = bob.learn.em.GMMMachine(hdf5file) - self.ubm.set_variance_thresholds(self.variance_threshold) + Parameters + ---------- + biometric_reference: + The model to score against. + probe: + The probe data to compare to the model. + """ - def load_projector(self, projector_file): - """Reads the UBM model from file""" - # read UBM - self.load_ubm(projector_file) - # prepare MAP_GMM_Trainer - kwargs = ( - dict( - mean_var_update_responsibilities_threshold=self.responsibility_threshold - ) - if self.responsibility_threshold > 0.0 - else dict() - ) - self.enroll_trainer = bob.learn.em.MAP_GMMTrainer( - self.ubm, - relevance_factor=self.relevance_factor, - update_means=True, - update_variances=False, - **kwargs - ) - self.rng = bob.core.random.mt19937(self.init_seed) + logger.debug(f"scoring {biometric_reference}, {probe}") + if not isinstance(probe, GMMStats): + # Projection is done here instead of in transform (or it would be applied to enrollment data too...) + probe = self.project(probe) + return self.scoring_function( + models_means=[biometric_reference], + ubm=self.ubm, + test_stats=probe, + frame_length_normalization=True, + )[0, 0] + + def score_multiple_biometric_references( + self, biometric_references: "list[GMMMachine]", probe: GMMStats + ): + """Computes the score between multiple models and one probe. 
- def project_ubm(self, array): - logger.debug(" .... Projecting %d feature vectors" % array.shape[0]) - # Accumulates statistics - gmm_stats = bob.learn.em.GMMStats(self.ubm.shape[0], self.ubm.shape[1]) - self.ubm.acc_statistics(array, gmm_stats) + Uses the scoring function passed during initialization. - # return the resulting statistics - return gmm_stats + Parameters + ---------- + biometric_references: + The models to score against. + probe: + The probe data to compare to the models. + """ - def project(self, feature): - """Computes GMM statistics against a UBM, given an input 2D numpy.ndarray of feature vectors""" - self._check_feature(feature) - return self.project_ubm(feature) - - def read_gmm_stats(self, gmm_stats_file): - """Reads GMM stats from file.""" - return bob.learn.em.GMMStats(bob.io.base.HDF5File(gmm_stats_file)) - - def read_feature(self, feature_file): - """Read the type of features that we require, namely GMM_Stats""" - return self.read_gmm_stats(feature_file) - - def enroll_gmm(self, array): - logger.debug(" .... 
Enrolling with %d feature vectors", array.shape[0]) - - gmm = bob.learn.em.GMMMachine(self.ubm) - gmm.set_variance_thresholds(self.variance_threshold) - bob.learn.em.train( - self.enroll_trainer, - gmm, - array, - self.gmm_enroll_iterations, - self.training_threshold, - rng=self.rng, - pool=self.pool, + logger.debug(f"scoring {biometric_references}, {probe}") + assert isinstance(biometric_references[0], GMMMachine), type( + biometric_references[0] ) - return gmm - - def enroll(self, feature_arrays): - """Enrolls a GMM using MAP adaptation, given a list of 2D numpy.ndarray's of feature vectors""" - [self._check_feature(feature) for feature in feature_arrays] - array = numpy.vstack(feature_arrays) - # Use the array to train a GMM and return it - return self.enroll_gmm(array) - - ###################################################### - # Feature comparison # - def read_model(self, model_file): - """Reads the model, which is a GMM machine""" - return bob.learn.em.GMMMachine(bob.io.base.HDF5File(model_file)) - - def score(self, model, probe): - """Computes the score for the given model and the given probe using the scoring function from the config file""" - assert isinstance(model, bob.learn.em.GMMMachine) - assert isinstance(probe, bob.learn.em.GMMStats) + stats = self.project(probe) if not isinstance(probe, GMMStats) else probe return self.scoring_function( - [model], self.ubm, [probe], [], frame_length_normalisation=True - )[0][0] + models_means=biometric_references, + ubm=self.ubm, + test_stats=stats, + frame_length_normalization=True, + ).reshape((-1,)) - def score_for_multiple_probes(self, model, probes): + def score_for_multiple_probes(self, biometric_reference, probes): """This function computes the score between the given model and several given probe files.""" - assert isinstance(model, bob.learn.em.GMMMachine) - for probe in probes: - assert isinstance(probe, bob.learn.em.GMMStats) - # logger.warn("Please verify that this function is correct") - return 
self.probe_fusion_function( - self.scoring_function( - [model], self.ubm, probes, [], frame_length_normalisation=True - ) - ) - - -class GMMRegular(GMM): - """Algorithm for computing Universal Background Models and Gaussian Mixture Models of the features""" - - def __init__(self, **kwargs): - """Initializes the local UBM-GMM tool chain with the given file selector object""" - # logger.warn("This class must be checked. Please verify that I didn't do any mistake here. I had to rename 'train_projector' into a 'train_enroller'!") - # initialize the UBMGMM base class - GMM.__init__(self, **kwargs) - # register a different set of functions in the Tool base class - Algorithm.__init__( - self, requires_enroller_training=True, performs_projection=False + logger.debug(f"scoring {biometric_reference}, {probes}") + assert isinstance(biometric_reference, GMMMachine) + stats = [ + self.project(probe) if not isinstance(probe, GMMStats) else probe + for probe in probes + ] + return self.scoring_function( + models_means=biometric_reference.means, + ubm=self.ubm, + test_stats=stats, + frame_length_normalization=True, + ).reshape((-1,)) + + def fit(self, X, y=None, **kwargs): + """Trains the UBM.""" + # Stack all the samples in a 2D array of features + array = da.vstack(X).persist() + + logger.debug("UBM with %d feature vectors", array.shape[0]) + + logger.debug(f"Creating UBM machine with {self.number_of_gaussians} gaussians") + + self.ubm = GMMMachine( + n_gaussians=self.number_of_gaussians, + trainer="ml", + max_fitting_steps=self.ubm_training_iterations, + convergence_threshold=self.training_threshold, + update_means=self.update_means, + update_variances=self.update_variances, + update_weights=self.update_weights, + mean_var_update_threshold=self.variance_threshold, + k_means_trainer=KMeansMachine( + self.number_of_gaussians, + convergence_threshold=self.training_threshold, + max_iter=self.kmeans_training_iterations, + init_method="k-means||", + init_max_iter=5, + ), ) - 
####################################################### - # UBM training # + # Train the GMM + logger.info("Training UBM GMM") - def train_enroller(self, train_features, enroller_file): - """Computes the Universal Background Model from the training ("world") data""" - train_features = [feature for client in train_features for feature in client] - return self.train_projector(train_features, enroller_file) + self.ubm.fit(array, ubm_train=True) - ####################################################### - # GMM training using UBM # + return self - def load_enroller(self, enroller_file): - """Reads the UBM model from file""" - return self.load_projector(enroller_file) - - ###################################################### - # Feature comparison # - def score(self, model, probe): - """Computes the score for the given model and the given probe. - The score are Log-Likelihood. - Therefore, the log of the likelihood ratio is obtained by computing the following difference.""" - - assert isinstance(model, bob.learn.em.GMMMachine) - self._check_feature(probe) - score = sum( - model.log_likelihood(probe[i, :]) - self.ubm.log_likelihood(probe[i, :]) - for i in range(probe.shape[0]) - ) - return score / probe.shape[0] + def transform(self, X, **kwargs): + """Passthrough. Enroll applies a different transform than score.""" + # The idea would be to apply the projection in Transform (going from extracted + # to GMMStats), but we must not apply this during the training (fit requires + # extracted data directly). + # `project` is applied in the score function directly.
+ return X - def score_for_multiple_probes(self, model, probes): - raise NotImplementedError("Implement Me!") + def _more_tags(self): + return {"bob_fit_supports_dask_array": True} diff --git a/bob/bio/gmm/algorithm/ISV.py b/bob/bio/gmm/algorithm/ISV.py deleted file mode 100644 index 6a5666eefca82afe8a7dacecb8bb849c379ef3b0..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/algorithm/ISV.py +++ /dev/null @@ -1,232 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Manuel Guenther <Manuel.Guenther@idiap.ch> - -import logging - -import numpy - -import bob.core -import bob.io.base -import bob.learn.em - -from bob.bio.base.algorithm import Algorithm - -from .GMM import GMM - -logger = logging.getLogger("bob.bio.gmm") - - -class ISV(GMM): - """Tool for computing Unified Background Models and Gaussian Mixture Models of the features""" - - def __init__( - self, - # ISV training - subspace_dimension_of_u, # U subspace dimension - isv_training_iterations=10, # Number of EM iterations for the ISV training - # ISV enrollment - isv_enroll_iterations=1, # Number of iterations for the enrollment phase - multiple_probe_scoring=None, # scoring when multiple probe files are available - # parameters of the GMM - **kwargs - ): - """Initializes the local UBM-GMM tool with the given file selector object""" - # call base class constructor with its set of parameters - GMM.__init__(self, **kwargs) - - # call tool constructor to overwrite what was set before - Algorithm.__init__( - self, - performs_projection=True, - use_projected_features_for_enrollment=True, - requires_enroller_training=False, # not needed anymore because it's done while training the projector - split_training_features_by_client=True, - subspace_dimension_of_u=subspace_dimension_of_u, - isv_training_iterations=isv_training_iterations, - isv_enroll_iterations=isv_enroll_iterations, - multiple_model_scoring=None, - multiple_probe_scoring=multiple_probe_scoring, - **kwargs - ) - - 
self.subspace_dimension_of_u = subspace_dimension_of_u - self.isv_training_iterations = isv_training_iterations - self.isv_enroll_iterations = isv_enroll_iterations - self.isv_trainer = bob.learn.em.ISVTrainer(self.relevance_factor) - - def train_isv(self, data): - """Train the ISV model given a dataset""" - logger.info(" -> Training ISV enroller") - self.isvbase = bob.learn.em.ISVBase(self.ubm, self.subspace_dimension_of_u) - # train ISV model - # Reseting the pseudo random number generator so we can have the same initialization for serial and parallel execution. - self.rng = bob.core.random.mt19937(self.init_seed) - bob.learn.em.train( - self.isv_trainer, - self.isvbase, - data, - self.isv_training_iterations, - rng=self.rng, - ) - - def train_projector(self, train_features, projector_file): - """Train Projector and Enroller at the same time""" - [ - self._check_feature(feature) - for client in train_features - for feature in client - ] - - data1 = numpy.vstack(feature for client in train_features for feature in client) - self.train_ubm(data1) - # to save some memory, we might want to delete these data - del data1 - - # project training data - logger.info(" -> Projecting training data") - data = [ - [self.project_ubm(feature) for feature in client] - for client in train_features - ] - - # train ISV - self.train_isv(data) - - # Save the ISV base AND the UBM into the same file - self.save_projector(projector_file) - - def save_projector(self, projector_file): - """Save the GMM and the ISV model in the same HDF5 file""" - hdf5file = bob.io.base.HDF5File(projector_file, "w") - hdf5file.create_group("Projector") - hdf5file.cd("Projector") - self.ubm.save(hdf5file) - - hdf5file.cd("/") - hdf5file.create_group("Enroller") - hdf5file.cd("Enroller") - self.isvbase.save(hdf5file) - - def load_isv(self, isv_file): - hdf5file = bob.io.base.HDF5File(isv_file) - self.isvbase = bob.learn.em.ISVBase(hdf5file) - # add UBM model from base class - self.isvbase.ubm = self.ubm - - 
def load_projector(self, projector_file): - """Load the GMM and the ISV model from the same HDF5 file""" - hdf5file = bob.io.base.HDF5File(projector_file) - - # Load Projector - hdf5file.cd("/Projector") - self.load_ubm(hdf5file) - - # Load Enroller - hdf5file.cd("/Enroller") - self.load_isv(hdf5file) - - ####################################################### - # ISV training # - def project_isv(self, projected_ubm): - projected_isv = numpy.ndarray( - shape=(self.ubm.shape[0] * self.ubm.shape[1],), dtype=numpy.float64 - ) - model = bob.learn.em.ISVMachine(self.isvbase) - model.estimate_ux(projected_ubm, projected_isv) - return projected_isv - - def project(self, feature): - """Computes GMM statistics against a UBM, then corresponding Ux vector""" - self._check_feature(feature) - projected_ubm = GMM.project(self, feature) - projected_isv = self.project_isv(projected_ubm) - return [projected_ubm, projected_isv] - - ####################################################### - # ISV model enroll # - - def write_feature(self, data, feature_file): - gmmstats = data[0] - Ux = data[1] - hdf5file = ( - bob.io.base.HDF5File(feature_file, "w") - if isinstance(feature_file, str) - else feature_file - ) - hdf5file.create_group("gmmstats") - hdf5file.cd("gmmstats") - gmmstats.save(hdf5file) - hdf5file.cd("..") - hdf5file.set("Ux", Ux) - - def read_feature(self, feature_file): - """Read the type of features that we require, namely GMMStats""" - hdf5file = bob.io.base.HDF5File(feature_file) - hdf5file.cd("gmmstats") - gmmstats = bob.learn.em.GMMStats(hdf5file) - hdf5file.cd("..") - Ux = hdf5file.read("Ux") - return [gmmstats, Ux] - - def _check_projected(self, probe): - """Checks that the probe is of the desired type""" - assert isinstance(probe, (tuple, list)) - assert len(probe) == 2 - assert isinstance(probe[0], bob.learn.em.GMMStats) - assert ( - isinstance(probe[1], numpy.ndarray) - and probe[1].ndim == 1 - and probe[1].dtype == numpy.float64 - ) - - def enroll(self, 
enroll_features): - """Performs ISV enrollment""" - for feature in enroll_features: - self._check_projected(feature) - machine = bob.learn.em.ISVMachine(self.isvbase) - self.isv_trainer.enroll( - machine, [f[0] for f in enroll_features], self.isv_enroll_iterations - ) - # return the resulting gmm - return machine - - ###################################################### - # Feature comparison # - def read_model(self, model_file): - """Reads the ISV Machine that holds the model""" - machine = bob.learn.em.ISVMachine(bob.io.base.HDF5File(model_file)) - machine.isv_base = self.isvbase - return machine - - def score(self, model, probe): - """Computes the score for the given model and the given probe.""" - assert isinstance(model, bob.learn.em.ISVMachine) - self._check_projected(probe) - - gmmstats = probe[0] - Ux = probe[1] - return model.forward_ux(gmmstats, Ux) - - def score_for_multiple_probes(self, model, probes): - """This function computes the score between the given model and several given probe files.""" - assert isinstance(model, bob.learn.em.ISVMachine) - [self._check_projected(probe) for probe in probes] - if self.probe_fusion_function is not None: - # When a multiple probe fusion function is selected, use it - return Algorithm.score_for_multiple_probes(self, model, probes) - else: - # Otherwise: compute joint likelihood of all probe features - # create GMM statistics from first probe statistics - # import pdb; pdb.set_trace() - gmmstats_acc = bob.learn.em.GMMStats(probes[0][0]) - # gmmstats_acc = probes[0][0] - # add all other probe statistics - for i in range(1, len(probes)): - gmmstats_acc += probes[i][0] - # compute ISV score with the accumulated statistics - projected_isv_acc = numpy.ndarray( - shape=(self.ubm.shape[0] * self.ubm.shape[1],), dtype=numpy.float64 - ) - model.estimate_ux(gmmstats_acc, projected_isv_acc) - return model.forward_ux(gmmstats_acc, projected_isv_acc) diff --git a/bob/bio/gmm/algorithm/IVector.py 
b/bob/bio/gmm/algorithm/IVector.py deleted file mode 100644 index 94c6f4f86079605394a1d5f7c25aa0827e4b6df3..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/algorithm/IVector.py +++ /dev/null @@ -1,412 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Laurent El Shafey <Laurent.El-Shafey@idiap.ch> - -import logging - -import numpy - -import bob.core -import bob.io.base -import bob.learn.em -import bob.learn.linear - -from bob.bio.base.algorithm import Algorithm - -from .GMM import GMM - -logger = logging.getLogger("bob.bio.gmm") - - -class IVector(GMM): - """Tool for extracting I-Vectors""" - - def __init__( - self, - # IVector training - subspace_dimension_of_t, # T subspace dimension - tv_training_iterations=25, # Number of EM iterations for the JFA training - update_sigma=True, - use_whitening=True, - use_lda=False, - use_wccn=False, - use_plda=False, - lda_dim=None, - lda_strip_to_rank=True, - plda_dim_F=50, - plda_dim_G=50, - plda_training_iterations=50, - # parameters of the GMM - **kwargs - ): - """Initializes the local GMM tool with the given file selector object""" - # call base class constructor with its set of parameters - GMM.__init__(self, **kwargs) - - # call tool constructor to overwrite what was set before - Algorithm.__init__( - self, - performs_projection=True, - use_projected_features_for_enrollment=True, - requires_enroller_training=False, # not needed anymore because it's done while training the projector - split_training_features_by_client=True, - subspace_dimension_of_t=subspace_dimension_of_t, - tv_training_iterations=tv_training_iterations, - update_sigma=update_sigma, - use_whitening=use_whitening, - use_lda=use_lda, - use_wccn=use_wccn, - use_plda=use_plda, - lda_dim=lda_dim, - lda_strip_to_rank=lda_strip_to_rank, - plda_dim_F=plda_dim_F, - plda_dim_G=plda_dim_G, - plda_training_iterations=plda_training_iterations, - multiple_model_scoring=None, - multiple_probe_scoring=None, - **kwargs - ) - - self.update_sigma 
= update_sigma - self.use_whitening = use_whitening - self.use_lda = use_lda - self.use_wccn = use_wccn - self.use_plda = use_plda - self.subspace_dimension_of_t = subspace_dimension_of_t - self.tv_training_iterations = tv_training_iterations - - self.ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=update_sigma) - self.whitening_trainer = bob.learn.linear.WhiteningTrainer() - - self.lda_dim = lda_dim - self.lda_trainer = bob.learn.linear.FisherLDATrainer( - strip_to_rank=lda_strip_to_rank - ) - self.wccn_trainer = bob.learn.linear.WCCNTrainer() - self.plda_trainer = bob.learn.em.PLDATrainer() - self.plda_dim_F = plda_dim_F - self.plda_dim_G = plda_dim_G - self.plda_training_iterations = plda_training_iterations - - def _check_ivector(self, feature): - """Checks that the features are appropriate""" - if ( - not isinstance(feature, numpy.ndarray) - or feature.ndim != 1 - or feature.dtype != numpy.float64 - ): - raise ValueError("The given feature is not appropriate") - - def train_ivector(self, training_stats): - logger.info(" -> Training IVector enroller") - self.tv = bob.learn.em.IVectorMachine( - self.ubm, self.subspace_dimension_of_t, self.variance_threshold - ) - - # Reseting the pseudo random number generator so we can have the same initialization for serial and parallel execution. 
- self.rng = bob.core.random.mt19937(self.init_seed) - - # train IVector model - bob.learn.em.train( - self.ivector_trainer, - self.tv, - training_stats, - self.tv_training_iterations, - rng=self.rng, - ) - - def train_whitener(self, training_features): - logger.info(" -> Training Whitening") - ivectors_matrix = numpy.vstack(training_features) - # create a Linear Machine - self.whitener = bob.learn.linear.Machine( - ivectors_matrix.shape[1], ivectors_matrix.shape[1] - ) - # create the whitening trainer - self.whitening_trainer.train(ivectors_matrix, self.whitener) - - def train_lda(self, training_features): - logger.info(" -> Training LDA projector") - self.lda, __eig_vals = self.lda_trainer.train(training_features) - - # resize the machine if desired - # You can only clip if the rank is higher than LDA_DIM - if self.lda_dim is not None: - if len(__eig_vals) < self.lda_dim: - logger.warning( - " -> You are resizing the LDA matrix to a value above its rank" - "(from {0} to {1}). Be aware that this may lead you to imprecise eigenvectors.".format( - len(__eig_vals), self.lda_dim - ) - ) - self.lda.resize(self.lda.shape[0], self.lda_dim) - - def train_wccn(self, training_features): - logger.info(" -> Training WCCN projector") - self.wccn = self.wccn_trainer.train(training_features) - - def train_plda(self, training_features): - logger.info(" -> Training PLDA projector") - self.plda_trainer.init_f_method = "BETWEEN_SCATTER" - self.plda_trainer.init_g_method = "WITHIN_SCATTER" - self.plda_trainer.init_sigma_method = "VARIANCE_DATA" - variance_flooring = 1e-5 - training_features = [numpy.vstack(client) for client in training_features] - input_dim = training_features[0].shape[1] - - # Reseting the pseudo random number generator so we can have the same initialization for serial and parallel execution. 
- self.rng = bob.core.random.mt19937(self.init_seed) - - self.plda_base = bob.learn.em.PLDABase( - input_dim, self.plda_dim_F, self.plda_dim_G, variance_flooring - ) - bob.learn.em.train( - self.plda_trainer, - self.plda_base, - training_features, - self.plda_training_iterations, - rng=self.rng, - ) - - def train_projector(self, train_features, projector_file): - """Train Projector and Enroller at the same time""" - - [ - self._check_feature(feature) - for client in train_features - for feature in client - ] - - # train UBM - data = numpy.vstack(feature for client in train_features for feature in client) - self.train_ubm(data) - del data - - # project training data - logger.info(" -> Projecting training data") - train_gmm_stats = [ - [self.project_ubm(feature) for feature in client] - for client in train_features - ] - train_gmm_stats_flatten = [ - stats for client in train_gmm_stats for stats in client - ] - - # train IVector - logger.info(" -> Projecting training data") - self.train_ivector(train_gmm_stats_flatten) - - # project training i-vectors - train_ivectors = [ - [self.project_ivector(stats) for stats in client] - for client in train_gmm_stats - ] - train_ivectors_flatten = [ - stats for client in train_ivectors for stats in client - ] - - if self.use_whitening: - # Train Whitening - self.train_whitener(train_ivectors_flatten) - # whitening and length-normalizing i-vectors - train_ivectors = [ - [self.project_whitening(ivec) for ivec in client] - for client in train_ivectors - ] - - if self.use_lda: - self.train_lda(train_ivectors) - train_ivectors = [ - [self.project_lda(ivec) for ivec in client] for client in train_ivectors - ] - - if self.use_wccn: - self.train_wccn(train_ivectors) - train_ivectors = [ - [self.project_wccn(ivec) for ivec in client] - for client in train_ivectors - ] - - if self.use_plda: - self.train_plda(train_ivectors) - - # save - self.save_projector(projector_file) - - def save_projector(self, projector_file): - # Save the IVector 
base AND the UBM AND the whitening into the same file - hdf5file = bob.io.base.HDF5File(projector_file, "w") - hdf5file.create_group("Projector") - hdf5file.cd("Projector") - self.save_ubm(hdf5file) - - hdf5file.cd("/") - hdf5file.create_group("Enroller") - hdf5file.cd("Enroller") - self.tv.save(hdf5file) - - if self.use_whitening: - hdf5file.cd("/") - hdf5file.create_group("Whitener") - hdf5file.cd("Whitener") - self.whitener.save(hdf5file) - - if self.use_lda: - hdf5file.cd("/") - hdf5file.create_group("LDA") - hdf5file.cd("LDA") - self.lda.save(hdf5file) - - if self.use_wccn: - hdf5file.cd("/") - hdf5file.create_group("WCCN") - hdf5file.cd("WCCN") - self.wccn.save(hdf5file) - - if self.use_plda: - hdf5file.cd("/") - hdf5file.create_group("PLDA") - hdf5file.cd("PLDA") - self.plda_base.save(hdf5file) - - def load_tv(self, tv_file): - hdf5file = bob.io.base.HDF5File(tv_file) - self.tv = bob.learn.em.IVectorMachine(hdf5file) - # add UBM model from base class - self.tv.ubm = self.ubm - - def load_whitener(self, whitening_file): - hdf5file = bob.io.base.HDF5File(whitening_file) - self.whitener = bob.learn.linear.Machine(hdf5file) - - def load_lda(self, lda_file): - hdf5file = bob.io.base.HDF5File(lda_file) - self.lda = bob.learn.linear.Machine(hdf5file) - - def load_wccn(self, wccn_file): - hdf5file = bob.io.base.HDF5File(wccn_file) - self.wccn = bob.learn.linear.Machine(hdf5file) - - def load_plda(self, plda_file): - hdf5file = bob.io.base.HDF5File(plda_file) - self.plda_base = bob.learn.em.PLDABase(hdf5file) - self.plda_machine = bob.learn.em.PLDAMachine(self.plda_base) - - def load_projector(self, projector_file): - """Load the GMM and the ISV model from the same HDF5 file""" - hdf5file = bob.io.base.HDF5File(projector_file) - - # Load Projector - hdf5file.cd("/Projector") - self.load_ubm(hdf5file) - - # Load Enroller - hdf5file.cd("/Enroller") - self.load_tv(hdf5file) - - if self.use_whitening: - # Load Whitening - hdf5file.cd("/Whitener") - 
self.load_whitener(hdf5file) - - if self.use_lda: - # Load LDA - hdf5file.cd("/LDA") - self.load_lda(hdf5file) - - if self.use_wccn: - # Load WCCN - hdf5file.cd("/WCCN") - self.load_wccn(hdf5file) - - if self.use_plda: - # Load PLDA - hdf5file.cd("/PLDA") - self.load_plda(hdf5file) - - def project_ivector(self, gmm_stats): - return self.tv.project(gmm_stats) - - def project_whitening(self, ivector): - whitened = self.whitener.forward(ivector) - return whitened / numpy.linalg.norm(whitened) - - def project_lda(self, ivector): - out_ivector = numpy.ndarray(self.lda.shape[1], numpy.float64) - self.lda(ivector, out_ivector) - return out_ivector - - def project_wccn(self, ivector): - out_ivector = numpy.ndarray(self.wccn.shape[1], numpy.float64) - self.wccn(ivector, out_ivector) - return out_ivector - - ####################################################### - # IVector projection # - def project(self, feature_array): - """Computes GMM statistics against a UBM, then corresponding Ux vector""" - self._check_feature(feature_array) - # project UBM - projected_ubm = self.project_ubm(feature_array) - # project I-Vector - ivector = self.project_ivector(projected_ubm) - # whiten I-Vector - if self.use_whitening: - ivector = self.project_whitening(ivector) - # LDA projection - if self.use_lda: - ivector = self.project_lda(ivector) - # WCCN projection - if self.use_wccn: - ivector = self.project_wccn(ivector) - return ivector - - ####################################################### - # Read / Write I-Vectors # - def write_feature(self, data, feature_file): - """Saves the feature, which is the (whitened) I-Vector.""" - bob.bio.base.save(data, feature_file) - - def read_feature(self, feature_file): - """Read the type of features that we require, namely i-vectors (stored as simple numpy arrays)""" - return bob.bio.base.load(feature_file) - - ####################################################### - # Model Enrollment # - def enroll(self, enroll_features): - """Performs IVector 
enrollment""" - [self._check_ivector(feature) for feature in enroll_features] - average_ivector = numpy.mean(numpy.vstack(enroll_features), axis=0) - if self.use_plda: - average_ivector = average_ivector.reshape(1, -1) - self.plda_trainer.enroll(self.plda_machine, average_ivector) - return self.plda_machine - else: - return average_ivector - - ###################################################### - # Feature comparison # - def read_model(self, model_file): - """Reads the whitened i-vector that holds the model""" - if self.use_plda: - return bob.learn.em.PLDAMachine( - bob.io.base.HDF5File(str(model_file)), self.plda_base - ) - else: - return bob.bio.base.load(model_file) - - def score(self, model, probe): - """Computes the score for the given model and the given probe.""" - self._check_ivector(probe) - if self.use_plda: - return model.log_likelihood_ratio(probe) - else: - self._check_ivector(model) - return numpy.dot( - model / numpy.linalg.norm(model), probe / numpy.linalg.norm(probe) - ) - - def score_for_multiple_probes(self, model, probes): - """This function computes the score between the given model and several given probe files.""" - probe = numpy.mean(numpy.vstack(probes), axis=0) - return self.score(model, probe) diff --git a/bob/bio/gmm/algorithm/JFA.py b/bob/bio/gmm/algorithm/JFA.py deleted file mode 100644 index 4280d2671c9ebd235ff9b9e004c64b39c4f7b6e6..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/algorithm/JFA.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# Manuel Guenther <Manuel.Guenther@idiap.ch> - -import logging - -import bob.core -import bob.io.base -import bob.learn.em - -from bob.bio.base.algorithm import Algorithm - -from .GMM import GMM - -logger = logging.getLogger("bob.bio.gmm") - - -class JFA(GMM): - """Tool for computing Unified Background Models and Gaussian Mixture Models of the features and project it via JFA""" - - def __init__( - self, - # JFA training - 
subspace_dimension_of_u, # U subspace dimension - subspace_dimension_of_v, # V subspace dimension - jfa_training_iterations=10, # Number of EM iterations for the JFA training - # JFA enrollment - jfa_enroll_iterations=1, # Number of iterations for the enrollment phase - # parameters of the GMM - **kwargs - ): - """Initializes the local UBM-GMM tool with the given file selector object""" - # call base class constructor - GMM.__init__(self, **kwargs) - - # call tool constructor to overwrite what was set before - Algorithm.__init__( - self, - performs_projection=True, - use_projected_features_for_enrollment=True, - requires_enroller_training=True, - subspace_dimension_of_u=subspace_dimension_of_u, - subspace_dimension_of_v=subspace_dimension_of_v, - jfa_training_iterations=jfa_training_iterations, - jfa_enroll_iterations=jfa_enroll_iterations, - multiple_model_scoring=None, - multiple_probe_scoring=None, - **kwargs - ) - - self.subspace_dimension_of_u = subspace_dimension_of_u - self.subspace_dimension_of_v = subspace_dimension_of_v - self.jfa_training_iterations = jfa_training_iterations - self.jfa_enroll_iterations = jfa_enroll_iterations - self.jfa_trainer = bob.learn.em.JFATrainer() - - def load_projector(self, projector_file): - """Reads the UBM model from file""" - # Here, we just need to load the UBM from the projector file. 
- self.load_ubm(projector_file) - - ####################################################### - # JFA training # - def train_enroller(self, train_features, enroller_file): - # assert that all training features are GMMStatistics - for client_feature in train_features: - for feature in client_feature: - assert isinstance(feature, bob.learn.em.GMMStats) - - # create a JFABasemachine with the UBM from the base class - self.jfa_base = bob.learn.em.JFABase( - self.ubm, self.subspace_dimension_of_u, self.subspace_dimension_of_v - ) - - # train the JFA - bob.learn.em.train_jfa( - self.jfa_trainer, - self.jfa_base, - train_features, - self.jfa_training_iterations, - rng=bob.core.random.mt19937(self.init_seed), - ) - - # Save the JFA base AND the UBM into the same file - self.jfa_base.save(bob.io.base.HDF5File(enroller_file, "w")) - - ####################################################### - # JFA model enroll # - def load_enroller(self, enroller_file): - """Reads the JFA base from file""" - # now, load the JFA base, if it is included in the file - self.jfa_base = bob.learn.em.JFABase(bob.io.base.HDF5File(enroller_file)) - # add UBM model from base class - self.jfa_base.ubm = self.ubm - - # TODO: Why is the rng re-initialized here? 
- # self.rng = bob.core.random.mt19937(self.init_seed) - - def read_feature(self, feature_file): - """Reads the projected feature to be enrolled as a model""" - return bob.learn.em.GMMStats(bob.io.base.HDF5File(feature_file)) - - def enroll(self, enroll_features): - """Enrolls a GMM using MAP adaptation""" - machine = bob.learn.em.JFAMachine(self.jfa_base) - self.jfa_trainer.enroll(machine, enroll_features, self.jfa_enroll_iterations) - # return the resulting gmm - return machine - - ###################################################### - # Feature comparison # - def read_model(self, model_file): - """Reads the JFA Machine that holds the model""" - machine = bob.learn.em.JFAMachine(bob.io.base.HDF5File(model_file)) - machine.jfa_base = self.jfa_base - return machine - - def score(self, model, probe): - """Computes the score for the given model and the given probe""" - assert isinstance(model, bob.learn.em.JFAMachine) - assert isinstance(probe, bob.learn.em.GMMStats) - return model.log_likelihood(probe) - - def score_for_multiple_probes(self, model, probes): - """This function computes the score between the given model and several probes.""" - # TODO: Check if this is correct - # logger.warn("This function needs to be verified!") - raise NotImplementedError("Multiple probes is not yet supported") - # scores = numpy.ndarray((len(probes),), 'float64') - # model.forward(probes, scores) - # return scores[0] diff --git a/bob/bio/gmm/algorithm/__init__.py b/bob/bio/gmm/algorithm/__init__.py index 046970ffbfb3d835b81a43ff05ce51ca55bd1d53..cf76a6bab685c1ddf8933c8d8a140f1f1f69d1ad 100644 --- a/bob/bio/gmm/algorithm/__init__.py +++ b/bob/bio/gmm/algorithm/__init__.py @@ -1,8 +1,4 @@ from .GMM import GMM -from .GMM import GMMRegular -from .ISV import ISV -from .IVector import IVector -from .JFA import JFA # gets sphinx autodoc done right - don't remove it @@ -23,9 +19,5 @@ def __appropriate__(*args): __appropriate__( GMM, - GMMRegular, - JFA, - ISV, - IVector, ) __all__ = [_ 
for _ in dir() if not _.startswith("_")] diff --git a/bob/bio/gmm/config/algorithm/gmm.py b/bob/bio/gmm/config/algorithm/gmm.py index 9b280de63b4455fc5ad0ea3b9a899a46f434136f..ce235b6b52bb464d448c2faac1b94afabf50a7ec 100644 --- a/bob/bio/gmm/config/algorithm/gmm.py +++ b/bob/bio/gmm/config/algorithm/gmm.py @@ -1,5 +1,3 @@ import bob.bio.gmm -algorithm = bob.bio.gmm.algorithm.GMM( - number_of_gaussians=512, -) +algorithm = bob.bio.gmm.algorithm.GMM(number_of_gaussians=512) diff --git a/bob/bio/gmm/config/algorithm/gmm_regular.py b/bob/bio/gmm/config/algorithm/gmm_regular.py deleted file mode 100644 index 5bedd869287975d8068d17ee599b7b4bbb9225d3..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/config/algorithm/gmm_regular.py +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env python - -import bob.bio.gmm - -algorithm = bob.bio.gmm.algorithm.GMMRegular(number_of_gaussians=512) diff --git a/bob/bio/gmm/config/algorithm/isv.py b/bob/bio/gmm/config/algorithm/isv.py deleted file mode 100644 index 3d6d9c7a72d356a47d30eaa24c4baabe4d94134b..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/config/algorithm/isv.py +++ /dev/null @@ -1,8 +0,0 @@ -import bob.bio.gmm - -algorithm = bob.bio.gmm.algorithm.ISV( - # ISV parameters - subspace_dimension_of_u=160, - # GMM parameters - number_of_gaussians=512, -) diff --git a/bob/bio/gmm/config/algorithm/ivector_cosine.py b/bob/bio/gmm/config/algorithm/ivector_cosine.py deleted file mode 100644 index 1006de117ba47c7dfd2dbe42965b08fa086d91e5..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/config/algorithm/ivector_cosine.py +++ /dev/null @@ -1,10 +0,0 @@ -import bob.bio.gmm - -algorithm = bob.bio.gmm.algorithm.IVector( - # IVector parameters - subspace_dimension_of_t=400, - update_sigma=True, - tv_training_iterations=3, # Number of EM iterations for the TV training - # GMM parameters - number_of_gaussians=512, -) diff --git a/bob/bio/gmm/config/algorithm/ivector_lda_wccn_plda.py 
b/bob/bio/gmm/config/algorithm/ivector_lda_wccn_plda.py deleted file mode 100644 index de9bd5a06f4ed6ba06ee4416a9318a7b3c4caab1..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/config/algorithm/ivector_lda_wccn_plda.py +++ /dev/null @@ -1,17 +0,0 @@ -import bob.bio.gmm - -algorithm = bob.bio.gmm.algorithm.IVector( - # IVector parameters - subspace_dimension_of_t=100, - update_sigma=True, - tv_training_iterations=25, # Number of EM iterations for the TV training - # GMM parameters - number_of_gaussians=256, - use_lda=True, - use_wccn=True, - use_plda=True, - lda_dim=50, - plda_dim_F=50, - plda_dim_G=50, - plda_training_iterations=200, -) diff --git a/bob/bio/gmm/config/algorithm/ivector_plda.py b/bob/bio/gmm/config/algorithm/ivector_plda.py deleted file mode 100644 index a99762f8ef3b158c8168f99866720f56a980eddb..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/config/algorithm/ivector_plda.py +++ /dev/null @@ -1,14 +0,0 @@ -import bob.bio.gmm - -algorithm = bob.bio.gmm.algorithm.IVector( - # IVector parameters - subspace_dimension_of_t=100, - update_sigma=True, - tv_training_iterations=25, # Number of EM iterations for the TV training - # GMM parameters - number_of_gaussians=256, - use_plda=True, - plda_dim_F=50, - plda_dim_G=50, - plda_training_iterations=200, -) diff --git a/bob/bio/gmm/config/algorithm/jfa.py b/bob/bio/gmm/config/algorithm/jfa.py deleted file mode 100644 index 21c5d4df52cb647e49db69343fdcf86c4b7daf6c..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/config/algorithm/jfa.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python - -import bob.bio.gmm - -algorithm = bob.bio.gmm.algorithm.JFA( - # JFA Training - subspace_dimension_of_u=2, # U subspace dimension - subspace_dimension_of_v=2, # V subspace dimension - jfa_training_iterations=10, # Number of EM iterations for the JFA training - # GMM training - number_of_gaussians=512, -) diff --git a/bob/bio/gmm/test/__init__.py b/bob/bio/gmm/test/__init__.py index 
20aa1aac7e67b2a10ac7fc71b992c30bdfc60a4b..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/bob/bio/gmm/test/__init__.py +++ b/bob/bio/gmm/test/__init__.py @@ -1 +0,0 @@ -from . import dummy # noqa: F401 diff --git a/bob/bio/gmm/test/data/gmm_enrolled.hdf5 b/bob/bio/gmm/test/data/gmm_enrolled.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..6466ed0a3fc524a6ab698f09540aa908f5108c22 Binary files /dev/null and b/bob/bio/gmm/test/data/gmm_enrolled.hdf5 differ diff --git a/bob/bio/gmm/test/data/gmm_model.hdf5 b/bob/bio/gmm/test/data/gmm_model.hdf5 deleted file mode 100644 index a57d494c0fc9112e582827d577ae4bf974d2e174..0000000000000000000000000000000000000000 Binary files a/bob/bio/gmm/test/data/gmm_model.hdf5 and /dev/null differ diff --git a/bob/bio/gmm/test/data/gmm_projected.hdf5 b/bob/bio/gmm/test/data/gmm_projected.hdf5 index 31d930b955098e3ae990c1e2509d2c232d1a86be..fc5e0a7c0b8f41d3b7d9a03d07c8e277f8bb5386 100644 Binary files a/bob/bio/gmm/test/data/gmm_projected.hdf5 and b/bob/bio/gmm/test/data/gmm_projected.hdf5 differ diff --git a/bob/bio/gmm/test/data/gmm_projector.hdf5 b/bob/bio/gmm/test/data/gmm_projector.hdf5 deleted file mode 100644 index 4c47be97a009e963d25301904a7420eced1b55e9..0000000000000000000000000000000000000000 Binary files a/bob/bio/gmm/test/data/gmm_projector.hdf5 and /dev/null differ diff --git a/bob/bio/gmm/test/data/gmm_ubm.hdf5 b/bob/bio/gmm/test/data/gmm_ubm.hdf5 new file mode 100644 index 0000000000000000000000000000000000000000..6a8abe718f958351135e5992621a733016127ccc Binary files /dev/null and b/bob/bio/gmm/test/data/gmm_ubm.hdf5 differ diff --git a/bob/bio/gmm/test/dummy/__init__.py b/bob/bio/gmm/test/dummy/__init__.py deleted file mode 100644 index e19bac9316e70ef0f0efe281c508a48af46b401f..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/test/dummy/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from . 
import extractor # noqa: F401 diff --git a/bob/bio/gmm/test/dummy/extractor.py b/bob/bio/gmm/test/dummy/extractor.py deleted file mode 100644 index 9459dd2147fad976ddfa2de168734ac6c4127217..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/test/dummy/extractor.py +++ /dev/null @@ -1,30 +0,0 @@ -import numpy - -import bob.io.base - -from bob.bio.base.extractor import Extractor - -_data = [10.0, 11.0, 12.0, 13.0, 14.0] - - -class DummyExtractor(Extractor): - def __init__(self): - Extractor.__init__(self, requires_training=True) - self.model = False - - def train(self, train_data, extractor_file): - assert isinstance(train_data, list) - bob.io.base.save(_data, extractor_file) - - def load(self, extractor_file): - data = bob.io.base.load(extractor_file) - assert (_data == data).all() - self.model = True - - def __call__(self, data): - """Does nothing, simply converts the data type of the data, ignoring any annotation.""" - assert self.model - return data.astype(numpy.float) - - -extractor = DummyExtractor() diff --git a/bob/bio/gmm/test/test_algorithms.py b/bob/bio/gmm/test/test_algorithms.py deleted file mode 100644 index 7cb0bb5b5c5052d936285fd1ee49cf510dbf6665..0000000000000000000000000000000000000000 --- a/bob/bio/gmm/test/test_algorithms.py +++ /dev/null @@ -1,712 +0,0 @@ -#!/usr/bin/env python -# vim: set fileencoding=utf-8 : -# @author: Manuel Guenther <Manuel.Guenther@idiap.ch> -# @date: Thu May 24 10:41:42 CEST 2012 -# -# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, version 3 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -import logging -import os -import shutil -import sys - -import numpy -import pkg_resources - -import bob.bio.gmm -import bob.io.base -import bob.io.base.test_utils -import bob.learn.linear - -from bob.bio.base.test import utils - -logger = logging.getLogger("bob.bio.gmm") - -regenerate_refs = False - -seed_value = 5489 - -_mac_os = sys.platform == "darwin" - - -def _compare( - data, reference, write_function=bob.bio.base.save, read_function=bob.bio.base.load -): - # write reference? - if regenerate_refs: - write_function(data, reference) - - # compare reference - reference = read_function(reference) - if hasattr(data, "is_similar_to"): - assert data.is_similar_to(reference) - else: - assert numpy.allclose(data, reference, atol=1e-5) - - -def _compare_complex( - data, reference, write_function=bob.bio.base.save, read_function=bob.bio.base.load -): - # write reference? 
- if regenerate_refs: - write_function(data, reference) - - # compare reference - reference = read_function(reference) - for d, r in zip(data, reference): - if hasattr(d, "is_similar_to"): - assert d.is_similar_to(r) - else: - assert numpy.allclose(d, r, atol=1e-5) - - -def test_gmm(): - temp_file = bob.io.base.test_utils.temporary_filename() - gmm1 = bob.bio.base.load_resource( - "gmm", "algorithm", preferred_package="bob.bio.gmm" - ) - assert isinstance(gmm1, bob.bio.gmm.algorithm.GMM) - assert isinstance(gmm1, bob.bio.base.algorithm.Algorithm) - assert gmm1.performs_projection - assert gmm1.requires_projector_training - assert not gmm1.use_projected_features_for_enrollment - assert not gmm1.split_training_features_by_client - assert not gmm1.requires_enroller_training - - # create smaller GMM object - gmm2 = bob.bio.gmm.algorithm.GMM( - number_of_gaussians=2, - kmeans_training_iterations=1, - gmm_training_iterations=1, - INIT_SEED=seed_value, - ) - - train_data = utils.random_training_set( - (100, 45), count=5, minimum=-5.0, maximum=5.0 - ) - reference_file = pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/gmm_projector.hdf5" - ) - try: - # train the projector - gmm2.train_projector(train_data, temp_file) - - assert os.path.exists(temp_file) - - if regenerate_refs: - shutil.copy(temp_file, reference_file) - - # check projection matrix - gmm1.load_projector(reference_file) - gmm2.load_projector(temp_file) - - assert gmm1.ubm.is_similar_to(gmm2.ubm) - finally: - if os.path.exists(temp_file): - os.remove(temp_file) - - # generate and project random feature - feature = utils.random_array((20, 45), -5.0, 5.0, seed=84) - projected = gmm1.project(feature) - assert isinstance(projected, bob.learn.em.GMMStats) - _compare( - projected, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_projected.hdf5"), - gmm1.write_feature, - gmm1.read_feature, - ) - - # enroll model from random features - enroll = utils.random_training_set((20, 45), 5, -5.0, 
5.0, seed=21) - model = gmm1.enroll(enroll) - assert isinstance(model, bob.learn.em.GMMMachine) - _compare( - model, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_model.hdf5"), - gmm1.write_model, - gmm1.read_model, - ) - - # compare model with probe - probe = gmm1.read_feature( - pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_projected.hdf5") - ) - reference_score = -0.01676570 - assert ( - abs(gmm1.score(model, probe) - reference_score) < 1e-5 - ), "The scores differ: %3.8f, %3.8f" % (gmm1.score(model, probe), reference_score) - assert ( - abs(gmm1.score_for_multiple_probes(model, [probe, probe]) - reference_score) - < 1e-5 - ) - - -def test_gmm_regular(): - - temp_file = bob.io.base.test_utils.temporary_filename() - gmm1 = bob.bio.base.load_resource( - "gmm-regular", "algorithm", preferred_package="bob.bio.gmm" - ) - assert isinstance(gmm1, bob.bio.gmm.algorithm.GMMRegular) - assert isinstance(gmm1, bob.bio.gmm.algorithm.GMM) - assert isinstance(gmm1, bob.bio.base.algorithm.Algorithm) - assert not gmm1.performs_projection - assert not gmm1.requires_projector_training - assert not gmm1.use_projected_features_for_enrollment - assert gmm1.requires_enroller_training - - # create smaller GMM object - gmm2 = bob.bio.gmm.algorithm.GMMRegular( - number_of_gaussians=2, - kmeans_training_iterations=1, - gmm_training_iterations=1, - INIT_SEED=seed_value, - ) - - train_data = utils.random_training_set( - (100, 45), count=5, minimum=-5.0, maximum=5.0 - ) - reference_file = pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/gmm_projector.hdf5" - ) - try: - # train the enroler - gmm2.train_enroller([train_data], temp_file) - - assert os.path.exists(temp_file) - - if regenerate_refs: - shutil.copy(temp_file, reference_file) - - # check projection matrix - gmm1.load_enroller(reference_file) - gmm2.load_enroller(temp_file) - - assert gmm1.ubm.is_similar_to(gmm2.ubm) - finally: - if os.path.exists(temp_file): - os.remove(temp_file) - - # 
enroll model from random features - enroll = utils.random_training_set((20, 45), 5, -5.0, 5.0, seed=21) - model = gmm1.enroll(enroll) - assert isinstance(model, bob.learn.em.GMMMachine) - _compare( - model, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_model.hdf5"), - gmm1.write_model, - gmm1.read_model, - ) - - # generate random probe feature - probe = utils.random_array((20, 45), -5.0, 5.0, seed=84) - - # compare model with probe - reference_score = -0.40840148 - assert ( - abs(gmm1.score(model, probe) - reference_score) < 1e-5 - ), "The scores differ: %3.8f, %3.8f" % (gmm1.score(model, probe), reference_score) - # TODO: not implemented - # assert abs(gmm1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5 - - -def test_isv(): - temp_file = bob.io.base.test_utils.temporary_filename() - isv1 = bob.bio.base.load_resource( - "isv", "algorithm", preferred_package="bob.bio.gmm" - ) - assert isinstance(isv1, bob.bio.gmm.algorithm.ISV) - assert isinstance(isv1, bob.bio.gmm.algorithm.GMM) - assert isinstance(isv1, bob.bio.base.algorithm.Algorithm) - assert isv1.performs_projection - assert isv1.requires_projector_training - assert isv1.use_projected_features_for_enrollment - assert isv1.split_training_features_by_client - assert not isv1.requires_enroller_training - - # create smaller GMM object - isv2 = bob.bio.gmm.algorithm.ISV( - number_of_gaussians=2, - subspace_dimension_of_u=10, - kmeans_training_iterations=1, - gmm_training_iterations=1, - isv_training_iterations=1, - INIT_SEED=seed_value, - ) - - train_data = utils.random_training_set_by_id( - (100, 45), count=5, minimum=-5.0, maximum=5.0 - ) - reference_file = pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/isv_projector.hdf5" - ) - try: - # train the projector - isv2.train_projector(train_data, temp_file) - - assert os.path.exists(temp_file) - - if regenerate_refs: - shutil.copy(temp_file, reference_file) - - # check projection matrix - 
isv1.load_projector(reference_file) - isv2.load_projector(temp_file) - - assert isv1.ubm.is_similar_to(isv2.ubm) - assert isv1.isvbase.is_similar_to(isv2.isvbase) - finally: - if os.path.exists(temp_file): - os.remove(temp_file) - - # generate and project random feature - feature = utils.random_array((20, 45), -5.0, 5.0, seed=84) - projected = isv1.project(feature) - assert isinstance(projected, (list, tuple)) - assert len(projected) == 2 - assert isinstance(projected[0], bob.learn.em.GMMStats) - assert isinstance(projected[1], numpy.ndarray) - _compare_complex( - projected, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/isv_projected.hdf5"), - isv1.write_feature, - isv1.read_feature, - ) - - # enroll model from random features - random_features = utils.random_training_set( - (20, 45), count=5, minimum=-5.0, maximum=5.0 - ) - enroll_features = [isv1.project(feature) for feature in random_features] - model = isv1.enroll(enroll_features) - assert isinstance(model, bob.learn.em.ISVMachine) - _compare( - model, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/isv_model.hdf5"), - isv1.write_model, - isv1.read_model, - ) - - # compare model with probe - probe = isv1.read_feature( - pkg_resources.resource_filename("bob.bio.gmm.test", "data/isv_projected.hdf5") - ) - reference_score = 0.02136784 - assert ( - abs(isv1.score(model, probe) - reference_score) < 1e-5 - ), "The scores differ: %3.8f, %3.8f" % (isv1.score(model, probe), reference_score) - # assert abs(isv1.score_for_multiple_probes(model, [probe]*4) - reference_score) < 1e-5, isv1.score_for_multiple_probes(model, [probe, probe]) - # TODO: Why is the score not identical for multiple copies of the same probe? 
- assert ( - abs(isv1.score_for_multiple_probes(model, [probe, probe]) - reference_score) - < 1e-4 - ), isv1.score_for_multiple_probes(model, [probe, probe]) - - -def test_jfa(): - temp_file = bob.io.base.test_utils.temporary_filename() - jfa1 = bob.bio.base.load_resource( - "jfa", "algorithm", preferred_package="bob.bio.gmm" - ) - assert isinstance(jfa1, bob.bio.gmm.algorithm.JFA) - assert isinstance(jfa1, bob.bio.gmm.algorithm.GMM) - assert isinstance(jfa1, bob.bio.base.algorithm.Algorithm) - assert jfa1.performs_projection - assert jfa1.requires_projector_training - assert jfa1.use_projected_features_for_enrollment - assert not jfa1.split_training_features_by_client - assert jfa1.requires_enroller_training - - # create smaller JFA object - jfa2 = bob.bio.gmm.algorithm.JFA( - number_of_gaussians=2, - subspace_dimension_of_u=2, - subspace_dimension_of_v=2, - kmeans_training_iterations=1, - gmm_training_iterations=1, - jfa_training_iterations=1, - INIT_SEED=seed_value, - ) - - train_data = utils.random_training_set( - (100, 45), count=5, minimum=-5.0, maximum=5.0 - ) - # reference is the same as for GMM projection - reference_file = pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/gmm_projector.hdf5" - ) - try: - # train the projector - jfa2.train_projector(train_data, temp_file) - - assert os.path.exists(temp_file) - - if regenerate_refs: - shutil.copy(temp_file, reference_file) - - # check projection matrix - jfa1.load_projector(reference_file) - jfa2.load_projector(temp_file) - - assert jfa1.ubm.is_similar_to(jfa2.ubm) - finally: - if os.path.exists(temp_file): - os.remove(temp_file) - - # generate and project random feature - feature = utils.random_array((20, 45), -5.0, 5.0, seed=84) - projected = jfa1.project(feature) - assert isinstance(projected, bob.learn.em.GMMStats) - _compare( - projected, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_projected.hdf5"), - jfa1.write_feature, - jfa1.read_feature, - ) - - # enroll model from 
random features - random_features = utils.random_training_set_by_id( - (20, 45), count=5, minimum=-5.0, maximum=5.0 - ) - train_data = [ - [jfa1.project(feature) for feature in client_features] - for client_features in random_features - ] - reference_file = pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/jfa_enroller.hdf5" - ) - try: - # train the projector - jfa2.train_enroller(train_data, temp_file) - - assert os.path.exists(temp_file) - - if regenerate_refs: - shutil.copy(temp_file, reference_file) - - # check projection matrix - jfa1.load_enroller(reference_file) - jfa2.load_enroller(temp_file) - - assert jfa1.jfa_base.is_similar_to(jfa2.jfa_base) - finally: - if os.path.exists(temp_file): - os.remove(temp_file) - - # enroll model from random features - random_features = utils.random_training_set( - (20, 45), count=5, minimum=-5.0, maximum=5.0 - ) - enroll_features = [jfa1.project(feature) for feature in random_features] - model = jfa1.enroll(enroll_features) - assert isinstance(model, bob.learn.em.JFAMachine) - _compare( - model, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/jfa_model.hdf5"), - jfa1.write_model, - jfa1.read_model, - ) - - # compare model with probe - probe = jfa1.read_feature( - pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_projected.hdf5") - ) - reference_score = 0.02225812 - assert ( - abs(jfa1.score(model, probe) - reference_score) < 1e-5 - ), "The scores differ: %3.8f, %3.8f" % (jfa1.score(model, probe), reference_score) - # TODO: implement that - # assert abs(jfa1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5, jfa1.score_for_multiple_probes(model, [probe, probe]) - - -def test_ivector_cosine(): - temp_file = bob.io.base.test_utils.temporary_filename() - ivec1 = bob.bio.base.load_resource( - "ivector-cosine", "algorithm", preferred_package="bob.bio.gmm" - ) - assert isinstance(ivec1, bob.bio.gmm.algorithm.IVector) - assert isinstance(ivec1, bob.bio.gmm.algorithm.GMM) - 
assert isinstance(ivec1, bob.bio.base.algorithm.Algorithm) - assert ivec1.performs_projection - assert ivec1.requires_projector_training - assert ivec1.use_projected_features_for_enrollment - assert ivec1.split_training_features_by_client - assert not ivec1.requires_enroller_training - - # create smaller IVector object - ivec2 = bob.bio.gmm.algorithm.IVector( - number_of_gaussians=2, - subspace_dimension_of_t=2, - kmeans_training_iterations=1, - tv_training_iterations=1, - INIT_SEED=seed_value, - ) - - train_data = utils.random_training_set( - (100, 45), count=5, minimum=-5.0, maximum=5.0 - ) - train_data = [train_data] - - # reference is the same as for GMM projection - reference_file = pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector_projector.hdf5" - ) - try: - # train the projector - - ivec2.train_projector(train_data, temp_file) - - assert os.path.exists(temp_file) - - if regenerate_refs: - shutil.copy(temp_file, reference_file) - - # check projection matrix - ivec1.load_projector(reference_file) - ivec2.load_projector(temp_file) - - assert ivec1.ubm.is_similar_to(ivec2.ubm) - assert ivec1.tv.is_similar_to(ivec2.tv) - assert ivec1.whitener.is_similar_to(ivec2.whitener) - finally: - if os.path.exists(temp_file): - os.remove(temp_file) - - # generate and project random feature - feature = utils.random_array((20, 45), -5.0, 5.0, seed=84) - projected = ivec1.project(feature) - _compare( - projected, - pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector_projected.hdf5" - ), - ivec1.write_feature, - ivec1.read_feature, - ) - - # enroll model from random features - random_features = utils.random_training_set( - (20, 45), count=5, minimum=-5.0, maximum=5.0 - ) - enroll_features = [ivec1.project(feature) for feature in random_features] - model = ivec1.enroll(enroll_features) - _compare( - model, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/ivector_model.hdf5"), - ivec1.write_model, - ivec1.read_model, - ) - - # 
compare model with probe - probe = ivec1.read_feature( - pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector_projected.hdf5" - ) - ) - reference_score = -0.00187151 - assert ( - abs(ivec1.score(model, probe) - reference_score) < 1e-5 - ), "The scores differ: %3.8f, %3.8f" % (ivec1.score(model, probe), reference_score) - # TODO: implement that - assert ( - abs(ivec1.score_for_multiple_probes(model, [probe, probe]) - reference_score) - < 1e-5 - ) - - -def test_ivector_plda(): - temp_file = bob.io.base.test_utils.temporary_filename() - ivec1 = bob.bio.base.load_resource( - "ivector-plda", "algorithm", preferred_package="bob.bio.gmm" - ) - ivec1.use_plda = True - - # create smaller IVector object - ivec2 = bob.bio.gmm.algorithm.IVector( - number_of_gaussians=2, - subspace_dimension_of_t=10, - kmeans_training_iterations=1, - tv_training_iterations=1, - INIT_SEED=seed_value, - use_plda=True, - plda_dim_F=2, - plda_dim_G=2, - plda_training_iterations=2, - ) - - train_data = utils.random_training_set_by_id( - (100, 45), count=5, minimum=-5.0, maximum=5.0 - ) - - # reference is the same as for GMM projection - reference_file = pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector2_projector.hdf5" - ) - try: - # train the projector - - ivec2.train_projector(train_data, temp_file) - - assert os.path.exists(temp_file) - - if regenerate_refs: - shutil.copy(temp_file, reference_file) - - # check projection matrix - ivec1.load_projector(reference_file) - ivec2.load_projector(temp_file) - - assert ivec1.ubm.is_similar_to(ivec2.ubm) - assert ivec1.tv.is_similar_to(ivec2.tv) - assert ivec1.whitener.is_similar_to(ivec2.whitener) - finally: - if os.path.exists(temp_file): - os.remove(temp_file) - - # generate and project random feature - feature = utils.random_array((20, 45), -5.0, 5.0, seed=84) - projected = ivec1.project(feature) - _compare( - projected, - pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector2_projected.hdf5" - ), - 
ivec1.write_feature, - ivec1.read_feature, - ) - - # enroll model from random features - random_features = utils.random_training_set( - (20, 45), count=5, minimum=-5.0, maximum=5.0 - ) - enroll_features = [ivec1.project(feature) for feature in random_features] - - model = ivec1.enroll(enroll_features) - _compare( - model, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/ivector2_model.hdf5"), - ivec1.write_model, - ivec1.read_model, - ) - - # compare model with probe - probe = ivec1.read_feature( - pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector2_projected.hdf5" - ) - ) - logger.info("%f" % ivec1.score(model, probe)) - reference_score = 1.21879822 - assert ( - abs(ivec1.score(model, probe) - reference_score) < 1e-5 - ), "The scores differ: %3.8f, %3.8f" % (ivec1.score(model, probe), reference_score) - assert ( - abs(ivec1.score_for_multiple_probes(model, [probe, probe]) - reference_score) - < 1e-5 - ) - - -def test_ivector_lda_wccn_plda(): - temp_file = bob.io.base.test_utils.temporary_filename() - ivec1 = bob.bio.base.load_resource( - "ivector-lda-wccn-plda", "algorithm", preferred_package="bob.bio.gmm" - ) - ivec1.use_lda = True - ivec1.use_wccn = True - ivec1.use_plda = True - # create smaller IVector object - ivec2 = bob.bio.gmm.algorithm.IVector( - number_of_gaussians=2, - subspace_dimension_of_t=10, - kmeans_training_iterations=1, - tv_training_iterations=1, - INIT_SEED=seed_value, - use_lda=True, - lda_dim=3, - use_wccn=True, - use_plda=True, - plda_dim_F=2, - plda_dim_G=2, - plda_training_iterations=2, - ) - - train_data = utils.random_training_set_by_id( - (100, 45), count=5, minimum=-5.0, maximum=5.0 - ) - - # reference is the same as for GMM projection - reference_file = pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector3_projector.hdf5" - ) - try: - # train the projector - - ivec2.train_projector(train_data, temp_file) - - assert os.path.exists(temp_file) - - if regenerate_refs: - shutil.copy(temp_file, 
reference_file) - - # check projection matrix - ivec1.load_projector(reference_file) - ivec2.load_projector(temp_file) - - assert ivec1.ubm.is_similar_to(ivec2.ubm) - assert ivec1.tv.is_similar_to(ivec2.tv) - assert ivec1.whitener.is_similar_to(ivec2.whitener) - finally: - if os.path.exists(temp_file): - os.remove(temp_file) - - # generate and project random feature - feature = utils.random_array((20, 45), -5.0, 5.0, seed=84) - projected = ivec1.project(feature) - _compare( - projected, - pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector3_projected.hdf5" - ), - ivec1.write_feature, - ivec1.read_feature, - ) - - # enroll model from random features - random_features = utils.random_training_set( - (20, 45), count=5, minimum=-5.0, maximum=5.0 - ) - enroll_features = [ivec1.project(feature) for feature in random_features] - model = ivec1.enroll(enroll_features) - _compare( - model, - pkg_resources.resource_filename("bob.bio.gmm.test", "data/ivector3_model.hdf5"), - ivec1.write_model, - ivec1.read_model, - ) - - # compare model with probe - probe = ivec1.read_feature( - pkg_resources.resource_filename( - "bob.bio.gmm.test", "data/ivector3_projected.hdf5" - ) - ) - reference_score = 0.2954148598 - assert ( - abs(ivec1.score(model, probe) - reference_score) < 1e-5 - ), "The scores differ: %3.8f, %3.8f" % (ivec1.score(model, probe), reference_score) - assert ( - abs(ivec1.score_for_multiple_probes(model, [probe, probe]) - reference_score) - < 1e-5 - ) diff --git a/bob/bio/gmm/test/test_gmm.py b/bob/bio/gmm/test/test_gmm.py new file mode 100644 index 0000000000000000000000000000000000000000..e2434f0893f006453f65046a8999fa7d95ebabba --- /dev/null +++ b/bob/bio/gmm/test/test_gmm.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Manuel Guenther <Manuel.Guenther@idiap.ch> +# @date: Thu May 24 10:41:42 CEST 2012 +# +# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland +# +# This program is free 
software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +import logging +import os +import tempfile + +import numpy +import pkg_resources + +import bob.bio.gmm + +from bob.bio.base.test import utils +from bob.bio.gmm.algorithm import GMM +from bob.learn.em.mixture import GMMMachine +from bob.learn.em.mixture import GMMStats + +logger = logging.getLogger(__name__) + +regenerate_refs = False + +seed_value = 5489 + + +def test_class(): + """Tests the creation and initialization of the GMM class.""" + gmm1 = bob.bio.base.load_resource( + "gmm", "algorithm", preferred_package="bob.bio.gmm" + ) + assert isinstance(gmm1, GMM) + assert isinstance( + gmm1, bob.bio.base.pipelines.vanilla_biometrics.abstract_classes.BioAlgorithm + ) + assert gmm1.number_of_gaussians == 512 + assert "bob_fit_supports_dask_array" in gmm1._get_tags() + assert gmm1.transform(None) is None + + +def test_training(): + """Tests the generation of the UBM.""" + # Set a small training iteration count + gmm1 = GMM( + number_of_gaussians=2, + kmeans_training_iterations=1, + ubm_training_iterations=1, + init_seed=seed_value, + ) + train_data = utils.random_training_set( + (100, 45), count=5, minimum=-5.0, maximum=5.0 + ) + + # Train the UBM (projector) + gmm1.fit(train_data) + + # Test saving and loading of projector + with tempfile.NamedTemporaryFile(prefix="bob_", suffix="_model.hdf5") as fd: + temp_file = fd.name + gmm1.save_model(temp_file) + + reference_file = pkg_resources.resource_filename( 
+ "bob.bio.gmm.test", "data/gmm_ubm.hdf5" + ) + if regenerate_refs: + gmm1.save_model(reference_file) + + gmm2 = GMM(number_of_gaussians=2) + + gmm2.load_model(temp_file) + ubm_reference = GMMMachine.from_hdf5(reference_file) + assert gmm2.ubm.is_similar_to(ubm_reference) + + +def test_projector(): + """Tests the projector.""" + # Load the UBM + gmm1 = GMM(number_of_gaussians=2) + gmm1.ubm = GMMMachine.from_hdf5( + pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_ubm.hdf5") + ) + + # Generate and project random feature + feature = utils.random_array((20, 45), -5.0, 5.0, seed=84) + projected = gmm1.project(feature) + assert isinstance(projected, bob.learn.em.mixture.GMMStats) + + reference_file = pkg_resources.resource_filename( + "bob.bio.gmm.test", "data/gmm_projected.hdf5" + ) + if regenerate_refs: + projected.save(reference_file) + + reference = GMMStats.from_hdf5(reference_file) + assert projected.is_similar_to(reference) + + +def test_enroll(): + # Load the UBM + ubm = GMMMachine.from_hdf5( + pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_ubm.hdf5") + ) + # Create a GMM object with that UBM + gmm1 = GMM( + number_of_gaussians=2, enroll_update_means=True, enroll_update_variances=True + ) + gmm1.ubm = ubm + # Enroll the biometric reference from random features + enroll = utils.random_training_set((20, 45), 5, -5.0, 5.0, seed=21) + biometric_reference = gmm1.enroll(enroll) + assert not biometric_reference.is_similar_to(biometric_reference.ubm) + assert isinstance(biometric_reference, GMMMachine) + + reference_file = pkg_resources.resource_filename( + "bob.bio.gmm.test", "data/gmm_enrolled.hdf5" + ) + if regenerate_refs: + gmm1.write_biometric_reference(biometric_reference, reference_file) + + # Compare to pre-generated file + gmm2 = gmm1.read_biometric_reference(reference_file) + assert biometric_reference.is_similar_to(gmm2) + + with tempfile.NamedTemporaryFile(prefix="bob_", suffix="_bioref.hdf5") as fd: + temp_file = fd.name + 
gmm1.write_biometric_reference(biometric_reference, reference_file) + assert os.path.exists(temp_file) + + +def test_score(): + gmm1 = GMM(number_of_gaussians=2) + gmm1.load_model( + pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_ubm.hdf5") + ) + biometric_reference = GMMMachine.from_hdf5( + pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_enrolled.hdf5"), + ubm=gmm1.ubm, + ) + probe = GMMStats.from_hdf5( + pkg_resources.resource_filename("bob.bio.gmm.test", "data/gmm_projected.hdf5") + ) + probe_data = utils.random_array((20, 45), -5.0, 5.0, seed=84) + + reference_score = -0.098980 + + numpy.testing.assert_almost_equal( + gmm1.score(biometric_reference, probe), reference_score, decimal=5 + ) + + multi_probes = gmm1.score_for_multiple_probes( + biometric_reference, [probe, probe, probe] + ) + assert multi_probes.shape == (3,), multi_probes.shape + numpy.testing.assert_almost_equal(multi_probes, reference_score, decimal=5) + + multi_refs = gmm1.score_multiple_biometric_references( + [biometric_reference, biometric_reference, biometric_reference], probe + ) + assert multi_refs.shape == (3,), multi_refs.shape + numpy.testing.assert_almost_equal(multi_refs, reference_score, decimal=5) + + # With not projected data + numpy.testing.assert_almost_equal( + gmm1.score(biometric_reference, probe_data), reference_score, decimal=5 + ) diff --git a/doc/implemented.rst b/doc/implemented.rst index c280808ffc505694236d6eb51776d12719c68d01..b8c747fd44b16742d44b7b0eee579c231721c008 100644 --- a/doc/implemented.rst +++ b/doc/implemented.rst @@ -11,9 +11,6 @@ Summary .. 
autosummary:: bob.bio.gmm.algorithm.GMM - bob.bio.gmm.algorithm.ISV - bob.bio.gmm.algorithm.JFA - bob.bio.gmm.algorithm.IVector Details diff --git a/setup.py b/setup.py index 3b512f4b9e9c7560b7bca408126910b36e2168b4..6ee603a6989b1463384e67afc0e5042afef54ef9 100644 --- a/setup.py +++ b/setup.py @@ -98,17 +98,9 @@ setup( "console_scripts": [], "bob.bio.database": [], "bob.bio.preprocessor": [], - "bob.bio.extractor": [ - "dummy2d = bob.bio.gmm.test.dummy.extractor:extractor", # for test purposes only - ], + "bob.bio.extractor": [], "bob.bio.algorithm": [ "gmm = bob.bio.gmm.config.algorithm.gmm:algorithm", - "gmm-regular = bob.bio.gmm.config.algorithm.gmm_regular:algorithm", - "jfa = bob.bio.gmm.config.algorithm.jfa:algorithm", - "isv = bob.bio.gmm.config.algorithm.isv:algorithm", - "ivector-cosine = bob.bio.gmm.config.algorithm.ivector_cosine:algorithm", - "ivector-plda = bob.bio.gmm.config.algorithm.ivector_plda:algorithm", - "ivector-lda-wccn-plda = bob.bio.gmm.config.algorithm.ivector_lda_wccn_plda:algorithm", ], }, # Classifiers are important if you plan to distribute this package through