Commit 58b46fb0 authored by Tiago Pereira

Added the kmeans_initialization_method keyword argument

parent 616f7889
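The change adds a kmeans_initialization_method keyword to the GMM constructor and forwards it to bob.learn.em.KMeansTrainer. A minimal usage sketch, not part of the commit; the import path bob.bio.gmm.algorithm and the number of Gaussians are assumptions for illustration:

    # Configure the algorithm with the new K-Means initialization option.
    # Accepted values (per the docstring): 'RANDOM', 'RANDOM_NO_DUPLICATE', 'KMEANS_PLUS_PLUS'.
    from bob.bio.gmm.algorithm import GMM

    algorithm = GMM(
        number_of_gaussians=512,                          # illustrative value, not from the commit
        kmeans_initialization_method="KMEANS_PLUS_PLUS",  # new keyword added by this commit
    )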
@@ -12,11 +12,58 @@ import numpy
 from bob.bio.base.algorithm import Algorithm

 import logging
 logger = logging.getLogger("bob.bio.gmm")


 class GMM(Algorithm):
-    """Algorithm for computing Universal Background Models and Gaussian Mixture Models of the features.
-    Features must be normalized to zero mean and unit standard deviation."""
+    """
+    Trains a UBM/GMM system in the same way as in [Reynolds2000]_.
+
+    **Parameters**:
+
+    number_of_gaussians:
+        Number of Gaussians in the model.
+    kmeans_training_iterations:
+        Maximum number of iterations for the K-Means training.
+    kmeans_initialization_method:
+        Initialization method for the K-Means. Possible values are: 'RANDOM', 'RANDOM_NO_DUPLICATE', 'KMEANS_PLUS_PLUS'.
+    gmm_training_iterations:
+        Maximum number of iterations for the ML estimator in the GMM training.
+    training_threshold:
+        Convergence threshold for the ML estimator.
+    variance_threshold:
+        Variance flooring threshold.
+    update_weights:
+        If **True**, update the weights during the GMM training.
+    update_variances:
+        If **True**, update the variances during the GMM training.
+    update_means:
+        If **True**, update the means during the GMM training.
+    relevance_factor:
+        The relevance factor for the GMM MAP estimation.
+    gmm_enroll_iterations:
+        Maximum number of iterations for the MAP estimation.
+    responsibility_threshold:
+        Threshold for the responsibilities.
+    INIT_SEED:
+        Seed for the pseudo-random number generator.
+    scoring_function:
+        The GMM scoring function to use.
+    """

     def __init__(
             self,
@@ -24,6 +71,7 @@ class GMM (Algorithm):
             number_of_gaussians,
             # parameters of UBM training
             kmeans_training_iterations=25,  # Maximum number of iterations for K-Means
+            kmeans_initialization_method="RANDOM_NO_DUPLICATE",
             gmm_training_iterations=25,  # Maximum number of iterations for ML GMM Training
             training_threshold=5e-4,  # Threshold to end the ML training
             variance_threshold=5e-4,  # Minimum value that a variance can reach
@@ -33,7 +81,8 @@ class GMM (Algorithm):
             # parameters of the GMM enrollment
             relevance_factor=4,  # Relevance factor as described in Reynolds paper
             gmm_enroll_iterations=1,  # Number of iterations for the enrollment phase
-            responsibility_threshold = 0,  # If set, the weight of a particular Gaussian will at least be greater than this threshold. In the case the real weight is lower, the prior mean value will be used to estimate the current mean and variance.
+            responsibility_threshold=0,
+            # If set, the weight of a particular Gaussian will at least be greater than this threshold. In the case the real weight is lower, the prior mean value will be used to estimate the current mean and variance.
             INIT_SEED=5489,
             # scoring
             scoring_function=bob.learn.em.linear_scoring
@@ -81,19 +130,17 @@ class GMM (Algorithm):
         self.scoring_function = scoring_function

         self.ubm = None
-        self.kmeans_trainer = bob.learn.em.KMeansTrainer()
-        self.ubm_trainer = bob.learn.em.ML_GMMTrainer(self.update_means, self.update_variances, self.update_weights, self.responsibility_threshold)
+        self.kmeans_trainer = bob.learn.em.KMeansTrainer(kmeans_initialization_method)
+        self.ubm_trainer = bob.learn.em.ML_GMMTrainer(self.update_means, self.update_variances, self.update_weights,
+                                                      self.responsibility_threshold)

     def _check_feature(self, feature):
         """Checks that the features are appropriate"""
         if not isinstance(feature, numpy.ndarray) or feature.ndim != 2 or feature.dtype != numpy.float64:
             raise ValueError("The given feature is not appropriate")
         if self.ubm is not None and feature.shape[1] != self.ubm.shape[1]:
-            raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.ubm.shape[1], feature.shape[1]))
+            raise ValueError("The given feature is expected to have %d elements, but it has %d" % (
+                self.ubm.shape[1], feature.shape[1]))

     #######################################################
     ################ UBM training #########################
@@ -112,7 +159,8 @@ class GMM (Algorithm):
         # Trains using the KMeansTrainer
         logger.info(" -> Training K-Means")
-        bob.learn.em.train(self.kmeans_trainer, kmeans, array, self.kmeans_training_iterations, self.training_threshold, self.rng)
+        bob.learn.em.train(self.kmeans_trainer, kmeans, array, self.kmeans_training_iterations, self.training_threshold,
+                           self.rng)

         variances, weights = kmeans.get_variances_and_weights_for_each_cluster(array)
         means = kmeans.means
@@ -125,17 +173,17 @@ class GMM (Algorithm):
         # Trains the GMM
         logger.info(" -> Training GMM")
-        bob.learn.em.train(self.ubm_trainer, self.ubm, array, self.gmm_training_iterations, self.training_threshold, self.rng)
+        bob.learn.em.train(self.ubm_trainer, self.ubm, array, self.gmm_training_iterations, self.training_threshold,
+                           self.rng)

     def save_ubm(self, projector_file):
         """Save projector to file"""
         # Saves the UBM to file
         logger.debug(" .... Saving model to file '%s'", projector_file)
-        hdf5 = projector_file if isinstance(projector_file, bob.io.base.HDF5File) else bob.io.base.HDF5File(projector_file, 'w')
+        hdf5 = projector_file if isinstance(projector_file, bob.io.base.HDF5File) else bob.io.base.HDF5File(
+            projector_file, 'w')
         self.ubm.save(hdf5)

     def train_projector(self, train_features, projector_file):
         """Computes the Universal Background Model from the training ("world") data"""
         [self._check_feature(feature) for feature in train_features]
@@ -149,7 +197,6 @@ class GMM (Algorithm):
         self.save_ubm(projector_file)

-
     #######################################################
     ############## GMM training using UBM #################
@@ -159,17 +206,17 @@ class GMM (Algorithm):
         self.ubm = bob.learn.em.GMMMachine(hdf5file)
         self.ubm.set_variance_thresholds(self.variance_threshold)

     def load_projector(self, projector_file):
         """Reads the UBM model from file"""
         # read UBM
         self.load_ubm(projector_file)
         # prepare MAP_GMM_Trainer
-        kwargs = dict(mean_var_update_responsibilities_threshold=self.responsibility_threshold) if self.responsibility_threshold > 0. else dict()
-        self.enroll_trainer = bob.learn.em.MAP_GMMTrainer(self.ubm, relevance_factor = self.relevance_factor, update_means = True, update_variances = False, **kwargs)
+        kwargs = dict(
+            mean_var_update_responsibilities_threshold=self.responsibility_threshold) if self.responsibility_threshold > 0. else dict()
+        self.enroll_trainer = bob.learn.em.MAP_GMMTrainer(self.ubm, relevance_factor=self.relevance_factor,
+                                                          update_means=True, update_variances=False, **kwargs)
         self.rng = bob.core.random.mt19937(self.init_seed)

     def project_ubm(self, array):
         logger.debug(" .... Projecting %d feature vectors" % array.shape[0])
         # Accumulates statistics
@@ -179,13 +226,11 @@ class GMM (Algorithm):
         # return the resulting statistics
         return gmm_stats

-
     def project(self, feature):
         """Computes GMM statistics against a UBM, given an input 2D numpy.ndarray of feature vectors"""
         self._check_feature(feature)
         return self.project_ubm(feature)

-
     def read_gmm_stats(self, gmm_stats_file):
         """Reads GMM stats from file."""
         return bob.learn.em.GMMStats(bob.io.base.HDF5File(gmm_stats_file))
@@ -199,7 +244,8 @@ class GMM (Algorithm):
         gmm = bob.learn.em.GMMMachine(self.ubm)
         gmm.set_variance_thresholds(self.variance_threshold)
-        bob.learn.em.train(self.enroll_trainer, gmm, array, self.gmm_enroll_iterations, self.training_threshold, self.rng)
+        bob.learn.em.train(self.enroll_trainer, gmm, array, self.gmm_enroll_iterations, self.training_threshold,
+                           self.rng)
         return gmm

     def enroll(self, feature_arrays):
@@ -209,7 +255,6 @@ class GMM (Algorithm):
         # Use the array to train a GMM and return it
         return self.enroll_gmm(array)

-
     ######################################################
     ################ Feature comparison ##################
     def read_model(self, model_file):
@@ -230,10 +275,8 @@ class GMM (Algorithm):
         for probe in probes:
             assert isinstance(probe, bob.learn.em.GMMStats)
         # logger.warn("Please verify that this function is correct")
-        return self.probe_fusion_function(self.scoring_function([model], self.ubm, probes, [], frame_length_normalisation = True))
+        return self.probe_fusion_function(
+            self.scoring_function([model], self.ubm, probes, [], frame_length_normalisation=True))


 class GMMRegular(GMM):
@@ -247,7 +290,6 @@ class GMMRegular (GMM):
         # register a different set of functions in the Tool base class
         Algorithm.__init__(self, requires_enroller_training=True, performs_projection=False)

-
     #######################################################
     ################ UBM training #########################
@@ -256,7 +298,6 @@ class GMMRegular (GMM):
         train_features = [feature for client in train_features for feature in client]
         return self.train_projector(train_features, enroller_file)

-
     #######################################################
     ############## GMM training using UBM #################
@@ -264,14 +305,12 @@ class GMMRegular (GMM):
         """Reads the UBM model from file"""
         return self.load_projector(enroller_file)

-
     ######################################################
     ################ Feature comparison ##################
     def read_probe(self, probe_file):
         """Reads a feature from file, which is supposed to be a simple 2D array"""
         return bob.bio.base.load(probe_file)

-
     def score(self, model, probe):
         """Computes the score for the given model and the given probe.
         The scores are log-likelihoods.
@@ -279,7 +318,8 @@ class GMMRegular (GMM):
         assert isinstance(model, bob.learn.em.GMMMachine)
         self._check_feature(probe)
-        score = sum(model.log_likelihood(probe[i,:]) - self.ubm.log_likelihood(probe[i,:]) for i in range(probe.shape[0]))
+        score = sum(
+            model.log_likelihood(probe[i, :]) - self.ubm.log_likelihood(probe[i, :]) for i in range(probe.shape[0]))
         return score / probe.shape[0]

     def score_for_multiple_probes(self, model, probes):
...
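For reference, the new argument is handed straight to the first parameter of bob.learn.em.KMeansTrainer, which selects how the cluster means are initialized before the K-Means iterations. An illustrative sketch of that lower-level call (the machine size and the random data below are placeholders, not values used by this package):

    import numpy
    import bob.learn.em

    kmeans = bob.learn.em.KMeansMachine(2, 3)   # 2 clusters of 3-dimensional features (placeholder sizes)
    data = numpy.random.randn(100, 3)           # placeholder float64 training data

    # 'RANDOM_NO_DUPLICATE' is the new default in GMM.__init__; 'KMEANS_PLUS_PLUS' is shown here as an alternative.
    trainer = bob.learn.em.KMeansTrainer("KMEANS_PLUS_PLUS")
    bob.learn.em.train(trainer, kmeans, data, 25, 5e-4)  # iterations and threshold mirror the class defaults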