Commit 58b46fb0 authored by Tiago Pereira

Added the kmeans_initialization_method keyword argument

parent 616f7889
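The change adds a kmeans_initialization_method keyword to the GMM constructor and forwards it to bob.learn.em.KMeansTrainer. A minimal usage sketch, not part of the commit; the import path bob.bio.gmm.algorithm and the number of Gaussians are assumptions for illustration:

    # Configure the algorithm with the new K-Means initialization option.
    # Accepted values (per the docstring): 'RANDOM', 'RANDOM_NO_DUPLICATE', 'KMEANS_PLUS_PLUS'.
    from bob.bio.gmm.algorithm import GMM

    algorithm = GMM(
        number_of_gaussians=512,                          # illustrative value, not from the commit
        kmeans_initialization_method="KMEANS_PLUS_PLUS",  # new keyword added by this commit
    )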
@@ -12,11 +12,58 @@ import numpy
 from bob.bio.base.algorithm import Algorithm

 import logging
 logger = logging.getLogger("bob.bio.gmm")


 class GMM(Algorithm):
-    """Algorithm for computing Universal Background Models and Gaussian Mixture Models of the features.
-    Features must be normalized to zero mean and unit standard deviation."""
+    """
+    Trains a UBM/GMM system in the same way as in [Reynolds2000]_.
+
+    **Parameters**:
+
+    number_of_gaussians:
+        Number of Gaussians in the model.
+    kmeans_training_iterations:
+        Maximum number of iterations for the K-Means training.
+    kmeans_initialization_method:
+        Initialization method for the K-Means. Possible values are: 'RANDOM', 'RANDOM_NO_DUPLICATE', 'KMEANS_PLUS_PLUS'.
+    gmm_training_iterations:
+        Maximum number of iterations for the ML estimator in the GMM training.
+    training_threshold:
+        Convergence threshold for the ML estimator.
+    variance_threshold:
+        Variance flooring threshold.
+    update_weights:
+        If **True**, update the weights during the GMM training.
+    update_variances:
+        If **True**, update the variances during the GMM training.
+    update_means:
+        If **True**, update the means during the GMM training.
+    relevance_factor:
+        The relevance factor for the GMM MAP estimation.
+    gmm_enroll_iterations:
+        Maximum number of iterations for the MAP estimation.
+    responsibility_threshold:
+        Threshold for the responsibilities.
+    INIT_SEED:
+        Seed for the pseudo-random number generator.
+    scoring_function:
+        The GMM scoring function to use.
+    """

     def __init__(
             self,
@@ -24,6 +71,7 @@ class GMM (Algorithm):
             number_of_gaussians,
             # parameters of UBM training
             kmeans_training_iterations=25,  # Maximum number of iterations for K-Means
+            kmeans_initialization_method="RANDOM_NO_DUPLICATE",
             gmm_training_iterations=25,  # Maximum number of iterations for ML GMM Training
             training_threshold=5e-4,  # Threshold to end the ML training
             variance_threshold=5e-4,  # Minimum value that a variance can reach
@@ -33,7 +81,8 @@ class GMM (Algorithm):
             # parameters of the GMM enrollment
             relevance_factor=4,  # Relevance factor as described in Reynolds paper
             gmm_enroll_iterations=1,  # Number of iterations for the enrollment phase
-            responsibility_threshold = 0,  # If set, the weight of a particular Gaussian will at least be greater than this threshold. In the case the real weight is lower, the prior mean value will be used to estimate the current mean and variance.
+            responsibility_threshold=0,
+            # If set, the weight of a particular Gaussian will at least be greater than this threshold. In the case the real weight is lower, the prior mean value will be used to estimate the current mean and variance.
             INIT_SEED=5489,
             # scoring
             scoring_function=bob.learn.em.linear_scoring
@@ -81,19 +130,17 @@ class GMM (Algorithm):
         self.scoring_function = scoring_function

         self.ubm = None
-        self.kmeans_trainer = bob.learn.em.KMeansTrainer()
-        self.ubm_trainer = bob.learn.em.ML_GMMTrainer(self.update_means, self.update_variances, self.update_weights, self.responsibility_threshold)
+        self.kmeans_trainer = bob.learn.em.KMeansTrainer(kmeans_initialization_method)
+        self.ubm_trainer = bob.learn.em.ML_GMMTrainer(self.update_means, self.update_variances, self.update_weights,
+                                                      self.responsibility_threshold)

     def _check_feature(self, feature):
         """Checks that the features are appropriate"""
         if not isinstance(feature, numpy.ndarray) or feature.ndim != 2 or feature.dtype != numpy.float64:
             raise ValueError("The given feature is not appropriate")
         if self.ubm is not None and feature.shape[1] != self.ubm.shape[1]:
-            raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.ubm.shape[1], feature.shape[1]))
+            raise ValueError("The given feature is expected to have %d elements, but it has %d" % (
+                self.ubm.shape[1], feature.shape[1]))

     #######################################################
     ################ UBM training #########################
@@ -112,7 +159,8 @@ class GMM (Algorithm):
         # Trains using the KMeansTrainer
         logger.info(" -> Training K-Means")
-        bob.learn.em.train(self.kmeans_trainer, kmeans, array, self.kmeans_training_iterations, self.training_threshold, self.rng)
+        bob.learn.em.train(self.kmeans_trainer, kmeans, array, self.kmeans_training_iterations, self.training_threshold,
+                           self.rng)

         variances, weights = kmeans.get_variances_and_weights_for_each_cluster(array)
         means = kmeans.means
@@ -125,17 +173,17 @@ class GMM (Algorithm):
         # Trains the GMM
         logger.info(" -> Training GMM")
-        bob.learn.em.train(self.ubm_trainer, self.ubm, array, self.gmm_training_iterations, self.training_threshold, self.rng)
+        bob.learn.em.train(self.ubm_trainer, self.ubm, array, self.gmm_training_iterations, self.training_threshold,
+                           self.rng)

     def save_ubm(self, projector_file):
         """Save projector to file"""
         # Saves the UBM to file
         logger.debug(" .... Saving model to file '%s'", projector_file)
-        hdf5 = projector_file if isinstance(projector_file, bob.io.base.HDF5File) else bob.io.base.HDF5File(projector_file, 'w')
+        hdf5 = projector_file if isinstance(projector_file, bob.io.base.HDF5File) else bob.io.base.HDF5File(
+            projector_file, 'w')
         self.ubm.save(hdf5)

     def train_projector(self, train_features, projector_file):
         """Computes the Universal Background Model from the training ("world") data"""
         [self._check_feature(feature) for feature in train_features]
@@ -149,7 +197,6 @@ class GMM (Algorithm):
         self.save_ubm(projector_file)

-
     #######################################################
     ############## GMM training using UBM #################
@@ -159,17 +206,17 @@ class GMM (Algorithm):
         self.ubm = bob.learn.em.GMMMachine(hdf5file)
         self.ubm.set_variance_thresholds(self.variance_threshold)

     def load_projector(self, projector_file):
         """Reads the UBM model from file"""
         # read UBM
         self.load_ubm(projector_file)
         # prepare MAP_GMM_Trainer
-        kwargs = dict(mean_var_update_responsibilities_threshold=self.responsibility_threshold) if self.responsibility_threshold > 0. else dict()
-        self.enroll_trainer = bob.learn.em.MAP_GMMTrainer(self.ubm, relevance_factor = self.relevance_factor, update_means = True, update_variances = False, **kwargs)
+        kwargs = dict(
+            mean_var_update_responsibilities_threshold=self.responsibility_threshold) if self.responsibility_threshold > 0. else dict()
+        self.enroll_trainer = bob.learn.em.MAP_GMMTrainer(self.ubm, relevance_factor=self.relevance_factor,
+                                                          update_means=True, update_variances=False, **kwargs)
         self.rng = bob.core.random.mt19937(self.init_seed)

     def project_ubm(self, array):
         logger.debug(" .... Projecting %d feature vectors" % array.shape[0])
         # Accumulates statistics
@@ -179,13 +226,11 @@ class GMM (Algorithm):
         # return the resulting statistics
         return gmm_stats

-
     def project(self, feature):
         """Computes GMM statistics against a UBM, given an input 2D numpy.ndarray of feature vectors"""
         self._check_feature(feature)
         return self.project_ubm(feature)

-
     def read_gmm_stats(self, gmm_stats_file):
         """Reads GMM stats from file."""
         return bob.learn.em.GMMStats(bob.io.base.HDF5File(gmm_stats_file))
@@ -199,7 +244,8 @@ class GMM (Algorithm):
         gmm = bob.learn.em.GMMMachine(self.ubm)
         gmm.set_variance_thresholds(self.variance_threshold)
-        bob.learn.em.train(self.enroll_trainer, gmm, array, self.gmm_enroll_iterations, self.training_threshold, self.rng)
+        bob.learn.em.train(self.enroll_trainer, gmm, array, self.gmm_enroll_iterations, self.training_threshold,
+                           self.rng)
         return gmm

     def enroll(self, feature_arrays):
@@ -209,7 +255,6 @@ class GMM (Algorithm):
         # Use the array to train a GMM and return it
         return self.enroll_gmm(array)

-
     ######################################################
     ################ Feature comparison ##################
     def read_model(self, model_file):
@@ -230,10 +275,8 @@ class GMM (Algorithm):
         for probe in probes:
             assert isinstance(probe, bob.learn.em.GMMStats)
         # logger.warn("Please verify that this function is correct")
-        return self.probe_fusion_function(self.scoring_function([model], self.ubm, probes, [], frame_length_normalisation = True))
+        return self.probe_fusion_function(
+            self.scoring_function([model], self.ubm, probes, [], frame_length_normalisation=True))


 class GMMRegular(GMM):
@@ -247,7 +290,6 @@ class GMMRegular (GMM):
         # register a different set of functions in the Tool base class
         Algorithm.__init__(self, requires_enroller_training=True, performs_projection=False)

-
     #######################################################
     ################ UBM training #########################
@@ -256,7 +298,6 @@ class GMMRegular (GMM):
         train_features = [feature for client in train_features for feature in client]
         return self.train_projector(train_features, enroller_file)

-
     #######################################################
     ############## GMM training using UBM #################
@@ -264,14 +305,12 @@ class GMMRegular (GMM):
         """Reads the UBM model from file"""
         return self.load_projector(enroller_file)

-
     ######################################################
     ################ Feature comparison ##################
     def read_probe(self, probe_file):
         """Reads a feature from file, which is supposed to be a simple 2D array"""
         return bob.bio.base.load(probe_file)

-
     def score(self, model, probe):
         """Computes the score for the given model and the given probe.
         The scores are log-likelihoods.
@@ -279,7 +318,8 @@ class GMMRegular (GMM):
         assert isinstance(model, bob.learn.em.GMMMachine)
         self._check_feature(probe)
-        score = sum(model.log_likelihood(probe[i,:]) - self.ubm.log_likelihood(probe[i,:]) for i in range(probe.shape[0]))
+        score = sum(
+            model.log_likelihood(probe[i, :]) - self.ubm.log_likelihood(probe[i, :]) for i in range(probe.shape[0]))
         return score / probe.shape[0]

     def score_for_multiple_probes(self, model, probes):
...
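For reference, the new argument is handed straight to the first parameter of bob.learn.em.KMeansTrainer, which selects how the cluster means are initialized before the K-Means iterations. An illustrative sketch of that lower-level call (the machine size and the random data below are placeholders, not values used by this package):

    import numpy
    import bob.learn.em

    kmeans = bob.learn.em.KMeansMachine(2, 3)   # 2 clusters of 3-dimensional features (placeholder sizes)
    data = numpy.random.randn(100, 3)           # placeholder float64 training data

    # 'RANDOM_NO_DUPLICATE' is the new default in GMM.__init__; 'KMEANS_PLUS_PLUS' is shown here as an alternative.
    trainer = bob.learn.em.KMeansTrainer("KMEANS_PLUS_PLUS")
    bob.learn.em.train(trainer, kmeans, data, 25, 5e-4)  # iterations and threshold mirror the class defaults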