Commit d779abda authored by Yannick DAYER

Removed unused method left from old API.

parent a0ec1e4f
1 merge request: !26 Python implementation of GMM
@@ -14,7 +14,8 @@ import logging
 from typing import Callable
-import numpy
+import dask.array as da
+import numpy as np
 from sklearn.base import BaseEstimator
@@ -28,6 +29,8 @@ from bob.learn.em.mixture import linear_scoring
 logger = logging.getLogger(__name__)
+# from bob.pipelines import ToDaskBag  # Used when switching from samples to da.Array
 
 class GMM(BioAlgorithm, BaseEstimator):
     """Algorithm for computing UBM and Gaussian Mixture Models of the features.
@@ -111,109 +114,50 @@ class GMM(BioAlgorithm, BaseEstimator):
         self.relevance_factor = relevance_factor
         self.gmm_enroll_iterations = gmm_enroll_iterations
         self.init_seed = init_seed
-        self.rng = bob.core.random.mt19937(self.init_seed)  # TODO
+        self.rng = self.init_seed  # TODO verify if rng object needed
         self.responsibility_threshold = responsibility_threshold
         self.scoring_function = scoring_function
         self.ubm = None
+        super().__init__()
 
     def _check_feature(self, feature):
         """Checks that the features are appropriate"""
         if (
-            not isinstance(feature, numpy.ndarray)
+            not isinstance(feature, np.ndarray)
             or feature.ndim != 2
-            or feature.dtype != numpy.float64
+            or feature.dtype != np.float64
         ):
-            raise ValueError("The given feature is not appropriate")
+            raise ValueError(f"The given feature is not appropriate: \n{feature}")
         if self.ubm is not None and feature.shape[1] != self.ubm.shape[1]:
             raise ValueError(
                 "The given feature is expected to have %d elements, but it has %d"
                 % (self.ubm.shape[1], feature.shape[1])
             )
 
-    #######################################################
-    # UBM training #
-    def train_ubm(self, array):
-        logger.debug(" .... Training UBM with %d feature vectors", array.shape[0])
-        logger.debug(" .... Creating UBM machine")
-        self.ubm = GMMMachine(
-            n_gaussians=self.number_of_gaussians,
-            trainer="ml",
-            max_fitting_steps=self.ubm_training_iterations,
-            convergence_threshold=self.training_threshold,
-            update_means=self.update_means,
-            update_variances=self.update_variances,
-            update_weights=self.update_weights,
-            # TODO more params?
-        )
-        # Trains the GMM
-        logger.info(" -> Training UBM GMM")
-        # Resetting the pseudo random number generator so we can have the same initialization for serial and parallel execution.
-        # self.rng = bob.core.random.mt19937(self.init_seed)
-        self.ubm.fit(array)
-
-    def save_ubm(self, projector_file):
+    def save_ubm(self, ubm_file):
         """Saves the projector to file"""
         # Saves the UBM to file
-        logger.debug(" .... Saving model to file '%s'", projector_file)
+        logger.debug("Saving model to file '%s'", ubm_file)
         hdf5 = (
-            projector_file
-            if isinstance(projector_file, bob.io.base.HDF5File)
-            else bob.io.base.HDF5File(projector_file, "w")
+            ubm_file
+            if isinstance(ubm_file, bob.io.base.HDF5File)
+            else bob.io.base.HDF5File(ubm_file, "w")
         )
         self.ubm.save(hdf5)
 
-    def train_projector(self, train_features, projector_file):
-        """Computes the Universal Background Model from the training ("world") data"""
-        [self._check_feature(feature) for feature in train_features]
-        logger.info(
-            " -> Training UBM model with %d training files", len(train_features)
-        )
-        # Loads the data into an array
-        array = numpy.vstack(train_features)
-        self.train_ubm(array)
-        self.save_ubm(projector_file)
-
-    #######################################################
-    # GMM training using UBM #
     def load_ubm(self, ubm_file):
         hdf5file = bob.io.base.HDF5File(ubm_file)
+        logger.debug("Loading model from file '%s'", ubm_file)
         # read UBM
         self.ubm = GMMMachine.from_hdf5(hdf5file)
         self.ubm.variance_thresholds = self.variance_threshold
 
-    def load_projector(self, projector_file):
-        """Reads the UBM model from file"""
-        # read UBM
-        self.load_ubm(projector_file)
-        # prepare MAP_GMM_Trainer
-        # kwargs = (
-        #     dict(
-        #         mean_var_update_responsibilities_threshold=self.responsibility_threshold
-        #     )
-        #     if self.responsibility_threshold > 0.0
-        #     else dict()
-        # )
-        # self.enroll_trainer = bob.learn.em.MAP_GMMTrainer(
-        #     self.ubm,
-        #     relevance_factor=self.relevance_factor,
-        #     update_means=True,
-        #     update_variances=False,
-        #     **kwargs
-        # )
-        self.rng = bob.core.random.mt19937(self.init_seed)
-
-    def project_ubm(self, array):
+    def project(self, array):
+        """Computes GMM statistics against a UBM, given a 2D array of feature vectors"""
+        self._check_feature(array)
         logger.debug(" .... Projecting %d feature vectors", array.shape[0])
         # Accumulates statistics
         gmm_stats = GMMStats(self.ubm.shape[0], self.ubm.shape[1])
@@ -222,25 +166,21 @@ class GMM(BioAlgorithm, BaseEstimator):
         # return the resulting statistics
         return gmm_stats
 
-    def project(self, feature):
-        """Computes GMM statistics against a UBM, given an input 2D numpy.ndarray of feature vectors"""
-        self._check_feature(feature)
-        return self.project_ubm(feature)
-
-    def read_gmm_stats(self, gmm_stats_file):
-        """Reads GMM stats from file."""
-        return GMMStats.from_hdf5(bob.io.base.HDF5File(gmm_stats_file))
-
     def read_feature(self, feature_file):
         """Read the type of features that we require, namely GMM_Stats"""
-        return self.read_gmm_stats(feature_file)
+        return GMMStats.from_hdf5(bob.io.base.HDF5File(feature_file))
 
     def write_feature(self, feature, feature_file):
         """Write the features (GMM_Stats)"""
         return feature.save(feature_file)
 
-    def enroll_gmm(self, array):
+    def enroll(self, data):
+        """Enrolls a GMM using MAP adaptation, given a list of 2D np.ndarray's of feature vectors"""
+        [self._check_feature(feature) for feature in data]
+        array = np.vstack(data)
+        # Use the array to train a GMM and return it
         logger.debug(" .... Enrolling with %d feature vectors", array.shape[0])
         # TODO responsibility_threshold
         gmm = GMMMachine(
             n_gaussians=self.number_of_gaussians,
@@ -248,7 +188,7 @@ class GMM(BioAlgorithm, BaseEstimator):
             ubm=self.ubm,
             convergence_threshold=self.training_threshold,
             max_fitting_steps=self.gmm_enroll_iterations,
-            random_state=self.rng,  # TODO
+            random_state=self.rng,
             update_means=True,
             update_variances=True,  # TODO default?
             update_weights=True,  # TODO default?
@@ -257,15 +197,6 @@ class GMM(BioAlgorithm, BaseEstimator):
         gmm = gmm.fit(array)
         return gmm
 
-    def enroll(self, data):
-        """Enrolls a GMM using MAP adaptation, given a list of 2D numpy.ndarray's of feature vectors"""
-        [self._check_feature(feature) for feature in data]
-        array = numpy.vstack(data)
-        # Use the array to train a GMM and return it
-        return self.enroll_gmm(array)
-
-    ######################################################
-    # Feature comparison #
     def read_model(self, model_file):
         """Reads the model, which is a GMM machine"""
         return GMMMachine.from_hdf5(bob.io.base.HDF5File(model_file), ubm=self.ubm)
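For context, a minimal sketch of how the refactored enrollment API is meant to be driven, based only on what the hunks above show (a list of 2D float64 feature arrays, with the UBM already trained or loaded). The helper name and the way the GMM instance is obtained are assumptions, not part of this commit.

import numpy as np


def enroll_client(algo, client_features):
    """Hypothetical helper: MAP-enroll one client with the refactored API.

    `algo` is assumed to be an instance of the GMM class from this diff whose
    UBM is already available (via algo.fit(...) or algo.load_ubm(...)).
    """
    # enroll() checks each array (2D, float64), stacks them with np.vstack and
    # MAP-adapts a GMMMachine initialized from the UBM.
    features = [np.asarray(f, dtype=np.float64) for f in client_features]
    return algo.enroll(features)  # returns the adapted GMMMachine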
@@ -287,12 +218,13 @@ class GMM(BioAlgorithm, BaseEstimator):
             The probe data to compare to the model.
         """
+        # import ipdb; ipdb.set_trace()
         assert isinstance(biometric_reference, GMMMachine)
-        assert isinstance(data, GMMStats)
+        stats = self.project(data)
         return self.scoring_function(
             models_means=[biometric_reference],
             ubm=self.ubm,
-            test_stats=data,
+            test_stats=stats,
             frame_length_normalization=True,
         )[0, 0]
@@ -311,12 +243,14 @@ class GMM(BioAlgorithm, BaseEstimator):
             The probe data to compare to the models.
         """
-        assert isinstance(biometric_references, GMMMachine)
-        assert isinstance(data, GMMStats)
+        assert isinstance(biometric_references[0], GMMMachine), type(
+            biometric_references[0]
+        )
+        stats = self.project(data)
         return self.scoring_function(
             models_means=biometric_references,
             ubm=self.ubm,
-            test_stats=data,
+            test_stats=stats,
             frame_length_normalization=True,
         )
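The two scoring hunks above change the probe handling: instead of asserting that the probe is already a GMMStats object, the raw probe features are projected internally (stats = self.project(data)). A hedged sketch of the resulting call pattern follows; the name of the enclosing method (the BioAlgorithm-style score()) is not visible in these hunks and is an assumption.

import numpy as np


def score_probe(algo, enrolled_model, probe_features):
    """Hypothetical helper: score one raw probe against one enrolled GMMMachine."""
    # The algorithm now projects the 2D float64 probe to GMMStats itself and
    # passes the statistics, together with the UBM, to self.scoring_function.
    probe = np.asarray(probe_features, dtype=np.float64)
    return algo.score(enrolled_model, probe)  # single comparison score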
@@ -326,65 +260,86 @@ class GMM(BioAlgorithm, BaseEstimator):
         for probe in probes:
             assert isinstance(probe, GMMStats)
         # logger.warn("Please verify that this function is correct")
-        return self.scoring_function(
-            models_means=model.means,
-            ubm=self.ubm,
-            test_stats=probes,
-            frame_length_normalization=True,
-        ).mean()
+        return (
+            self.scoring_function(
+                models_means=model.means,
+                ubm=self.ubm,
+                test_stats=probes,
+                frame_length_normalization=True,
+            )
+            .mean()
+            .reshape((-1,))
+        )
 
     def fit(self, X, y=None, **kwargs):
         """Trains the UBM."""
-        self.train_ubm(X)
-        return self
-
-    def transform(self, X, **kwargs):
-        """Passthrough. Enroll applies a different transform as score."""
-        return X
-
-
-class GMMRegular(GMM):
-    """Algorithm for computing Universal Background Models and Gaussian Mixture Models of the features"""
-
-    def __init__(self, **kwargs):
-        """Initializes the local UBM-GMM tool chain with the given file selector object"""
-        # logger.warn("This class must be checked. Please verify that I didn't do any mistake here. I had to rename 'train_projector' into a 'train_enroller'!")
-        # initialize the UBMGMM base class
-        GMM.__init__(self, **kwargs)
-        # register a different set of functions in the Tool base class
-        BioAlgorithm.__init__(
-            self, requires_enroller_training=True, performs_projection=False
-        )
-
-    #######################################################
-    # UBM training #
-    def train_enroller(self, train_features, enroller_file):
-        """Computes the Universal Background Model from the training ("world") data"""
-        train_features = [feature for client in train_features for feature in client]
-        return self.train_projector(train_features, enroller_file)
-
-    #######################################################
-    # GMM training using UBM #
-    def load_enroller(self, enroller_file):
-        """Reads the UBM model from file"""
-        return self.load_projector(enroller_file)
-
-    ######################################################
-    # Feature comparison #
-    def score(self, model, probe):
-        """Computes the score for the given model and the given probe.
-        The score are Log-Likelihood.
-        Therefore, the log of the likelihood ratio is obtained by computing the following difference."""
-        assert isinstance(model, GMMMachine)
-        self._check_feature(probe)
-        score = sum(
-            model.log_likelihood(probe[i, :]) - self.ubm.log_likelihood(probe[i, :])
-            for i in range(probe.shape[0])
+        # TODO: Delayed to dask array
+        # def delayed_to_xr_dataset(delayed, meta=None):
+        #     """Converts one dask.delayed object to a dask.array"""
+        #     if meta is None:
+        #         meta = np.array(delayed.data.compute())
+        #         print(meta.shape)
+        #     darray = da.from_delayed(delayed.data, meta.shape, dtype=meta.dtype, name=False)
+        #     return darray, meta
+
+        # def delayed_samples_to_dask_arrays(delayed_samples, meta=None):
+        #     output = []
+        #     for ds in delayed_samples:
+        #         d_array, meta = delayed_to_xr_dataset(ds, meta)
+        #         output.append(d_array)
+        #     return output, meta
+
+        # def delayeds_to_xr_dataset(delayeds, meta=None):
+        #     """Converts a set of dask.delayed to a list of dask.array"""
+        #     output = []
+        #     for d in delayeds:
+        #         d_array, meta = delayed_samples_to_dask_arrays(d, meta)
+        #         output.extend(d_array)
+        #     return output
+
+        # import ipdb; ipdb.set_trace()
+
+        # bags = ToDaskBag(npartitions=10).transform(X)
+
+        # delayeds = bags.to_delayed()
+        # lengths = bags.map_partitions(lambda samples: [len(samples)]).compute()
+        # for l, d in zip(lengths, delayeds):
+        #     d._length = l
+        # array_data = da.from_delayed(delayeds, shape=(2,-1,60))
+        # array_data = da.stack(delayeds_to_xr_dataset(delayeds))
+
+        # Stack all the samples in a 2D array of features
+        array = da.vstack(X)
+
+        logger.debug("UBM with %d feature vectors", array.shape[0])
+
+        logger.debug(f"Creating UBM machine with {self.number_of_gaussians} gaussians")
+
+        self.ubm = GMMMachine(
+            n_gaussians=self.number_of_gaussians,
+            trainer="ml",
+            max_fitting_steps=self.ubm_training_iterations,
+            convergence_threshold=self.training_threshold,
+            update_means=self.update_means,
+            update_variances=self.update_variances,
+            update_weights=self.update_weights,
+            # TODO more params?
         )
-        return score / probe.shape[0]
-
-    def score_for_multiple_probes(self, model, probes):
-        raise NotImplementedError("Implement Me!")
+        # Trains the GMM
+        logger.info("Training UBM GMM")
+        # Resetting the pseudo random number generator so we can have the same initialization for serial and parallel execution.
+        # self.rng = bob.core.random.mt19937(self.init_seed)
+        self.ubm.fit(array)
+        return self
+
+    def transform(self, X, **kwargs):
+        """Passthrough. Enroll applies a different transform as score."""
+        # The idea would be to apply the projection in Transform (going from extracted
+        # to GMMStats), but we must not apply this during the training (fit requires
+        # extracted data directly).
+        # `project` is applied in the score function directly.
+        return X
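The rewritten fit() above stacks all training arrays into one 2D dask array with da.vstack before ML-training the UBM. A minimal sketch of that stacking step, with shapes chosen purely for illustration:

import dask.array as da
import numpy as np

# Three illustrative "files" of 2D float64 features (frames x dimensions).
X = [np.random.rand(100, 60), np.random.rand(80, 60), np.random.rand(120, 60)]

# fit() performs the same stacking before handing the result to GMMMachine.fit:
array = da.vstack(X)   # lazy 2D dask array of shape (300, 60)
print(array.shape[0])  # 300 feature vectors; chunks are only computed when needed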
 from .GMM import GMM
-from .GMM import GMMRegular
 
 # gets sphinx autodoc done right - don't remove it
@@ -20,6 +19,5 @@ def __appropriate__(*args):
 __appropriate__(
     GMM,
-    GMMRegular,
 )
 __all__ = [_ for _ in dir() if not _.startswith("_")]
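The package __init__ keeps the usual bob-style __appropriate__ call so that Sphinx autodoc attributes GMM to this module. The body of __appropriate__ is not part of this diff; the conventional implementation in bob packages looks roughly like the following sketch (an assumption based on the comment and the call, not on this commit):

def __appropriate__(*args):
    """Assumed conventional helper: mark the given objects as declared in this
    module so Sphinx autodoc documents them here rather than in the submodule
    they were imported from."""
    for obj in args:
        obj.__module__ = __name__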