#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>

import bob.core
import bob.io.base
import bob.learn.linear
import bob.learn.em

import numpy

from .GMM import GMM
from bob.bio.base.algorithm import Algorithm

import logging
logger = logging.getLogger("bob.bio.gmm")


class IVector (GMM):
  """Tool for extracting I-Vectors.

  The projector consists of three parts that are trained together and stored
  in one HDF5 file: the UBM (handled by the :py:class:`GMM` base class), the
  total variability (TV) machine and a whitening machine.  Projected features,
  models and probes are all plain 1D ``numpy.float64`` arrays.
  """

  def __init__(
      self,
      # IVector training
      subspace_dimension_of_t,       # T subspace dimension
      tv_training_iterations = 25,   # Number of EM iterations for the TV training
      update_sigma = True,
      # parameters of the GMM
      **kwargs
  ):
    """Initializes the I-Vector tool.

    Parameters:

    subspace_dimension_of_t
      Dimension handed to ``bob.learn.em.IVectorMachine`` (size of the T subspace).

    tv_training_iterations
      Number of iterations passed to ``bob.learn.em.train`` for the TV machine.

    update_sigma
      Forwarded to ``bob.learn.em.IVectorTrainer``.

    kwargs
      Remaining parameters; they are forwarded to the GMM base class constructor.
    """
    # call base class constructor with its set of parameters
    GMM.__init__(self, **kwargs)

    # call tool constructor to overwrite what was set before
    Algorithm.__init__(
        self,
        performs_projection = True,
        use_projected_features_for_enrollment = True,
        requires_enroller_training = False, # not needed anymore because it's done while training the projector
        split_training_features_by_client = False,

        subspace_dimension_of_t = subspace_dimension_of_t,
        tv_training_iterations = tv_training_iterations,
        update_sigma = update_sigma,

        multiple_model_scoring = None,
        multiple_probe_scoring = None,
        **kwargs
    )

    self.update_sigma = update_sigma
    self.subspace_dimension_of_t = subspace_dimension_of_t
    self.tv_training_iterations = tv_training_iterations
    self.ivector_trainer = bob.learn.em.IVectorTrainer(update_sigma=update_sigma)
    self.whitening_trainer = bob.learn.linear.WhiteningTrainer()

    # The machines only exist after training or loading the projector.
    # _check_projected() tests ``self.whitener is not None``, so the attribute
    # has to exist from the start; otherwise that test raises AttributeError.
    self.tv = None
    self.whitener = None


  def _check_projected(self, feature):
    """Checks that the given projected feature (an i-vector) is appropriate.

    Raises a ``ValueError`` if ``feature`` is not a 1D ``numpy.float64`` array,
    or -- once a whitening machine is available -- if its length differs from
    ``self.whitener.shape[1]``.
    """
    if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64:
      raise ValueError("The given feature is not appropriate")
    if self.whitener is not None and feature.shape[0] != self.whitener.shape[1]:
      raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.whitener.shape[1], feature.shape[0]))


  def train_ivector(self, training_stats):
    """Creates ``self.tv`` on top of the UBM and trains it on the given GMM statistics."""
    logger.info("  -> Training IVector enroller")
    self.tv = bob.learn.em.IVectorMachine(self.ubm, self.subspace_dimension_of_t)
    self.tv.variance_threshold = self.variance_threshold

    # train IVector model
    bob.learn.em.train(self.ivector_trainer, self.tv, training_stats, self.tv_training_iterations, rng=self.rng)


  def train_whitening(self, training_features):
    """Creates ``self.whitener`` and trains it on the given list of i-vectors."""
    # one i-vector per row
    ivectors_matrix = numpy.vstack(training_features)
    # create a Linear Machine with identical input and output dimension
    dimension = ivectors_matrix.shape[1]
    self.whitener = bob.learn.linear.Machine(dimension, dimension)
    # train the whitening on the stacked i-vectors
    self.whitening_trainer.train(ivectors_matrix, self.whitener)


  def train_projector(self, train_features, projector_file):
    """Trains UBM, TV machine and whitening in one go and saves them to ``projector_file``."""
    for feature in train_features:
      self._check_feature(feature)

    # train UBM
    data = numpy.vstack(train_features)
    self.train_ubm(data)
    # the stacked copy of the training data is not needed any longer
    del data

    # compute the GMM statistics of each training feature
    logger.info("  -> Projecting training data")
    training_stats = [self.project_ubm(feature) for feature in train_features]
    # train IVector
    self.train_ivector(training_stats)

    # project training i-vectors and use them to train the whitening
    whitening_train_data = [self.project_ivec(stats) for stats in training_stats]
    self.train_whitening(whitening_train_data)

    # save
    self.save_projector(projector_file)


  def save_projector(self, projector_file):
    """Saves the UBM, the TV machine and the whitening machine into one HDF5 file.

    The three parts are written to the groups ``Projector``, ``Enroller`` and
    ``Whitener``, respectively; :py:meth:`load_projector` reads the same layout.
    """
    hdf5file = bob.io.base.HDF5File(projector_file, "w")
    hdf5file.create_group('Projector')
    hdf5file.cd('Projector')
    self.save_ubm(hdf5file)

    hdf5file.cd('/')
    hdf5file.create_group('Enroller')
    hdf5file.cd('Enroller')
    self.tv.save(hdf5file)

    hdf5file.cd('/')
    hdf5file.create_group('Whitener')
    hdf5file.cd('Whitener')
    self.whitener.save(hdf5file)


  def load_tv(self, tv_file):
    """Reads the TV machine from ``tv_file`` and attaches the UBM to it.

    ``tv_file`` is wrapped into ``bob.io.base.HDF5File``; :py:meth:`load_projector`
    passes an already opened file that is positioned at the right group.
    The UBM has to be loaded before this function is called.
    """
    hdf5file = bob.io.base.HDF5File(tv_file)
    self.tv = bob.learn.em.IVectorMachine(hdf5file)
    # add UBM model from base class
    self.tv.ubm = self.ubm


  def load_whitening(self, whitening_file):
    """Reads the whitening machine from ``whitening_file`` (see :py:meth:`load_tv`)."""
    hdf5file = bob.io.base.HDF5File(whitening_file)
    self.whitener = bob.learn.linear.Machine(hdf5file)


  def load_projector(self, projector_file):
    """Loads the UBM, the TV machine and the whitening machine from the same HDF5 file."""
    hdf5file = bob.io.base.HDF5File(projector_file)

    # Load Projector (UBM); must come first since load_tv() uses self.ubm
    hdf5file.cd('/Projector')
    self.load_ubm(hdf5file)

    # Load Enroller (TV machine)
    hdf5file.cd('/Enroller')
    self.load_tv(hdf5file)

    # Load Whitening
    hdf5file.cd('/Whitener')
    self.load_whitening(hdf5file)


  def project_ivec(self, gmm_stats):
    """Projects the given GMM statistics with the TV machine and returns the result."""
    return self.tv.project(gmm_stats)


  def project_whitening(self, ivector):
    """Applies the whitening machine to the i-vector and scales the result to unit L2 norm."""
    whitened = self.whitener.forward(ivector)
    return whitened / numpy.linalg.norm(whitened)


  #######################################################
  ############## IVector projection #####################
  def project(self, feature_array):
    """Computes GMM statistics against the UBM, then the corresponding whitened, length-normalized i-vector."""
    self._check_feature(feature_array)
    # project UBM
    projected_ubm = self.project_ubm(feature_array)
    # project I-Vector
    ivector = self.project_ivec(projected_ubm)
    # whiten I-Vector
    return self.project_whitening(ivector)


  #######################################################
  ################## Projected feature I/O ##############
  def write_feature(self, data, feature_file):
    """Saves the feature, which is the (whitened) I-Vector."""
    bob.bio.base.save(data, feature_file)


  def read_feature(self, feature_file):
    """Read the type of features that we require, namely i-vectors (stored as simple numpy arrays)"""
    return bob.bio.base.load(feature_file)


  #######################################################
  ################## Model Enrollment ###################
  def enroll(self, enroll_features):
    """Performs IVector enrollment: the model is the element-wise mean of the given i-vectors."""
    for feature in enroll_features:
      self._check_projected(feature)
    return numpy.mean(numpy.vstack(enroll_features), axis=0)


  ######################################################
  ################ Feature comparison ##################
  def read_model(self, model_file):
    """Reads the whitened i-vector that holds the model"""
    return bob.bio.base.load(model_file)


  def read_probe(self, probe_file):
    """Reads the probe file, which contains an i-vector"""
    return bob.bio.base.load(probe_file)


  def score(self, model, probe):
    """Computes the score for the given model and the given probe.

    The score is the dot product of the two vectors after each of them was
    divided by its L2 norm, i.e., their cosine similarity.
    """
    self._check_projected(model)
    self._check_projected(probe)
    return numpy.dot(model/numpy.linalg.norm(model), probe/numpy.linalg.norm(probe))


  def score_for_multiple_probes(self, model, probes):
    """Computes the score between the given model and the element-wise mean of the given probes."""
    for probe in probes:
      self._check_projected(probe)
    mean_probe = numpy.mean(numpy.vstack(probes), axis=0)
    return self.score(model, mean_probe)
import bob.bio.gmm

# Default configuration of the I-Vector algorithm.
algorithm = bob.bio.gmm.algorithm.IVector(
    # --- I-Vector (total variability) parameters ---
    subspace_dimension_of_t=400,
    update_sigma=True,
    # number of EM iterations used to train the TV machine
    tv_training_iterations=3,
    # --- parameters forwarded to the GMM base class ---
    number_of_gaussians=512,
)
def test_ivector():
  """Checks the IVector algorithm: resource loading, projector training, projection, enrollment and scoring."""

  def reference(relative_path):
    # resolves a reference file shipped with the test package
    return pkg_resources.resource_filename('bob.bio.gmm.test', relative_path)

  temp_file = bob.io.base.test_utils.temporary_filename()

  # the registered resource must be an IVector with the expected flags
  ivec1 = bob.bio.base.load_resource("ivector", "algorithm")
  for expected_class in (bob.bio.gmm.algorithm.IVector, bob.bio.gmm.algorithm.GMM, bob.bio.base.algorithm.Algorithm):
    assert isinstance(ivec1, expected_class)
  assert ivec1.performs_projection
  assert ivec1.requires_projector_training
  assert ivec1.use_projected_features_for_enrollment
  assert not ivec1.split_training_features_by_client
  assert not ivec1.requires_enroller_training

  # a much smaller IVector object keeps the training fast
  ivec2 = bob.bio.gmm.algorithm.IVector(
      number_of_gaussians = 2,
      subspace_dimension_of_t = 2,
      kmeans_training_iterations = 1,
      tv_training_iterations = 1,
      INIT_SEED = seed_value
  )

  train_data = utils.random_training_set((100,45), count=5, minimum=-5., maximum=5.)
  # stored projector that the freshly trained one is compared against
  reference_file = reference('data/ivector_projector.hdf5')
  try:
    # train the projector; this also writes it to temp_file
    ivec2.train_projector(train_data, temp_file)
    assert os.path.exists(temp_file)

    if regenerate_refs:
      shutil.copy(temp_file, reference_file)

    # load reference and freshly trained projector and compare all three machines
    ivec1.load_projector(reference_file)
    ivec2.load_projector(temp_file)

    assert ivec1.ubm.is_similar_to(ivec2.ubm)
    assert ivec1.tv.is_similar_to(ivec2.tv)
    assert ivec1.whitener.is_similar_to(ivec2.whitener)
  finally:
    # never leave the temporary projector file behind
    if os.path.exists(temp_file):
      os.remove(temp_file)

  # generate and project random feature
  feature = utils.random_array((20,45), -5., 5., seed=84)
  projected = ivec1.project(feature)
  _compare(projected, reference('data/ivector_projected.hdf5'), ivec1.write_feature, ivec1.read_feature)

  # enroll model from projected random features
  random_features = utils.random_training_set((20,45), count=5, minimum=-5., maximum=5.)
  enroll_features = [ivec1.project(random_feature) for random_feature in random_features]
  model = ivec1.enroll(enroll_features)
  _compare(model, reference('data/ivector_model.hdf5'), ivec1.write_model, ivec1.read_model)

  # compare model with probe
  probe = ivec1.read_probe(reference('data/ivector_projected.hdf5'))
  reference_score = -0.00187151
  score = ivec1.score(model, probe)
  assert abs(score - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (score, reference_score)
  # two copies of the same probe average to that probe, hence the same reference score
  multiple_score = ivec1.score_for_multiple_probes(model, [probe, probe])
  assert abs(multiple_score - reference_score) < 1e-5