diff --git a/bob/bio/base/algorithm/PLDA.py b/bob/bio/base/algorithm/PLDA.py
new file mode 100644
index 0000000000000000000000000000000000000000..d8661b7db2f6b5761afb27aea518c033c0e8e28d
--- /dev/null
+++ b/bob/bio/base/algorithm/PLDA.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
+
+import bob.core
+import bob.io.base
+import bob.learn.linear
+import bob.learn.em
+
+import numpy
+
+from .Algorithm import Algorithm
+import logging
+logger = logging.getLogger("bob.bio.base")
+
+
+class PLDA (Algorithm):
+  """Tool for computing PLDA scores (optionally on PCA-reduced features)"""
+
+  def __init__(
+      self,
+      subspace_dimension_of_f, # Size of subspace F
+      subspace_dimension_of_g, # Size of subspace G
+      subspace_dimension_pca = None,  # if given, perform PCA on the data and reduce the PCA subspace to the given dimension
+      plda_training_iterations = 200, # Maximum number of iterations for the EM loop
+      # TODO: refactor the remaining parameters!
+      INIT_SEED = 5489, # seed for initializing
+      INIT_F_METHOD = 'BETWEEN_SCATTER',
+      INIT_G_METHOD = 'WITHIN_SCATTER',
+      INIT_S_METHOD = 'VARIANCE_DATA',
+      multiple_probe_scoring = 'joint_likelihood'
+  ):
+
+    """Initializes the (PCA-)PLDA tool with the given parameters"""
+    # call base class constructor and register that this class requires training for enrollment
+    Algorithm.__init__(
+        self,
+        requires_enroller_training = True,
+
+        subspace_dimension_of_f = subspace_dimension_of_f, # Size of subspace F
+        subspace_dimension_of_g = subspace_dimension_of_g, # Size of subspace G
+        subspace_dimension_pca = subspace_dimension_pca, # if given, perform PCA on the data and reduce the PCA subspace to the given dimension
+        plda_training_iterations = plda_training_iterations, # Maximum number of iterations for the EM loop
+        # TODO: refactor the remaining parameters!
+        INIT_SEED = INIT_SEED, # seed for initializing
+        INIT_F_METHOD = str(INIT_F_METHOD),
+        INIT_G_METHOD = str(INIT_G_METHOD),
+        INIT_S_METHOD = str(INIT_S_METHOD),
+        multiple_probe_scoring = multiple_probe_scoring,
+        multiple_model_scoring = None
+    )
+
+    self.subspace_dimension_of_f = subspace_dimension_of_f
+    self.subspace_dimension_of_g = subspace_dimension_of_g
+    self.subspace_dimension_pca = subspace_dimension_pca
+    self.plda_training_iterations = plda_training_iterations
+    self.score_set = {'joint_likelihood': 'joint_likelihood', 'average':numpy.average, 'min':min, 'max':max}[multiple_probe_scoring]
+
+    # TODO: refactor
+    self.plda_trainer = bob.learn.em.PLDATrainer()
+    self.plda_trainer.init_f_method = INIT_F_METHOD
+    self.plda_trainer.init_g_method = INIT_G_METHOD
+    self.plda_trainer.init_sigma_method = INIT_S_METHOD
+    self.rng = bob.core.random.mt19937(INIT_SEED)
+    self.pca_machine = None
+    self.plda_base = None
+
+
+
+  def _train_pca(self, training_set):
+    """Trains and returns a LinearMachine that is trained using PCA"""
+    data = numpy.vstack([feature for client in training_set for feature in client])
+
+    logger.info("  -> Training LinearMachine using PCA")
+    trainer = bob.learn.linear.PCATrainer()
+    machine, _ = trainer.train(data)
+    # limit the number of kept principal components
+    machine.resize(machine.shape[0], self.subspace_dimension_pca)
+    return machine
+
+  def _perform_pca_client(self, client):
+    """Projects all features of one client into the PCA subspace"""
+    return numpy.vstack([self.pca_machine(feature) for feature in client])
+
+  def _perform_pca(self, training_set):
+    """Projects all features of the training set into the PCA subspace"""
+    return [self._perform_pca_client(client) for client in training_set]
+
+
+  def train_enroller(self, training_features, projector_file):
+    """Generates the PLDA base model from a list of arrays (one per identity),
+    and a set of training parameters. If PCA is requested, it is trained on the same data.
+    Both the trained PLDABase and the PCA machine are written."""
+
+
+    # train PCA and perform PCA on training data
+    if self.subspace_dimension_pca is not None:
+      self.pca_machine = self._train_pca(training_features)
+      training_features = self._perform_pca(training_features)
+
+    input_dimension = training_features[0].shape[1]
+    logger.info("  -> Training PLDA base machine")
+
+    # train machine
+    self.plda_base = bob.learn.em.PLDABase(input_dimension, self.subspace_dimension_of_f, self.subspace_dimension_of_g)
+    bob.learn.em.train(self.plda_trainer, self.plda_base, training_features, self.plda_training_iterations, self.rng)
+
+    # write machines to file
+    proj_hdf5file = bob.io.base.HDF5File(str(projector_file), "w")
+    if self.subspace_dimension_pca is not None:
+      proj_hdf5file.create_group('/pca')
+      proj_hdf5file.cd('/pca')
+      self.pca_machine.save(proj_hdf5file)
+    proj_hdf5file.create_group('/plda')
+    proj_hdf5file.cd('/plda')
+    self.plda_base.save(proj_hdf5file)
+
+
+  def load_enroller(self, projector_file):
+    """Reads the PCA projection matrix and the PLDA model from file"""
+    # read enroller (PCA and PLDA matrix)
+    hdf5 = bob.io.base.HDF5File(projector_file)
+    if hdf5.has_group("/pca"):
+      hdf5.cd('/pca')
+      self.pca_machine = bob.learn.linear.Machine(hdf5)
+    hdf5.cd('/plda')
+    self.plda_base = bob.learn.em.PLDABase(hdf5)
+
+
+  def enroll(self, enroll_features):
+    """Enrolls a PLDAMachine from the given (optionally PCA-projected) feature vectors"""
+    plda_machine = bob.learn.em.PLDAMachine(self.plda_base)
+    # project features, if enabled
+    if self.pca_machine is not None:
+      enroll_features = self._perform_pca_client(enroll_features)
+    # enroll
+    self.plda_trainer.enroll(plda_machine, enroll_features)
+    return plda_machine
+
+
+  def read_model(self, model_file):
+    """Reads the model, which in this case is a PLDAMachine"""
+    # read machine and attach base machine
+    plda_machine = bob.learn.em.PLDAMachine(bob.io.base.HDF5File(model_file), self.plda_base)
+    return plda_machine
+
+
+  def score(self, model, probe):
+    """Computes the PLDA score for the given model and probe"""
+    return self.score_for_multiple_probes(model, [probe])
+
+
+  def score_for_multiple_probes(self, model, probes):
+    """This function computes the score between the given model and several given probe files.
+    If 'joint_likelihood' scoring is selected, all probes are stacked and scored jointly against the model;
+    otherwise, the log-likelihood ratios of the single probes are fused with the function selected in the constructor."""
+    if self.pca_machine is not None:
+      probes = [self.pca_machine(probe) for probe in probes]
+    # forward
+    if self.score_set == 'joint_likelihood':
+      return model.log_likelihood_ratio(numpy.vstack(probes))
+    else:
+      return self.score_set([model.log_likelihood_ratio(probe) for probe in probes])
diff --git a/bob/bio/base/algorithm/__init__.py b/bob/bio/base/algorithm/__init__.py
index d22a30f2cd0dffb2edfe3a619f8d55d4f82f17f0..6baf97fd4c8b08be6f5a38ee7ccf6dc3adce890b 100644
--- a/bob/bio/base/algorithm/__init__.py
+++ b/bob/bio/base/algorithm/__init__.py
@@ -1,4 +1,5 @@
 from .Algorithm import Algorithm
 from .PCA import PCA
 from .LDA import LDA
+from .PLDA import PLDA
 from .BIC import BIC
diff --git a/bob/bio/base/config/algorithm/pca_plda.py b/bob/bio/base/config/algorithm/pca_plda.py
new file mode 100644
index 0000000000000000000000000000000000000000..2ff3a8e61f384b2af0020bfe52268d0d6d5b08ed
--- /dev/null
+++ b/bob/bio/base/config/algorithm/pca_plda.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+
+import bob.bio.base
+
+algorithm = bob.bio.base.algorithm.PLDA(
+  subspace_dimension_of_f = 16, # Size of subspace F
+  subspace_dimension_of_g = 16, # Size of subspace G
+  subspace_dimension_pca = 150  # Size of the PCA subspace
+)
diff --git a/bob/bio/base/config/algorithm/plda.py b/bob/bio/base/config/algorithm/plda.py
new file mode 100644
index 0000000000000000000000000000000000000000..c3d362fb492b419a05badadbbb3df9563f38b170
--- /dev/null
+++ b/bob/bio/base/config/algorithm/plda.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python
+
+import bob.bio.base
+
+algorithm = bob.bio.base.algorithm.PLDA(
+  subspace_dimension_of_f = 16, # Size of subspace F
+  subspace_dimension_of_g = 16  # Size of subspace G
+)
diff --git a/bob/bio/base/test/data/plda_enroller.hdf5 b/bob/bio/base/test/data/plda_enroller.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..c242d7151b2b01a09bfd38940b4387c41af945b1
Binary files /dev/null and b/bob/bio/base/test/data/plda_enroller.hdf5 differ
diff --git a/bob/bio/base/test/data/plda_model.hdf5 b/bob/bio/base/test/data/plda_model.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..d2d6285d520c1cfef63c46ca520ee4549a5bb652
Binary files /dev/null and b/bob/bio/base/test/data/plda_model.hdf5 differ
diff --git a/bob/bio/base/test/test_algorithms.py b/bob/bio/base/test/test_algorithms.py
index b732b1c9f1f7ece7cbc4d8caf9810cace25e1653..9d7f0bb1b1799639fb095b0da35e3557db5b42d9 100644
--- a/bob/bio/base/test/test_algorithms.py
+++ b/bob/bio/base/test/test_algorithms.py
@@ -27,11 +27,7 @@ import pkg_resources
 
 regenerate_refs = False
 
-#seed_value = 5489
-
-import sys
-_mac_os = sys.platform == 'darwin'
-
+seed_value = 5489
 
 import scipy.spatial
 
@@ -279,446 +275,59 @@ def test_bic():
   assert abs(bic1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5
-"""
-  def test01_gabor_jet(self):
-    # read input
-    extractor = facereclib.utils.tests.configuration_file('grid-graph', 'feature_extractor', 'features')
-    feature = extractor.read_feature(self.input_dir('graph_regular.hdf5'))
-    tool = self.config('gabor-jet')
-    self.assertFalse(tool.performs_projection)
-    self.assertFalse(tool.requires_enroller_training)
-
-    # enroll
-    model = tool.enroll([feature])
-    # execute the preprocessor
-    if regenerate_refs:
-      tool.save_model(model, 
self.reference_dir('graph_model.hdf5')) - reference = tool.read_model(self.reference_dir('graph_model.hdf5')) - self.assertEqual(len(model), 1) - for n in range(len(model[0])): - self.assertTrue((numpy.abs(model[0][n].abs - reference[0][n].abs) < 1e-5).all()) - self.assertTrue((numpy.abs(model[0][n].phase - reference[0][n].phase) < 1e-5).all()) - - # score - sim = tool.score(model, feature) - self.assertAlmostEqual(sim, 1.) - self.assertAlmostEqual(tool.score_for_multiple_probes(model, [feature, feature]), 1.) - - # test averaging - tool = facereclib.tools.GaborJets( - "PhaseDiffPlusCanberra", - gabor_sigma = math.sqrt(2.) * math.pi, - multiple_feature_scoring = "average_model" - ) - model = tool.enroll([feature, feature]) - - # absoulte values must be identical - for n in range(len(model)): - self.assertTrue((numpy.abs(model[n].abs - reference[0][n].abs) < 1e-5).all()) - # phases might differ with 2 Pi - for n in range(len(model)): - for j in range(len(model[n].phase)): - self.assertTrue(abs(model[n].phase[j] - reference[0][n].phase[j]) < 1e-5 or abs(model[n].phase[j] - reference[0][n].phase[j] + 2*math.pi) < 1e-5 or abs(model[n].phase[j] - reference[0][n].phase[j] - 2*math.pi) < 1e-5) - - sim = tool.score(model, feature) - self.assertAlmostEqual(sim, 1.) - self.assertAlmostEqual(tool.score_for_multiple_probes(model, [feature, feature]), 1.) - - - - def test02_lgbphs(self): - # read input - feature1 = facereclib.utils.load(self.input_dir('lgbphs_sparse.hdf5')) - feature2 = facereclib.utils.load(self.input_dir('lgbphs_no_phase.hdf5')) - tool = self.config('lgbphs') - self.assertFalse(tool.performs_projection) - self.assertFalse(tool.requires_enroller_training) - - # enroll model - model = tool.enroll([feature1]) - self.compare(model, 'lgbphs_model.hdf5') - - # score - sim = tool.score(model, feature2) - self.assertAlmostEqual(sim, 40960.) 
- self.assertAlmostEqual(tool.score_for_multiple_probes(model, [feature2, feature2]), sim) - - - - - - - - def test06_gmm(self): - # read input - feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) - # assure that the config file is readable - tool = self.config('gmm') - self.assertTrue(isinstance(tool, facereclib.tools.UBMGMM)) - - # here, we use a reduced complexity for test purposes - tool = facereclib.tools.UBMGMM( - number_of_gaussians = 2, - k_means_training_iterations = 1, - gmm_training_iterations = 1, - INIT_SEED = seed_value, - ) - self.assertTrue(tool.performs_projection) - self.assertTrue(tool.requires_projector_training) - self.assertFalse(tool.use_projected_features_for_enrollment) - self.assertFalse(tool.split_training_features_by_client) - - # train the projector - t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] - tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t) - if regenerate_refs: - import shutil - shutil.copy2(t, self.reference_dir('gmm_projector.hdf5')) - - # load the projector file - tool.load_projector(self.reference_dir('gmm_projector.hdf5')) - # compare GMM projector with reference - new_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(t)) - self.assertTrue(tool.m_ubm.is_similar_to(new_machine)) - os.remove(t) - - # project the feature - projected = tool.project(feature) - if regenerate_refs: - projected.save(bob.io.base.HDF5File(self.reference_dir('gmm_feature.hdf5'), 'w')) - probe = tool.read_probe(self.reference_dir('gmm_feature.hdf5')) - self.assertTrue(projected.is_similar_to(probe)) - - # enroll model with the unprojected feature - model = tool.enroll([feature]) - if regenerate_refs: - model.save(bob.io.base.HDF5File(self.reference_dir('gmm_model.hdf5'), 'w')) - reference_model = tool.read_model(self.reference_dir('gmm_model.hdf5')) - self.assertTrue(model.is_similar_to(reference_model)) - - # score with projected feature and compare to the weird reference score ... - sim = tool.score(reference_model, probe) - self.assertAlmostEqual(sim, 0.25472347774) - self.assertAlmostEqual(tool.score_for_multiple_probes(model, [probe, probe]), sim) - - - def test06a_gmm_regular(self): - # read input - feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) - # assure that the config file is readable - tool = self.config('ubm_gmm_regular_scoring') - self.assertTrue(isinstance(tool, facereclib.tools.UBMGMMRegular)) - - # here, we use a reduced complexity for test purposes - tool = facereclib.tools.UBMGMMRegular( - number_of_gaussians = 2, - k_means_training_iterations = 1, - gmm_training_iterations = 1, - INIT_SEED = seed_value - ) - self.assertFalse(tool.performs_projection) - self.assertTrue(tool.requires_enroller_training) - - # train the enroller - t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] - tool.train_enroller(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t) - # assure that it is identical to the normal UBM projector - tool.load_enroller(self.reference_dir('gmm_projector.hdf5')) - - # enroll model with the unprojected feature - model = tool.enroll([feature]) - reference_model = tool.read_model(self.reference_dir('gmm_model.hdf5')) - self.assertTrue(model.is_similar_to(reference_model)) - - # score with unprojected feature and compare to the weird reference score ... 
- probe = tool.read_probe(self.input_dir('dct_blocks.hdf5')) - sim = tool.score(reference_model, probe) - - self.assertAlmostEqual(sim, 0.143875716) - - - def test07_isv(self): - # read input - feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) - # assure that the config file is readable - tool = self.config('isv') - self.assertTrue(isinstance(tool, facereclib.tools.ISV)) - - # Here, we use a reduced complexity for test purposes - tool = facereclib.tools.ISV( - number_of_gaussians = 2, - subspace_dimension_of_u = 160, - k_means_training_iterations = 1, - gmm_training_iterations = 1, - isv_training_iterations = 1, - INIT_SEED = seed_value - ) - self.assertTrue(tool.performs_projection) - self.assertTrue(tool.requires_projector_training) - self.assertTrue(tool.use_projected_features_for_enrollment) - self.assertTrue(tool.split_training_features_by_client) - self.assertFalse(tool.requires_enroller_training) - - # train the projector - t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] - tool.train_projector(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=5, minimum=-5., maximum=5.), t) - if regenerate_refs: - import shutil - shutil.copy2(t, self.reference_dir('isv_projector.hdf5')) - - # load the projector file - tool.load_projector(self.reference_dir('isv_projector.hdf5')) - - # compare ISV projector with reference - hdf5file = bob.io.base.HDF5File(t) - hdf5file.cd('Projector') - projector_reference = bob.learn.em.GMMMachine(hdf5file) - self.assertTrue(tool.m_ubm.is_similar_to(projector_reference)) - - # compare ISV enroller with reference - hdf5file.cd('/') - hdf5file.cd('Enroller') - enroller_reference = bob.learn.em.ISVBase(hdf5file) - enroller_reference.ubm = projector_reference - if not _mac_os: - self.assertTrue(tool.m_isvbase.is_similar_to(enroller_reference)) - os.remove(t) - - # project the feature - projected = tool.project(feature) - if regenerate_refs: - tool.save_feature(projected, self.reference_dir('isv_feature.hdf5')) - - # compare the projected feature with the reference - projected_reference = tool.read_feature(self.reference_dir('isv_feature.hdf5')) - self.assertTrue(projected[0].is_similar_to(projected_reference)) - - # enroll model with the projected feature - model = tool.enroll([projected[0]]) - if regenerate_refs: - model.save(bob.io.base.HDF5File(self.reference_dir('isv_model.hdf5'), 'w')) - reference_model = tool.read_model(self.reference_dir('isv_model.hdf5')) - # compare the ISV model with the reference - self.assertTrue(model.is_similar_to(reference_model)) - - # check that the read_probe function reads the correct values - probe = tool.read_probe(self.reference_dir('isv_feature.hdf5')) - self.assertTrue(probe[0].is_similar_to(projected[0])) - self.assertEqual(probe[1].any(), projected[1].any()) - - # score with projected feature and compare to the weird reference score ... 
- sim = tool.score(model, probe) - self.assertAlmostEqual(sim, 0.002739667184506023) - - # score with a concatenation of the probe - self.assertAlmostEqual(tool.score_for_multiple_probes(model, [probe, probe]), sim, places=5) - - - def test08_jfa(self): - # read input - feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) - # assure that the config file is readable - tool = self.config('jfa') - self.assertTrue(isinstance(tool, facereclib.tools.JFA)) - - # here, we use a reduced complexity for test purposes - tool = facereclib.tools.JFA( - number_of_gaussians = 2, - subspace_dimension_of_u = 2, - subspace_dimension_of_v = 2, - k_means_training_iterations = 1, - gmm_training_iterations = 1, - jfa_training_iterations = 1, - INIT_SEED = seed_value - ) - self.assertTrue(tool.performs_projection) - self.assertTrue(tool.requires_projector_training) - self.assertTrue(tool.use_projected_features_for_enrollment) - self.assertFalse(tool.split_training_features_by_client) - self.assertTrue(tool.requires_enroller_training) - - # train the projector - t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] - tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t) - if regenerate_refs: - import shutil - shutil.copy2(t, self.reference_dir('jfa_projector.hdf5')) - - # load the projector file - tool.load_projector(self.reference_dir('jfa_projector.hdf5')) - # compare JFA projector with reference - new_machine = bob.learn.em.GMMMachine(bob.io.base.HDF5File(t)) - self.assertTrue(tool.m_ubm.is_similar_to(new_machine)) - os.remove(t) - - # project the feature - projected = tool.project(feature) - if regenerate_refs: - projected.save(bob.io.base.HDF5File(self.reference_dir('jfa_feature.hdf5'), 'w')) - # compare the projected feature with the reference - projected_reference = tool.read_feature(self.reference_dir('jfa_feature.hdf5')) - self.assertTrue(projected.is_similar_to(projected_reference)) - - # train the enroller - t = tempfile.mkstemp('enroll.hdf5', prefix='frltest_')[1] - tool.train_enroller(self.train_gmm_stats(self.reference_dir('jfa_feature.hdf5'), count=5, minimum=-5., maximum=5.), t) - if regenerate_refs: - import shutil - shutil.copy2(t, self.reference_dir('jfa_enroller.hdf5')) - tool.load_enroller(self.reference_dir('jfa_enroller.hdf5')) - # compare JFA enroller with reference - enroller_reference = bob.learn.em.JFABase(bob.io.base.HDF5File(t)) - enroller_reference.ubm = new_machine - if not _mac_os: - self.assertTrue(tool.m_jfabase.is_similar_to(enroller_reference)) - os.remove(t) - - # enroll model with the projected feature - model = tool.enroll([projected]) - if regenerate_refs: - model.save(bob.io.base.HDF5File(self.reference_dir('jfa_model.hdf5'), 'w')) - # assert that the model is ok - reference_model = tool.read_model(self.reference_dir('jfa_model.hdf5')) - self.assertTrue(model.is_similar_to(reference_model)) - - # check that the read_probe function reads the requested data - probe = tool.read_probe(self.reference_dir('jfa_feature.hdf5')) - self.assertTrue(probe.is_similar_to(projected)) - - # score with projected feature and compare to the weird reference score ... 
- sim = tool.score(model, probe) - self.assertAlmostEqual(sim, 0.25473213400211353) - # score with a concatenation of the probe - # self.assertAlmostEqual(tool.score_for_multiple_probes(model, [probe, probe]), sim) - - - def test09_plda(self): - # read input - feature = facereclib.utils.load(self.input_dir('linearize.hdf5')) - # assure that the config file is readable - tool = self.config('pca+plda') - self.assertTrue(isinstance(tool, facereclib.tools.PLDA)) - - # here, we use a reduced complexity for test purposes - tool = facereclib.tools.PLDA( - subspace_dimension_of_f = 2, - subspace_dimension_of_g = 2, - subspace_dimension_pca = 10, - plda_training_iterations = 1, - INIT_SEED = seed_value, - ) - self.assertFalse(tool.performs_projection) - self.assertTrue(tool.requires_enroller_training) - - # train the projector - t = tempfile.mkstemp('pca+plda.hdf5', prefix='frltest_')[1] - tool.train_enroller(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=20, minimum=0., maximum=255.), t) - if regenerate_refs: - import shutil - shutil.copy2(t, self.reference_dir('pca+plda_enroller.hdf5')) - - # load the projector file - tool.load_enroller(self.reference_dir('pca+plda_enroller.hdf5')) - # compare the resulting machines - test_file = bob.io.base.HDF5File(t) - test_file.cd('/pca') - pca_machine = bob.learn.linear.Machine(test_file) - test_file.cd('/plda') - plda_machine = bob.learn.em.PLDABase(test_file) - # TODO: compare the PCA machines - #self.assertEqual(pca_machine, tool.m_pca_machine) - # TODO: compare the PLDA machines - #self.assertEqual(plda_machine, tool.m_plda_base_machine) - os.remove(t) - - # enroll model - model = tool.enroll([feature]) - if regenerate_refs: - model.save(bob.io.base.HDF5File(self.reference_dir('pca+plda_model.hdf5'), 'w')) - # TODO: compare the models with the reference - #reference_model = tool.read_model(self.reference_dir('pca+plda_model.hdf5')) - #self.assertEqual(model, reference_model) - - # score - sim = tool.score(model, feature) - self.assertAlmostEqual(sim, 0.) - # score with a concatenation of the probe - self.assertAlmostEqual(tool.score_for_multiple_probes(model, [feature, feature]), 0.) - - - def test10_ivector(self): - # NOTE: This test will fail when it is run solely. Please always run all Tool tests in order to assure that they work. 
- # read input - feature = facereclib.utils.load(self.input_dir('dct_blocks.hdf5')) - # assure that the config file is readable - tool = self.config('ivector') - self.assertTrue(isinstance(tool, facereclib.tools.IVector)) - - # here, we use a reduced complexity for test purposes - tool = facereclib.tools.IVector( - number_of_gaussians = 2, - subspace_dimension_of_t=2, # T subspace dimension - update_sigma = False, # TODO Do another test with True - tv_training_iterations = 1, # Number of EM iterations for the JFA training - variance_threshold = 1e-5, - INIT_SEED = seed_value - ) - self.assertTrue(tool.performs_projection) - self.assertTrue(tool.requires_projector_training) - self.assertTrue(tool.use_projected_features_for_enrollment) - self.assertFalse(tool.split_training_features_by_client) - self.assertFalse(tool.requires_enroller_training) - - # train the projector - t = tempfile.mkstemp('ubm.hdf5', prefix='frltest_')[1] - tool.train_projector(facereclib.utils.tests.random_training_set(feature.shape, count=5, minimum=-5., maximum=5.), t) - if regenerate_refs: - import shutil - shutil.copy2(t, self.reference_dir('ivector_projector.hdf5')) - - # load the projector file - tool.load_projector(self.reference_dir('ivector_projector.hdf5')) - - # compare ISV projector with reference - hdf5file = bob.io.base.HDF5File(t) - hdf5file.cd('Projector') - projector_reference = bob.learn.em.GMMMachine(hdf5file) - self.assertTrue(tool.m_ubm.is_similar_to(projector_reference)) - - # compare ISV enroller with reference - hdf5file.cd('/') - hdf5file.cd('Enroller') - enroller_reference = bob.learn.em.IVectorMachine(hdf5file) - enroller_reference.ubm = projector_reference - if not _mac_os: - self.assertTrue(tool.m_tv.is_similar_to(enroller_reference)) - os.remove(t) - - # project the feature - projected = tool.project(feature) - if regenerate_refs: - tool.save_feature(projected, self.reference_dir('ivector_feature.hdf5')) - - # compare the projected feature with the reference - projected_reference = tool.read_feature(self.reference_dir('ivector_feature.hdf5')) - self.assertTrue(numpy.allclose(projected,projected_reference)) - - # enroll model with the projected feature - # This is not yet supported - # model = tool.enroll([projected[0]]) - # if regenerate_refs: - # model.save(bob.io.HDF5File(self.reference_dir('ivector_model.hdf5'), 'w')) - #reference_model = tool.read_model(self.reference_dir('ivector_model.hdf5')) - # compare the IVector model with the reference - #self.assertTrue(model.is_similar_to(reference_model)) - - # check that the read_probe function reads the correct values - probe = tool.read_probe(self.reference_dir('ivector_feature.hdf5')) - self.assertTrue(numpy.allclose(probe,projected)) - - # score with projected feature and compare to the weird reference score ... 
-    # This in not implemented yet
-
-    # score with a concatenation of the probe
-    # This is not implemented yet
-"""
+def test_plda():
+  temp_file = bob.io.base.test_utils.temporary_filename()
+  # assure that the configurations are loadable
+  plda1 = bob.bio.base.load_resource("plda", "algorithm")
+  assert isinstance(plda1, bob.bio.base.algorithm.PLDA)
+  assert isinstance(plda1, bob.bio.base.algorithm.Algorithm)
+  plda2 = bob.bio.base.load_resource("pca+plda", "algorithm")
+  assert isinstance(plda2, bob.bio.base.algorithm.PLDA)
+  assert isinstance(plda2, bob.bio.base.algorithm.Algorithm)
+
+  assert not plda1.performs_projection
+  assert not plda1.requires_projector_training
+  assert not plda1.use_projected_features_for_enrollment
+  assert not plda1.split_training_features_by_client
+  assert plda1.requires_enroller_training
+
+  # generate an algorithm with a smaller PCA subspace and reduced complexity
+  plda3 = bob.bio.base.algorithm.PLDA(subspace_dimension_of_f = 2, subspace_dimension_of_g = 2, subspace_dimension_pca = 10, plda_training_iterations = 1, INIT_SEED = seed_value)
+
+  # create random training set
+  train_set = utils.random_training_set_by_id(200, count=20, minimum=0., maximum=255.)
+  # reference file for the trained enroller
+  reference_file = pkg_resources.resource_filename('bob.bio.base.test', 'data/plda_enroller.hdf5')
+  try:
+    # train the enroller
+    plda3.train_enroller(train_set, temp_file)
+    assert os.path.exists(temp_file)
+
+    if regenerate_refs: shutil.copy(temp_file, reference_file)
+
+    # check that the loaded enrollers are identical
+    plda1.load_enroller(reference_file)
+    plda3.load_enroller(temp_file)
+
+    assert plda1.pca_machine.is_similar_to(plda3.pca_machine)
+    assert plda1.plda_base.is_similar_to(plda3.plda_base)
+
+  finally:
+    if os.path.exists(temp_file): os.remove(temp_file)
+
+  # generate a random feature
+  feature = utils.random_array(200, 0., 255., seed=84)
+
+  # enroll model from random features
+  reference = pkg_resources.resource_filename('bob.bio.base.test', 'data/plda_model.hdf5')
+  model = plda1.enroll([feature])
+  # write the reference model, if requested
+  if regenerate_refs:
+    plda1.write_model(model, reference)
+  reference = plda1.read_model(reference)
+  assert model.is_similar_to(reference)
+
+  # compare model with probe
+  reference_score = 0.
+  assert abs(plda1.score(model, feature) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (plda1.score(model, feature), reference_score)
+  assert abs(plda1.score_for_multiple_probes(model, [feature, feature]) - reference_score) < 1e-5
diff --git a/buildout.cfg b/buildout.cfg
index 6b701057a05d707db323cb9fbb265bc7896aa8ce..ea8f77908d5852a105c5efa897b1435d72187900 100644
--- a/buildout.cfg
+++ b/buildout.cfg
@@ -5,15 +5,46 @@
 [buildout]
 parts = scripts
 eggs = bob.bio.base
-extensions = bob.buildout
-
-develop = .
+       gridtk
+extensions = bob.buildout
+             mr.developer
+auto-checkout = *
+develop = src/bob.extension
+          src/bob.blitz
+          src/bob.core
+          src/bob.io.base
+          src/bob.learn.activation
+          src/bob.math
+          src/bob.learn.linear
+          src/bob.sp
+          src/bob.learn.em
+          src/bob.measure
+          src/bob.db.verification.utils
+          src/bob.db.atnt
+          src/bob.io.image
+          .
+ ; options for bob.buildout debug = true verbose = true newest = false +[sources] +bob.extension = git https://github.com/bioidiap/bob.extension +bob.blitz = git https://github.com/bioidiap/bob.blitz +bob.core = git https://github.com/bioidiap/bob.core +bob.io.base = git https://github.com/bioidiap/bob.io.base +bob.learn.activation = git https://github.com/bioidiap/bob.learn.activation +bob.math = git https://github.com/bioidiap/bob.math +bob.sp = git https://github.com/bioidiap/bob.sp +bob.learn.linear = git https://github.com/bioidiap/bob.learn.linear +bob.learn.em = git https://github.com/bioidiap/bob.learn.em +bob.measure = git https://github.com/bioidiap/bob.measure +bob.db.verification.utils = git https://github.com/bioidiap/bob.db.verification.utils +bob.db.atnt = git https://github.com/bioidiap/bob.db.atnt +bob.io.image = git https://github.com/bioidiap/bob.io.image + [scripts] recipe = bob.buildout:scripts dependent-scripts = true diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..754f8e275ac6ab69cc7625511625710093fd2bb4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,15 @@ +setuptools +bob.extension +bob.blitz +bob.core +bob.io.base +bob.learn.activation +bob.math +bob.learn.linear +bob.sp +bob.learn.em +bob.measure +bob.db.verification.utils +bob.db.atnt # for test purposes +bob.io.image # for test purposes +matplotlib # for plotting diff --git a/setup.py b/setup.py index fbe07a678b1c6bb135ded380f6f83cf6fc367071..f0d761575c7308fe296de8fd2af3c39fcc6b7fb6 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,11 @@ # allows you to test your package with new python dependencies w/o requiring # administrative interventions. -from setuptools import setup, find_packages +from setuptools import setup, find_packages, dist +dist.Distribution(dict(setup_requires=['bob.extension'])) + +from bob.extension.utils import load_requirements +install_requires = load_requirements() # The only thing we do in this file is to call the setup() function with all # parameters that define our package. @@ -64,9 +68,7 @@ setup( # on the current system will be installed locally and only visible to the # scripts of this package. Don't worry - You won't need administrative # privileges when using buildout. - install_requires = [ - 'setuptools', - ], + install_requires = install_requires, # Your project should be called something like 'bob.<foo>' or # 'bob.<foo>.<bar>'. To implement this correctly and still get all your @@ -122,7 +124,9 @@ setup( 'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only 'pca = bob.bio.base.config.algorithm.pca:algorithm', 'lda = bob.bio.base.config.algorithm.lda:algorithm', - 'pca+lda = bob.bio.base.config.algorithm.lda:algorithm', + 'pca+lda = bob.bio.base.config.algorithm.pca_lda:algorithm', + 'plda = bob.bio.base.config.algorithm.plda:algorithm', + 'pca+plda = bob.bio.base.config.algorithm.pca_plda:algorithm', 'bic = bob.bio.base.config.algorithm.bic:algorithm', ],
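
Note for reviewers (not part of the patch): a minimal sketch of how the new PLDA algorithm could be exercised outside of the test suite. It mirrors the calls made in test_plda above; the toy-data helpers from bob.bio.base.test.utils, the reduced subspace sizes, and the output file name are illustrative assumptions, not the registered default configuration.

    import bob.bio.base
    from bob.bio.base.test import utils

    # reduced complexity, as in test_plda above
    plda = bob.bio.base.algorithm.PLDA(
        subspace_dimension_of_f = 2,
        subspace_dimension_of_g = 2,
        subspace_dimension_pca = 10,
        plda_training_iterations = 1)

    # train and load the enroller on a toy training set (one list of features per identity)
    train_set = utils.random_training_set_by_id(200, count=20, minimum=0., maximum=255.)
    plda.train_enroller(train_set, "plda_enroller.hdf5")  # file name is illustrative
    plda.load_enroller("plda_enroller.hdf5")

    # enroll a model from one feature and score it against a probe
    feature = utils.random_array(200, 0., 255., seed=84)
    model = plda.enroll([feature])
    print(plda.score(model, feature))
    print(plda.score_for_multiple_probes(model, [feature, feature]))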