diff --git a/bob/bio/base/algorithm/BIC.py b/bob/bio/base/algorithm/BIC.py
new file mode 100644
index 0000000000000000000000000000000000000000..ff05283400b56f7f9d90a62864f9664bbd708052
--- /dev/null
+++ b/bob/bio/base/algorithm/BIC.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Manuel Guenther <Manuel.Guenther@idiap.ch>
+
+import bob.io.base
+import bob.learn.linear
+
+import numpy
+import math
+
+from .Algorithm import Algorithm
+from .. import utils
+
+import logging
+logger = logging.getLogger("bob.bio.base")
+
+class BIC (Algorithm):
+  """Computes the Intrapersonal/Extrapersonal classifier using a generic feature type and feature comparison function.
+
+  Feature pairs are turned into comparison results by ``comparison_function``;
+  a ``bob.learn.linear.BICMachine`` is trained on the results of intrapersonal
+  and extrapersonal pairs and is used to score model features against a probe.
+
+  Parameters:
+
+  comparison_function : callable
+    Called as ``comparison_function(feature1, feature2)``; this highly depends
+    on the type of features that are used.
+  maximum_training_pair_count : int or None
+    If set, the intrapersonal and the extrapersonal training pairs are each
+    limited to the given number in a non-random manner.
+  subspace_dimensions : (int, int) or None
+    If set as a pair ``(intra_dim, extra_dim)``, PCA subspace truncation for
+    the two classes is performed.
+  uses_dffs : bool
+    Use the distance from feature space; only valid when PCA truncation is
+    enabled.  WARNING: use this flag with care.
+  read_function : callable
+    Called as ``read_function(hdf5)`` to read a single feature.
+  write_function : callable
+    Called as ``write_function(feature, hdf5)`` to write a single feature.
+  kwargs
+    Parameters directly sent to the base class.
+  """
+
+  def __init__(
+      self,
+      comparison_function,
+      maximum_training_pair_count = None,
+      subspace_dimensions = None,
+      uses_dffs = False,
+      read_function = utils.load,
+      write_function = utils.save,
+      **kwargs
+  ):
+
+    # call base class function and register that this tool requires training for the enrollment;
+    # the callables are handed over as strings, the originals are kept on this instance below
+    Algorithm.__init__(
+        self,
+        requires_enroller_training = True,
+
+        comparison_function = str(comparison_function),
+        maximum_training_pair_count = maximum_training_pair_count,
+        subspace_dimensions = subspace_dimensions,
+        uses_dffs = uses_dffs,
+        read_function=str(read_function),
+        write_function=str(write_function),
+
+        **kwargs
+    )
+
+    # set up the BIC tool
+    self.comparison_function = comparison_function
+    self.read_function = read_function
+    self.write_function = write_function
+    self.maximum_pair_count = maximum_training_pair_count
+    self.use_dffs = uses_dffs
+    if subspace_dimensions is not None:
+      self.M_I = subspace_dimensions[0]
+      self.M_E = subspace_dimensions[1]
+      self.bic_machine = bob.learn.linear.BICMachine(self.use_dffs)
+    else:
+      # without subspace truncation the machine is created with DFFS disabled
+      self.bic_machine = bob.learn.linear.BICMachine(False)
+      self.M_I = None
+      self.M_E = None
+
+
+  def _sqr(self, x):
+    """Returns the square of ``x``."""
+    return x*x
+
+
+  def _trainset_for(self, pairs):
+    """Computes the array containing the comparison results for the given set of feature pairs."""
+    return numpy.vstack([self.comparison_function(f1, f2) for (f1, f2) in pairs])
+
+
+  def train_enroller(self, train_features, enroller_file):
+    """Trains the BIC machine on intrapersonal and extrapersonal pairs and saves it to ``enroller_file``."""
+
+    # compute intrapersonal and extrapersonal pairs
+    logger.info(" -> Computing pairs")
+    intra_pairs, extra_pairs = bob.learn.linear.bic_intra_extra_pairs(train_features)
+    # limit pairs, if desired
+    if self.maximum_pair_count is not None:
+      if len(intra_pairs) > self.maximum_pair_count:
+        logger.info(" -> Limiting intrapersonal pairs from %d to %d", len(intra_pairs), self.maximum_pair_count)
+        intra_pairs = utils.selected_elements(intra_pairs, self.maximum_pair_count)
+      if len(extra_pairs) > self.maximum_pair_count:
+        logger.info(" -> Limiting extrapersonal pairs from %d to %d", len(extra_pairs), self.maximum_pair_count)
+        extra_pairs = utils.selected_elements(extra_pairs, self.maximum_pair_count)
+
+    # train the BIC Machine with these pairs
+    logger.info(" -> Computing %d intrapersonal results", len(intra_pairs))
+    intra_vectors = self._trainset_for(intra_pairs)
+    logger.info(" -> Computing %d extrapersonal results", len(extra_pairs))
+    extra_vectors = self._trainset_for(extra_pairs)
+
+    logger.info(" -> Training BIC machine")
+    # pass the subspace dimensions to the trainer only when they were configured
+    trainer = bob.learn.linear.BICTrainer(self.M_I, self.M_E) if self.M_I is not None else bob.learn.linear.BICTrainer()
+    trainer.train(intra_vectors, extra_vectors, self.bic_machine)
+
+    # save the machine to file
+    self.bic_machine.save(bob.io.base.HDF5File(enroller_file, 'w'))
+
+
+  def load_enroller(self, enroller_file):
+    """Loads the trained BIC machine from ``enroller_file``."""
+    self.bic_machine.load(bob.io.base.HDF5File(enroller_file, 'r'))
+    # to set this should not be required, but just in case
+    # you re-use a trained enroller file that had a different setup of use_DFFS
+    self.bic_machine.use_DFFS = self.use_dffs
+
+
+  def enroll(self, enroll_features):
+    """Enrolls a model by returning the given enrollment features unchanged; the model is the collection of features."""
+    return enroll_features
+
+
+  def write_model(self, model, model_file):
+    """Writes all features of the model into one HDF5 file, using the ``write_function`` specified in the constructor."""
+    hdf5 = bob.io.base.HDF5File(model_file, "w")
+    # each feature is stored in its own group "Feature<i>", which read_model relies on
+    for i, f in enumerate(model):
+      hdf5.create_group("Feature%d" % i)
+      hdf5.cd("Feature%d" % i)
+      self.write_function(f, hdf5)
+      hdf5.cd("..")
+
+
+  def read_model(self, model_file):
+    """Loads all features of the model from the HDF5 file, using the ``read_function`` specified in the constructor."""
+    hdf5 = bob.io.base.HDF5File(model_file)
+    i = 0
+    model = []
+    # features are stored in consecutively numbered groups; stop at the first missing index
+    while hdf5.has_group("Feature%d" % i):
+      hdf5.cd("Feature%d" % i)
+      model.append(self.read_function(hdf5))
+      hdf5.cd("..")
+      i += 1
+    return model
+
+
+  def read_probe(self, probe_file):
+    """Loads the probe feature from file, using the ``read_function`` specified in the constructor."""
+    # the constructor stores the reader as ``read_function``; no ``load_function`` attribute is set by this class
+    return self.read_function(bob.io.base.HDF5File(probe_file))
+
+
+  def score(self, model, probe):
+    """Computes the BIC score for the given model and probe pair.
+
+    Every model feature is compared to the probe and rated by the BIC machine;
+    the resulting scores are combined with ``self.model_fusion_function``, which
+    is not defined in this class (presumably set up by the base class).
+    """
+    scores = []
+    for feature in model:
+      diff = self.comparison_function(feature, probe)
+      assert len(diff) == self.bic_machine.input_size
+      scores.append(self.bic_machine(diff))
+    return self.model_fusion_function(scores)
diff --git a/bob/bio/base/algorithm/__init__.py b/bob/bio/base/algorithm/__init__.py
index 
0866bd1e1cae8c8149e479e2618ae278a1771a58..d22a30f2cd0dffb2edfe3a619f8d55d4f82f17f0 100644
--- a/bob/bio/base/algorithm/__init__.py
+++ b/bob/bio/base/algorithm/__init__.py
@@ -1,3 +1,4 @@
 from .Algorithm import Algorithm
 from .PCA import PCA
 from .LDA import LDA
+from .BIC import BIC
diff --git a/bob/bio/base/config/algorithm/bic.py b/bob/bio/base/config/algorithm/bic.py
new file mode 100644
index 0000000000000000000000000000000000000000..cdfda84a5493c01f925bf166b8050918c53140ea
--- /dev/null
+++ b/bob/bio/base/config/algorithm/bic.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+# Default configuration of the BIC algorithm, exposed as the module-level ``algorithm`` object
+
+import bob.bio.base
+import numpy
+
+algorithm = bob.bio.base.algorithm.BIC(
+  # Distance measure to compare two features in input space
+  comparison_function = numpy.subtract,
+  # Limit the number of intrapersonal and of extrapersonal training pairs
+  maximum_training_pair_count = 10000,
+  # Dimensions of the (intrapersonal, extrapersonal) subspaces
+  subspace_dimensions = (30, 30)
+)
diff --git a/bob/bio/base/test/data/bic_enroller.hdf5 b/bob/bio/base/test/data/bic_enroller.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..3dada5a66ce8d314d52d457ed812e856c06a87c5
Binary files /dev/null and b/bob/bio/base/test/data/bic_enroller.hdf5 differ
diff --git a/bob/bio/base/test/data/bic_model.hdf5 b/bob/bio/base/test/data/bic_model.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..f465dfb69061f4ca50f375722ab0bd9e38b22683
Binary files /dev/null and b/bob/bio/base/test/data/bic_model.hdf5 differ
diff --git a/bob/bio/base/test/data/iec_enroller.hdf5 b/bob/bio/base/test/data/iec_enroller.hdf5
new file mode 100644
index 0000000000000000000000000000000000000000..a51d341f414618d3b23c587fc8cec4115ddacd8d
Binary files /dev/null and b/bob/bio/base/test/data/iec_enroller.hdf5 differ
diff --git a/bob/bio/base/test/test_algorithms.py b/bob/bio/base/test/test_algorithms.py
index b9525276b405bb7b0f344ce877c48cb0be20763d..b732b1c9f1f7ece7cbc4d8caf9810cace25e1653 100644
--- 
a/bob/bio/base/test/test_algorithms.py
+++ b/bob/bio/base/test/test_algorithms.py
@@ -209,6 +209,77 @@ def test_lda():
     if os.path.exists(temp_file): os.remove(temp_file)
 
 
+
+def test_bic():
+  """Checks resource loading, enroller training, model I/O and scoring of BIC, with and without subspace dimensions."""
+  temp_file = bob.io.base.test_utils.temporary_filename()
+  # assure that the configurations are loadable
+  bic1 = bob.bio.base.load_resource("bic", "algorithm")
+  assert isinstance(bic1, bob.bio.base.algorithm.BIC)
+  assert isinstance(bic1, bob.bio.base.algorithm.Algorithm)
+
+  assert not bic1.performs_projection
+  assert not bic1.requires_projector_training
+  assert not bic1.use_projected_features_for_enrollment
+  assert bic1.requires_enroller_training
+
+
+  # create random training set
+  train_set = utils.random_training_set_by_id(200, count=10, minimum=0., maximum=255.)
+  # set up an algorithm with at most 100 training pairs per class and subspace dimensions (5, 7)
+  bic2 = bob.bio.base.algorithm.BIC(numpy.subtract, 100, (5,7))
+  reference_file = pkg_resources.resource_filename('bob.bio.base.test', 'data/bic_enroller.hdf5')
+  try:
+    # train enroller
+    bic2.train_enroller(train_set, temp_file)
+    assert os.path.exists(temp_file)
+
+    if regenerate_refs: shutil.copy(temp_file, reference_file)
+
+    # compare the freshly trained machine with the stored reference machine
+    bic1.load_enroller(reference_file)
+    bic2.load_enroller(temp_file)
+
+    assert bic1.bic_machine.is_similar_to(bic2.bic_machine)
+  finally:
+    if os.path.exists(temp_file): os.remove(temp_file)
+
+  # enroll model from random features
+  enroll = utils.random_training_set(200, 5, 0., 255., seed=21)
+  model = bic1.enroll(enroll)
+  _compare(model, pkg_resources.resource_filename('bob.bio.base.test', 'data/bic_model.hdf5'), bic1.write_model, bic1.read_model)
+
+  # compare model with probe
+  probe = utils.random_array(200, 0., 255., seed=84)
+  reference_score = 0.04994252
+  assert abs(bic1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (bic1.score(model, probe), reference_score)
+  assert abs(bic1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5
+
+  # the same for the IEC, i.e., an algorithm created without subspace dimensions
+  bic3 = bob.bio.base.algorithm.BIC(numpy.subtract, 100)
+  reference_file = pkg_resources.resource_filename('bob.bio.base.test', 'data/iec_enroller.hdf5')
+  try:
+    # train enroller
+    bic3.train_enroller(train_set, temp_file)
+    assert os.path.exists(temp_file)
+
+    if regenerate_refs: shutil.copy(temp_file, reference_file)
+
+    # compare the freshly trained machine with the stored reference machine
+    bic1.load_enroller(reference_file)
+    bic3.load_enroller(temp_file)
+
+    assert bic1.bic_machine.is_similar_to(bic3.bic_machine)
+  finally:
+    if os.path.exists(temp_file): os.remove(temp_file)
+
+  # compare model with probe; bic1 now holds the IEC reference machine, hence the different reference score
+  probe = utils.random_array(200, 0., 255., seed=84)
+  reference_score = 0.18119139
+  assert abs(bic1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (bic1.score(model, probe), reference_score)
+  assert abs(bic1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5
+
+
 """
   def test01_gabor_jet(self):
     # read input
@@ -277,59 +348,6 @@ def test_lda():
 
 
 
-  def test05_bic(self):
-    # read input
-    feature = facereclib.utils.load(self.input_dir('linearize.hdf5'))
-    # check that the config file is readable
-    tool = self.config('bic')
-    self.assertTrue(isinstance(tool, facereclib.tools.BIC))
-
-    # here, we use a reduced complexity for test purposes
-    tool = facereclib.tools.BIC(numpy.subtract, 100, (5,7))
-    self.assertFalse(tool.performs_projection)
-    self.assertTrue(tool.requires_enroller_training)
-
-    # train the enroller
-    t = tempfile.mkstemp('bic.hdf5', prefix='frltest_')[1]
-    tool.train_enroller(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=10, minimum=0., maximum=255.), t)
-    if regenerate_refs:
-      import shutil
-      shutil.copy2(t, self.reference_dir('bic_enroller.hdf5'))
-
-    # load the projector file
-    tool.load_enroller(self.reference_dir('bic_enroller.hdf5'))
-    # compare the resulting machines
-    new_machine = bob.learn.linear.BICMachine(bob.io.base.HDF5File(t))
-    self.assertTrue(tool.m_bic_machine.is_similar_to(new_machine))
-
os.remove(t) - - # enroll model - model = tool.enroll([feature]) - self.compare(model, 'bic_model.hdf5') - - # score and compare to the weird reference score ... - sim = tool.score(model, feature) - self.assertAlmostEqual(sim, 0.31276072) - - # now, test without PCA - tool = facereclib.tools.BIC(numpy.subtract, 100) - # train the enroller - t = tempfile.mkstemp('iec.hdf5', prefix='frltest_')[1] - tool.train_enroller(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=10, minimum=0., maximum=255.), t) - if regenerate_refs: - import shutil - shutil.copy2(t, self.reference_dir('iec_enroller.hdf5')) - - # load the projector file - tool.load_enroller(self.reference_dir('iec_enroller.hdf5')) - # compare the resulting machines - new_machine = bob.learn.linear.BICMachine(bob.io.base.HDF5File(t)) - self.assertTrue(tool.m_bic_machine.is_similar_to(new_machine)) - os.remove(t) - - # score and compare to the weird reference score ... - sim = tool.score(model, feature) - self.assertAlmostEqual(sim, 0.4070329180) def test06_gmm(self): diff --git a/setup.py b/setup.py index 6a0ee5e782373b23d21c925ad5667f22e8338269..213ac20a467cc23a63fde2fef137dd6d2c8f201b 100644 --- a/setup.py +++ b/setup.py @@ -123,6 +123,7 @@ setup( 'pca = bob.bio.base.config.algorithm.pca:algorithm', 'lda = bob.bio.base.config.algorithm.lda:algorithm', 'pca+lda = bob.bio.base.config.algorithm.lda:algorithm', + 'bic = bob.bio.base.config.algorithm.bic:algorithm', ], },