Added BIC algorithm

0e0586d8 · Manuel Günther · be67d5a2 · 0e0586d8 · 0e0586d8 · 0e0586d8
Commit 0e0586d8 authored 9 years ago by Manuel Günther
--- a/bob/bio/base/algorithm/BIC.py
+++ b/bob/bio/base/algorithm/BIC.py
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Manuel Guenther <Manuel.Guenther@idiap.ch>
+
+import bob.io.base
+import bob.learn.linear
+
+import numpy
+import math
+
+from .Algorithm import Algorithm
+from .. import utils
+
+import logging
+logger = logging.getLogger("bob.bio.base")
+
+class BIC (Algorithm):
+  """Computes the Intrapersonal/Extrapersonal classifier using a generic feature type and feature comparison function.
+
+  Pairs of training features are converted into comparison vectors by the given ``comparison_function``.
+  A :py:class:`bob.learn.linear.BICMachine` is trained on the vectors of the intrapersonal and the extrapersonal pairs,
+  and is applied to the comparison vector of each model feature and the probe during scoring.
+
+  **Keyword parameters:**
+
+  comparison_function : callable
+    Called as ``comparison_function(feature1, feature2)``; must return the comparison vector of the two features.
+    Which function is appropriate highly depends on the type of features that are used.
+
+  maximum_training_pair_count : int or ``None``
+    If set, the intrapersonal and the extrapersonal training pairs are each limited to this number in a non-random manner.
+
+  subspace_dimensions : (int, int) or ``None``
+    If set as a pair ``(intra_dim, extra_dim)``, PCA subspace truncation for the two classes is performed.
+
+  uses_dffs : bool
+    Use the distance from feature space; only valid when PCA truncation is enabled.
+    WARNING: use this flag with care.
+
+  read_function : callable
+    Called as ``read_function(hdf5)`` to read one feature from an open :py:class:`bob.io.base.HDF5File`.
+
+  write_function : callable
+    Called as ``write_function(feature, hdf5)`` to write one feature into an open :py:class:`bob.io.base.HDF5File`.
+
+  kwargs
+    Parameters directly sent to the base class constructor.
+  """
+
+  def __init__(
+      self,
+      comparison_function, # the function to be used to compare two features; this highly depends on the type of features that are used
+      maximum_training_pair_count = None,  # if set, limit the number of training pairs to the given number in a non-random manner
+      subspace_dimensions = None, # if set as a pair (intra_dim, extra_dim), PCA subspace truncation for the two classes is performed
+      uses_dffs = False, # use the distance from feature space; only valid when PCA truncation is enabled; WARNING: uses this flag with care
+      read_function = utils.load,
+      write_function = utils.save,
+      **kwargs # parameters directly sent to the base class
+  ):
+
+    # call base class function and register that this tool requires training for the enrollment;
+    # the callables are registered through their string representation
+    Algorithm.__init__(
+        self,
+        requires_enroller_training = True,
+
+        comparison_function = str(comparison_function),
+        maximum_training_pair_count = maximum_training_pair_count,
+        subspace_dimensions = subspace_dimensions,
+        uses_dffs = uses_dffs,
+        read_function=str(read_function),
+        write_function=str(write_function),
+
+        **kwargs
+    )
+
+    # set up the BIC tool
+    self.comparison_function = comparison_function
+    self.read_function = read_function
+    self.write_function = write_function
+    self.maximum_pair_count = maximum_training_pair_count
+    self.use_dffs = uses_dffs
+    if subspace_dimensions is not None:
+      self.M_I = subspace_dimensions[0]
+      self.M_E = subspace_dimensions[1]
+      self.bic_machine = bob.learn.linear.BICMachine(self.use_dffs)
+    else:
+      # without subspace truncation, the machine is always created with DFFS disabled
+      self.bic_machine = bob.learn.linear.BICMachine(False)
+      self.M_I = None
+      self.M_E = None
+
+
+  def _sqr(self, x):
+    """Returns the square of ``x``; this helper is not called by any other method of this class."""
+    return x*x
+
+
+  def _trainset_for(self, pairs):
+    """Computes the array containing the comparison results for the given set of feature pairs.
+
+    The ``comparison_function`` is applied to each pair, and the results are stacked row-wise into one array.
+    """
+    return numpy.vstack([self.comparison_function(f1, f2) for (f1, f2) in pairs])
+
+
+  def _limit_pairs(self, pairs, kind):
+    """Reduces the given pairs to ``maximum_pair_count`` elements, if a limit is configured and exceeded.
+
+    ``kind`` is only used in the log message (``"intrapersonal"`` or ``"extrapersonal"``).
+    """
+    if self.maximum_pair_count is not None and len(pairs) > self.maximum_pair_count:
+      logger.info("  -> Limiting %s pairs from %d to %d", kind, len(pairs), self.maximum_pair_count)
+      return utils.selected_elements(pairs, self.maximum_pair_count)
+    return pairs
+
+
+  def train_enroller(self, train_features, enroller_file):
+    """Trains the BIC machine, i.e., computes intrapersonal and extrapersonal subspaces, and writes it to ``enroller_file``.
+
+    ``train_features`` is passed unchanged to :py:func:`bob.learn.linear.bic_intra_extra_pairs`
+    (presumably a list of feature lists, one per identity -- confirm against the bob.learn.linear documentation).
+    """
+
+    # compute intrapersonal and extrapersonal pairs
+    logger.info("  -> Computing pairs")
+    intra_pairs, extra_pairs = bob.learn.linear.bic_intra_extra_pairs(train_features)
+    # limit pairs, if desired
+    intra_pairs = self._limit_pairs(intra_pairs, "intrapersonal")
+    extra_pairs = self._limit_pairs(extra_pairs, "extrapersonal")
+
+    # train the BIC Machine with these pairs
+    logger.info("  -> Computing %d intrapersonal results", len(intra_pairs))
+    intra_vectors = self._trainset_for(intra_pairs)
+    logger.info("  -> Computing %d extrapersonal results", len(extra_pairs))
+    extra_vectors = self._trainset_for(extra_pairs)
+
+    logger.info("  -> Training BIC machine")
+    if self.M_I is not None:
+      # subspace dimensions were given in the constructor
+      trainer = bob.learn.linear.BICTrainer(self.M_I, self.M_E)
+    else:
+      trainer = bob.learn.linear.BICTrainer()
+    trainer.train(intra_vectors, extra_vectors, self.bic_machine)
+
+    # save the machine to file
+    self.bic_machine.save(bob.io.base.HDF5File(enroller_file, 'w'))
+
+
+  def load_enroller(self, enroller_file):
+    """Reads the trained BIC machine from the given ``enroller_file``."""
+    self.bic_machine.load(bob.io.base.HDF5File(enroller_file, 'r'))
+    # to set this should not be required, but just in case
+    # you re-use a trained enroller file that had a different setup of use_DFFS
+    self.bic_machine.use_DFFS = self.use_dffs
+
+
+  def enroll(self, enroll_features):
+    """Enrolls the model by simply keeping all given features; ``enroll_features`` is returned unchanged."""
+    return enroll_features
+
+
+  def write_model(self, model, model_file):
+    """Writes all features of the model into one HDF5 file, using the ``write_function`` specified in the constructor.
+
+    Each feature is stored in its own group ``Feature<index>``, which is what :py:meth:`read_model` expects.
+    """
+    hdf5 = bob.io.base.HDF5File(model_file, "w")
+    for i, f in enumerate(model):
+      hdf5.create_group("Feature%d" % i)
+      hdf5.cd("Feature%d" % i)
+      self.write_function(f, hdf5)
+      hdf5.cd("..")
+
+
+  def read_model(self, model_file):
+    """Loads all features of the model from the HDF5 file, using the ``read_function`` specified in the constructor.
+
+    Groups ``Feature0``, ``Feature1``, ... are read until the first missing index; the features are returned as a list.
+    """
+    hdf5 = bob.io.base.HDF5File(model_file)
+    i = 0
+    model = []
+    while hdf5.has_group("Feature%d" % i):
+      hdf5.cd("Feature%d" % i)
+      model.append(self.read_function(hdf5))
+      hdf5.cd("..")
+      i += 1
+    return model
+
+
+  def read_probe(self, probe_file):
+    """Loads the probe feature from file, using the ``read_function`` specified in the constructor."""
+    # the constructor stores the reader as ``read_function``; there is no ``load_function`` attribute
+    return self.read_function(bob.io.base.HDF5File(probe_file))
+
+
+  def score(self, model, probe):
+    """Computes the BIC score for the given model and probe pair.
+
+    The probe is compared with every feature of the model, each comparison vector is scored by the BIC machine,
+    and the scores are combined by ``self.model_fusion_function`` (not defined in this class; presumably provided by the base class).
+    """
+    scores = []
+    for feature in model:
+      diff = self.comparison_function(feature, probe)
+      # the comparison vector must match the input size of the trained machine
+      assert len(diff) == self.bic_machine.input_size
+      scores.append(self.bic_machine(diff))
+    return self.model_fusion_function(scores)
--- a/bob/bio/base/algorithm/__init__.py
+++ b/bob/bio/base/algorithm/__init__.py
 from .Algorithm import Algorithm
 from .PCA import PCA
 from .LDA import LDA
+from .BIC import BIC
--- a/bob/bio/base/config/algorithm/bic.py
+++ b/bob/bio/base/config/algorithm/bic.py
+#!/usr/bin/env python
+
+import bob.bio.base
+import numpy
+
+# Default BIC configuration:
+#  * features are compared in input space by element-wise subtraction
+#  * the number of training pairs is limited to 10000
+#  * intrapersonal and extrapersonal subspaces are truncated to 30 dimensions each
+algorithm = bob.bio.base.algorithm.BIC(
+  comparison_function=numpy.subtract,
+  maximum_training_pair_count=10000,
+  subspace_dimensions=(30, 30),
+)
--- a/bob/bio/base/test/data/bic_enroller.hdf5
+++ b/bob/bio/base/test/data/bic_enroller.hdf5
--- a/bob/bio/base/test/data/bic_model.hdf5
+++ b/bob/bio/base/test/data/bic_model.hdf5
--- a/bob/bio/base/test/data/iec_enroller.hdf5
+++ b/bob/bio/base/test/data/iec_enroller.hdf5
--- a/bob/bio/base/test/test_algorithms.py
+++ b/bob/bio/base/test/test_algorithms.py
@@ -209,6 +209,76 @@ def test_lda():
    if os.path.exists(temp_file): os.remove(temp_file)


+
+def test_bic():  # checks BIC config loading, enroller training against reference files, model I/O and scoring
+  temp_file = bob.io.base.test_utils.temporary_filename()
+  # ensure that the registered "bic" algorithm configuration is loadable
+  bic1 = bob.bio.base.load_resource("bic", "algorithm")
+  assert isinstance(bic1, bob.bio.base.algorithm.BIC)
+  assert isinstance(bic1, bob.bio.base.algorithm.Algorithm)
+
+  assert not bic1.performs_projection
+  assert not bic1.requires_projector_training
+  assert not bic1.use_projected_features_for_enrollment
+  assert bic1.requires_enroller_training
+
+
+  # create random training set; the order of the random calls matters for the reference files below
+  train_set = utils.random_training_set_by_id(200, count=10, minimum=0., maximum=255.)
+  # reduced complexity for the test: at most 100 training pairs, subspace dimensions (5,7)
+  bic2 = bob.bio.base.algorithm.BIC(numpy.subtract, 100, (5,7))
+  reference_file = pkg_resources.resource_filename('bob.bio.base.test', 'data/bic_enroller.hdf5')
+  try:
+    # train enroller
+    bic2.train_enroller(train_set, temp_file)
+    assert os.path.exists(temp_file)
+
+    if regenerate_refs: shutil.copy(temp_file, reference_file)
+
+    # load the reference enroller into bic1 and the freshly trained one into bic2, and compare the BIC machines
+    bic1.load_enroller(reference_file)
+    bic2.load_enroller(temp_file)
+
+    assert bic1.bic_machine.is_similar_to(bic2.bic_machine)
+  finally:
+    if os.path.exists(temp_file): os.remove(temp_file)
+
+  # enroll model from random features
+  enroll = utils.random_training_set(200, 5, 0., 255., seed=21)
+  model = bic1.enroll(enroll)
+  _compare(model, pkg_resources.resource_filename('bob.bio.base.test', 'data/bic_model.hdf5'), bic1.write_model, bic1.read_model)
+
+  # compare model with probe; bic1 holds the reference BIC enroller at this point
+  probe = utils.random_array(200, 0., 255., seed=84)
+  reference_score = 0.04994252
+  assert abs(bic1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (bic1.score(model, probe), reference_score)
+  assert abs(bic1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5
+
+  # the same for the IEC, i.e., BIC constructed without subspace dimensions
+  bic3 = bob.bio.base.algorithm.BIC(numpy.subtract, 100)
+  reference_file = pkg_resources.resource_filename('bob.bio.base.test', 'data/iec_enroller.hdf5')
+  try:
+    # train enroller
+    bic3.train_enroller(train_set, temp_file)
+    assert os.path.exists(temp_file)
+
+    if regenerate_refs: shutil.copy(temp_file, reference_file)
+
+    # load the IEC reference enroller into bic1 and the freshly trained one into bic3, and compare the BIC machines
+    bic1.load_enroller(reference_file)
+    bic3.load_enroller(temp_file)
+
+    assert bic1.bic_machine.is_similar_to(bic3.bic_machine)
+  finally:
+    if os.path.exists(temp_file): os.remove(temp_file)
+
+  # compare model with probe; bic1 now holds the IEC reference enroller, hence the different reference score
+  probe = utils.random_array(200, 0., 255., seed=84)
+  reference_score = 0.18119139
+  assert abs(bic1.score(model, probe) - reference_score) < 1e-5, "The scores differ: %3.8f, %3.8f" % (bic1.score(model, probe), reference_score)
+  assert abs(bic1.score_for_multiple_probes(model, [probe, probe]) - reference_score) < 1e-5
+
+
 """
  def test01_gabor_jet(self):
    # read input
@@ -277,59 +347,6 @@ def test_lda():



-  def test05_bic(self):
-    # read input
-    feature = facereclib.utils.load(self.input_dir('linearize.hdf5'))
-    # check that the config file is readable
-    tool = self.config('bic')
-    self.assertTrue(isinstance(tool, facereclib.tools.BIC))
-
-    # here, we use a reduced complexity for test purposes
-    tool = facereclib.tools.BIC(numpy.subtract, 100, (5,7))
-    self.assertFalse(tool.performs_projection)
-    self.assertTrue(tool.requires_enroller_training)
-
-    # train the enroller
-    t = tempfile.mkstemp('bic.hdf5', prefix='frltest_')[1]
-    tool.train_enroller(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=10, minimum=0., maximum=255.), t)
-    if regenerate_refs:
-      import shutil
-      shutil.copy2(t, self.reference_dir('bic_enroller.hdf5'))
-
-    # load the projector file
-    tool.load_enroller(self.reference_dir('bic_enroller.hdf5'))
-    # compare the resulting machines
-    new_machine = bob.learn.linear.BICMachine(bob.io.base.HDF5File(t))
-    self.assertTrue(tool.m_bic_machine.is_similar_to(new_machine))
-    os.remove(t)
-
-    # enroll model
-    model = tool.enroll([feature])
-    self.compare(model, 'bic_model.hdf5')
-
-    # score and compare to the weird reference score ...
-    sim = tool.score(model, feature)
-    self.assertAlmostEqual(sim, 0.31276072)
-
-    # now, test without PCA
-    tool = facereclib.tools.BIC(numpy.subtract, 100)
-    # train the enroller
-    t = tempfile.mkstemp('iec.hdf5', prefix='frltest_')[1]
-    tool.train_enroller(facereclib.utils.tests.random_training_set_by_id(feature.shape, count=10, minimum=0., maximum=255.), t)
-    if regenerate_refs:
-      import shutil
-      shutil.copy2(t, self.reference_dir('iec_enroller.hdf5'))
-
-    # load the projector file
-    tool.load_enroller(self.reference_dir('iec_enroller.hdf5'))
-    # compare the resulting machines
-    new_machine = bob.learn.linear.BICMachine(bob.io.base.HDF5File(t))
-    self.assertTrue(tool.m_bic_machine.is_similar_to(new_machine))
-    os.remove(t)
-
-    # score and compare to the weird reference score ...
-    sim = tool.score(model, feature)
-    self.assertAlmostEqual(sim, 0.4070329180)


  def test06_gmm(self):

--- a/setup.py
+++ b/setup.py
@@ -123,6 +123,7 @@ setup(
        'pca               = bob.bio.base.config.algorithm.pca:algorithm',
        'lda               = bob.bio.base.config.algorithm.lda:algorithm',
        'pca+lda           = bob.bio.base.config.algorithm.lda:algorithm',
+        'bic               = bob.bio.base.config.algorithm.bic:algorithm',
      ],
   },