From f41083f8e8fda7dd8310f1795e2ff30f8f5e38d6 Mon Sep 17 00:00:00 2001
From: Manuel Gunther <>
Date: Thu, 11 Feb 2016 09:30:54 -0700
Subject: [PATCH] Added an algorithm that applies a simple distance function

 bob/bio/base/algorithm/            | 110 ++++++++++++++++++
 bob/bio/base/algorithm/            |   1 +
 .../base/config/algorithm/  |   9 ++
 .../config/algorithm/    |   9 ++
 bob/bio/base/test/          |  38 +++---                                      |   2 +
 version.txt                                   |   2 +-
 7 files changed, 156 insertions(+), 15 deletions(-)
 create mode 100644 bob/bio/base/algorithm/
 create mode 100644 bob/bio/base/config/algorithm/
 create mode 100644 bob/bio/base/config/algorithm/

diff --git a/bob/bio/base/algorithm/ b/bob/bio/base/algorithm/
new file mode 100644
index 00000000..10291337
--- /dev/null
+++ b/bob/bio/base/algorithm/
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Manuel Gunther <>
+import numpy
+import scipy.spatial
+from .Algorithm import Algorithm
+import logging
+logger = logging.getLogger("")
+class Distance (Algorithm):
+  """Scores features with a simple distance (or similarity) function.
+
+  Independent of the actual shape, each feature is flattened to a one-dimensional vector, and the specified ``distance_function`` is applied to the two flattened vectors.
+  If the given ``distance_function`` actually computes a distance, its negative value is returned (as all :py:class:`Algorithm`'s are supposed to return similarity values).
+  If the ``distance_function`` computes similarities, the similarity value is returned unaltered.
+
+  **Parameters:**
+
+  ``distance_function`` : callable
+    A function taking two 1D arrays and returning a ``float``
+
+  ``is_distance_function`` : bool
+    Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better)
+
+  ``kwargs`` : ``key=value`` pairs
+    A list of keyword arguments directly passed to the :py:class:`Algorithm` base class constructor.
+  """
+
+  def __init__(
+      self,
+      distance_function = scipy.spatial.distance.euclidean,
+      is_distance_function = True,
+      **kwargs  # parameters directly sent to the base class
+  ):
+    # call base class constructor and hand over the configuration of this algorithm;
+    # the callable itself is passed on as its string representation
+    Algorithm.__init__(
+        self,
+        distance_function = str(distance_function),
+        is_distance_function = is_distance_function,
+        **kwargs
+    )
+
+    self.distance_function = distance_function
+    # distances are multiplied with -1 so that higher scores always mean better matches
+    self.factor = -1. if is_distance_function else 1.
+
+  def _check_feature(self, feature):
+    """Checks that the given feature is a :py:class:`numpy.ndarray`.
+
+    **Raises:**
+
+    ``ValueError``
+      If ``feature`` is of any other type.
+    """
+    if not isinstance(feature, numpy.ndarray):
+      raise ValueError("The given feature should be of type numpy.ndarray")
+
+  def enroll(self, enroll_features):
+    """enroll(enroll_features) -> model
+
+    Enrolls the model by storing all given input vectors.
+
+    **Parameters:**
+
+    ``enroll_features`` : [:py:class:`numpy.ndarray`]
+      The list of features to enroll the model from; must not be empty.
+
+    **Returns:**
+
+    ``model`` : 2D :py:class:`numpy.ndarray`
+      The enrolled model, containing one flattened enrollment feature per row.
+    """
+    # numpy.vstack cannot stack an empty list, so fail early with a readable message
+    assert len(enroll_features), "At least one feature is required to enroll a model"
+    # plain loop: the check is executed for its side effect (raising) only
+    for feature in enroll_features:
+      self._check_feature(feature)
+    # just store all the features
+    return numpy.vstack([f.flatten() for f in enroll_features])
+
+  def score(self, model, probe):
+    """score(model, probe) -> float
+
+    Computes the distance of the model to the probe using the distance function specified in the constructor.
+
+    **Parameters:**
+
+    ``model`` : 2D :py:class:`numpy.ndarray`
+      The model storing all enrollment features
+
+    ``probe`` : :py:class:`numpy.ndarray`
+      The probe feature vector
+
+    **Returns:**
+
+    ``score`` : float
+      A similarity value between ``model`` and ``probe``
+    """
+    self._check_feature(probe)
+    probe = probe.flatten()
+    if model.ndim == 2:
+      # we have multiple models (one per row), so we use the multiple model scoring
+      return self.score_for_multiple_models(model, probe)
+    else:
+      # single model, single probe (multiple probes have already been handled):
+      # return the negative distance (as a similarity measure), or the unaltered similarity
+      return self.factor * self.distance_function(model, probe)
+
+  # re-define unused functions, just so that they do not get documented
+  def train_projector(*args,**kwargs): raise NotImplementedError()
+  def load_projector(*args,**kwargs): pass
+  def project(*args,**kwargs): raise NotImplementedError()
+  def write_feature(*args,**kwargs): raise NotImplementedError()
+  def read_feature(*args,**kwargs): raise NotImplementedError()
+  def train_enroller(*args,**kwargs): raise NotImplementedError()
+  def load_enroller(*args,**kwargs): pass
diff --git a/bob/bio/base/algorithm/ b/bob/bio/base/algorithm/
index e1bc1ad9..015cf9e6 100644
--- a/bob/bio/base/algorithm/
+++ b/bob/bio/base/algorithm/
@@ -1,4 +1,5 @@
 from .Algorithm import Algorithm
+from .Distance import Distance
 from .PCA import PCA
 from .LDA import LDA
 from .PLDA import PLDA
diff --git a/bob/bio/base/config/algorithm/ b/bob/bio/base/config/algorithm/
new file mode 100644
index 00000000..00d82ea8
--- /dev/null
+++ b/bob/bio/base/config/algorithm/
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+import scipy.spatial
+# Resource configuration selecting the cosine distance; the flag marks it as a true
+# distance (not a similarity value).
+# NOTE(review): the callable expected after "algorithm =" and the closing parenthesis are
+# missing in this view of the patch -- confirm against the committed file.
+algorithm =
+    distance_function = scipy.spatial.distance.cosine,
+    is_distance_function = True
diff --git a/bob/bio/base/config/algorithm/ b/bob/bio/base/config/algorithm/
new file mode 100644
index 00000000..3ab5422d
--- /dev/null
+++ b/bob/bio/base/config/algorithm/
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+import scipy.spatial
+# Resource configuration selecting the Euclidean distance; the flag marks it as a true
+# distance (not a similarity value).
+# NOTE(review): the callable expected after "algorithm =" and the closing parenthesis are
+# missing in this view of the patch -- confirm against the committed file.
+algorithm =
+    distance_function = scipy.spatial.distance.euclidean,
+    is_distance_function = True
diff --git a/bob/bio/base/test/ b/bob/bio/base/test/
index ffb872a5..66fd5881 100644
--- a/bob/bio/base/test/
+++ b/bob/bio/base/test/
@@ -45,20 +45,30 @@ def _compare(data, reference, write_function =, read_function
   assert numpy.allclose(data, read_function(reference), atol=1e-5)
-def _gmm_stats(self, feature_file, count = 50, minimum = 0, maximum = 1):
-  # generate a random sequence of GMM-Stats features
-  numpy.random.seed(42)
-  train_set = []
-  f =
-  for i in range(count):
-    per_id = []
-    for j in range(count):
-      gmm_stats = bob.learn.em.GMMStats(f)
-      gmm_stats.sum_px = numpy.random.random(gmm_stats.sum_px.shape) * (maximum - minimum) + minimum
-      gmm_stats.sum_pxx = numpy.random.random(gmm_stats.sum_pxx.shape) * (maximum - minimum) + minimum
-      per_id.append(gmm_stats)
-    train_set.append(per_id)
-  return train_set
+def test_distance():
+  # test the two registered distance functions
+  # NOTE(review): the right-hand sides of the two resource-loading assignments and the second
+  # arguments of the two isinstance checks appear truncated in this view of the patch --
+  # confirm against the committed test file.
+  # euclidean distance
+  euclidean ="distance-euclidean", "algorithm", preferred_package = '')
+  assert isinstance(euclidean,
+  assert isinstance(euclidean,
+  # none of the training / projection related flags is expected to be set for this algorithm
+  assert not euclidean.performs_projection
+  assert not euclidean.requires_projector_training
+  assert not euclidean.use_projected_features_for_enrollment
+  assert not euclidean.split_training_features_by_client
+  assert not euclidean.requires_enroller_training
+  # test distance computation
+  # f1 and f2 hold 200 elements each, all of which differ by exactly 1,
+  # hence their Euclidean distance is sqrt(200)
+  f1 = numpy.ones((20,10), numpy.float64)
+  f2 = numpy.ones((20,10), numpy.float64) * 2.
+  model = euclidean.enroll([f1, f1])
+  # the expected score is the negative distance, so score + sqrt(200) has to vanish
+  assert abs(euclidean.score_for_multiple_probes(model, [f2, f2]) + math.sqrt(200.)) < 1e-6, euclidean.score_for_multiple_probes(model, [f2, f2])
+  # test cosine distance
+  cosine ="distance-cosine", "algorithm", preferred_package = '')
+  model = cosine.enroll([f1, f1])
+  # f2 is a multiple of f1, so the two point in the same direction and the expected score is 0
+  assert abs(cosine.score_for_multiple_probes(model, [f2, f2])) < 1e-8, cosine.score_for_multiple_probes(model, [f2, f2])
 def test_pca():
diff --git a/ b/
index 26e9c57d..df866752 100644
--- a/
+++ b/
@@ -127,6 +127,8 @@ setup(
       '': [
         'dummy             =', # for test purposes only
+        'distance-euclidean =',
+        'distance-cosine   =',
         'pca               =',
         'lda               =',
         'pca+lda           =',
diff --git a/version.txt b/version.txt
index 4b20305a..13cb7641 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
\ No newline at end of file