From f41083f8e8fda7dd8310f1795e2ff30f8f5e38d6 Mon Sep 17 00:00:00 2001
From: Manuel Gunther <siebenkopf@googlemail.com>
Date: Thu, 11 Feb 2016 09:30:54 -0700
Subject: [PATCH] Added an algorithm that applies a simple distance function

---
 bob/bio/base/algorithm/Distance.py            | 110 ++++++++++++++++++
 bob/bio/base/algorithm/__init__.py            |   1 +
 .../base/config/algorithm/distance_cosine.py  |   9 ++
 .../config/algorithm/distance_euclidean.py    |   9 ++
 bob/bio/base/test/test_algorithms.py          |  38 +++---
 setup.py                                      |   2 +
 version.txt                                   |   2 +-
 7 files changed, 156 insertions(+), 15 deletions(-)
 create mode 100644 bob/bio/base/algorithm/Distance.py
 create mode 100644 bob/bio/base/config/algorithm/distance_cosine.py
 create mode 100644 bob/bio/base/config/algorithm/distance_euclidean.py

diff --git a/bob/bio/base/algorithm/Distance.py b/bob/bio/base/algorithm/Distance.py
new file mode 100644
index 00000000..10291337
--- /dev/null
+++ b/bob/bio/base/algorithm/Distance.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Manuel Gunther <siebenkopf@googlemail.com>
+
+import numpy
+import scipy.spatial
+
+from .Algorithm import Algorithm
+
+import logging
+logger = logging.getLogger("bob.bio.base")
+
+class Distance (Algorithm):
+  """This class defines a simple distance measure between two features.
+  Independent of the actual shape, each feature vector is treated as a one-dimensional vector, and the specified distance function is used to compute the distance between the two features.
+  If the given ``distance_function`` actually computes a distance, we simply return its negative value (as all :py:class:`Algorithm` classes are supposed to return similarity values).
+  If the ``distance_function`` computes similarities, the similarity value is returned unaltered.
+
+  **Parameters:**
+
+  ``distance_function`` : callable
+    A function taking two 1D arrays and returning a ``float``; defaults to :py:func:`scipy.spatial.distance.euclidean`
+
+  ``is_distance_function`` : bool
+    Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better); defaults to ``True``
+
+  ``kwargs`` : ``key=value`` pairs
+    Keyword arguments directly passed to the :py:class:`Algorithm` base class constructor.
+  """
+
+  def __init__(
+      self,
+      distance_function = scipy.spatial.distance.euclidean,
+      is_distance_function = True,
+      **kwargs  # parameters directly sent to the base class
+  ):
+
+    # call base class constructor, handing over the string representation of the distance function and the flag
+    Algorithm.__init__(
+        self,
+        distance_function = str(distance_function),
+        is_distance_function = is_distance_function,
+
+        **kwargs
+    )
+
+    self.distance_function = distance_function
+    self.factor = -1. if is_distance_function else 1.  # sign applied in score(): distances are negated, similarities kept
+
+  def _check_feature(self, feature):
+    """Raises a :py:class:`ValueError` if the given feature is not a :py:class:`numpy.ndarray`"""
+    if not isinstance(feature, numpy.ndarray):
+      raise ValueError("The given feature should be of type numpy.ndarray")
+
+  def enroll(self, enroll_features):
+    """enroll(enroll_features) -> model
+
+    Enrolls the model by flattening and stacking all given input features.
+
+    **Parameters:**
+
+    ``enroll_features`` : [:py:class:`numpy.ndarray`]
+      The non-empty list of features to enroll the model from.
+
+    **Returns:**
+
+    ``model`` : 2D :py:class:`numpy.ndarray`
+      The enrolled model, holding one flattened enrollment feature per row.
+    """
+    assert len(enroll_features)  # at least one feature is required (note: asserts are skipped under ``python -O``)
+    [self._check_feature(feature) for feature in enroll_features]  # validation only, the resulting list is discarded
+    # just store all the features, one flattened feature per row
+    return numpy.vstack([f.flatten() for f in enroll_features])
+
+  def score(self, model, probe):
+    """score(model, probe) -> float
+
+    Computes the distance of the model to the probe using the distance function specified in the constructor.
+
+    **Parameters:**
+
+    ``model`` : 1D or 2D :py:class:`numpy.ndarray`
+      The model storing all enrollment features (2D), or a single flattened enrollment feature (1D)
+
+    ``probe`` : :py:class:`numpy.ndarray`
+      The probe feature vector
+
+    **Returns:**
+
+    ``score`` : float
+      A similarity value between ``model`` and ``probe`` (the negated distance if ``is_distance_function`` was set)
+    """
+    self._check_feature(probe)
+    probe = probe.flatten()
+    # a 2D model holds several enrollment features, anything else is treated as a single feature vector
+    if model.ndim == 2:
+      # we have multiple models, so we use the multiple model scoring (not defined in this class; presumably inherited from Algorithm)
+      return self.score_for_multiple_models(model, probe)
+    else:
+      # single model, single probe: apply the distance function and the sign factor
+      return self.factor * self.distance_function(model, probe)
+
+  # re-define unused functions, just so that they do not get documented (declared without ``self``: the instance is absorbed by *args)
+  def train_projector(*args,**kwargs): raise NotImplementedError()
+  def load_projector(*args,**kwargs): pass
+  def project(*args,**kwargs): raise NotImplementedError()
+  def write_feature(*args,**kwargs): raise NotImplementedError()
+  def read_feature(*args,**kwargs): raise NotImplementedError()
+  def train_enroller(*args,**kwargs): raise NotImplementedError()
+  def load_enroller(*args,**kwargs): pass
diff --git a/bob/bio/base/algorithm/__init__.py b/bob/bio/base/algorithm/__init__.py
index e1bc1ad9..015cf9e6 100644
--- a/bob/bio/base/algorithm/__init__.py
+++ b/bob/bio/base/algorithm/__init__.py
@@ -1,4 +1,5 @@
 from .Algorithm import Algorithm
+from .Distance import Distance
 from .PCA import PCA
 from .LDA import LDA
 from .PLDA import PLDA
diff --git a/bob/bio/base/config/algorithm/distance_cosine.py b/bob/bio/base/config/algorithm/distance_cosine.py
new file mode 100644
index 00000000..00d82ea8
--- /dev/null
+++ b/bob/bio/base/config/algorithm/distance_cosine.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+
+import bob.bio.base
+import scipy.spatial
+
+algorithm = bob.bio.base.algorithm.Distance(
+    distance_function = scipy.spatial.distance.cosine,
+    is_distance_function = True  # cosine *distance* (lower is better), so Distance negates it into a similarity score
+)
diff --git a/bob/bio/base/config/algorithm/distance_euclidean.py b/bob/bio/base/config/algorithm/distance_euclidean.py
new file mode 100644
index 00000000..3ab5422d
--- /dev/null
+++ b/bob/bio/base/config/algorithm/distance_euclidean.py
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+
+import bob.bio.base
+import scipy.spatial
+
+algorithm = bob.bio.base.algorithm.Distance(
+    distance_function = scipy.spatial.distance.euclidean,
+    is_distance_function = True  # euclidean distance (lower is better), so Distance negates it into a similarity score
+)
diff --git a/bob/bio/base/test/test_algorithms.py b/bob/bio/base/test/test_algorithms.py
index ffb872a5..66fd5881 100644
--- a/bob/bio/base/test/test_algorithms.py
+++ b/bob/bio/base/test/test_algorithms.py
@@ -45,20 +45,30 @@ def _compare(data, reference, write_function = bob.bio.base.save, read_function
   assert numpy.allclose(data, read_function(reference), atol=1e-5)
 
 
-def _gmm_stats(self, feature_file, count = 50, minimum = 0, maximum = 1):
-  # generate a random sequence of GMM-Stats features
-  numpy.random.seed(42)
-  train_set = []
-  f = bob.io.base.HDF5File(feature_file)
-  for i in range(count):
-    per_id = []
-    for j in range(count):
-      gmm_stats = bob.learn.em.GMMStats(f)
-      gmm_stats.sum_px = numpy.random.random(gmm_stats.sum_px.shape) * (maximum - minimum) + minimum
-      gmm_stats.sum_pxx = numpy.random.random(gmm_stats.sum_pxx.shape) * (maximum - minimum) + minimum
-      per_id.append(gmm_stats)
-    train_set.append(per_id)
-  return train_set
+def test_distance():
+  # test the two registered distance functions (NOTE(review): ``math.sqrt`` is used below; confirm that this module imports ``math``)
+
+  # euclidean distance
+  euclidean = bob.bio.base.load_resource("distance-euclidean", "algorithm", preferred_package = 'bob.bio.base')
+  assert isinstance(euclidean, bob.bio.base.algorithm.Distance)
+  assert isinstance(euclidean, bob.bio.base.algorithm.Algorithm)
+  assert not euclidean.performs_projection
+  assert not euclidean.requires_projector_training
+  assert not euclidean.use_projected_features_for_enrollment
+  assert not euclidean.split_training_features_by_client
+  assert not euclidean.requires_enroller_training
+
+  # test distance computation: f1 and f2 differ by 1 in each of their 200 elements, so the expected score is -sqrt(200)
+  f1 = numpy.ones((20,10), numpy.float64)
+  f2 = numpy.ones((20,10), numpy.float64) * 2.
+
+  model = euclidean.enroll([f1, f1])
+  assert abs(euclidean.score_for_multiple_probes(model, [f2, f2]) + math.sqrt(200.)) < 1e-6, euclidean.score_for_multiple_probes(model, [f2, f2])
+
+  # test cosine distance: f2 is a scaled copy of f1, so the expected score is 0
+  cosine = bob.bio.base.load_resource("distance-cosine", "algorithm", preferred_package = 'bob.bio.base')
+  model = cosine.enroll([f1, f1])
+  assert abs(cosine.score_for_multiple_probes(model, [f2, f2])) < 1e-8, cosine.score_for_multiple_probes(model, [f2, f2])
 
 
 def test_pca():
diff --git a/setup.py b/setup.py
index 26e9c57d..df866752 100644
--- a/setup.py
+++ b/setup.py
@@ -127,6 +127,8 @@ setup(
 
       'bob.bio.algorithm': [
         'dummy             = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only
+        'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm',
+        'distance-cosine   = bob.bio.base.config.algorithm.distance_cosine:algorithm',
         'pca               = bob.bio.base.config.algorithm.pca:algorithm',
         'lda               = bob.bio.base.config.algorithm.lda:algorithm',
         'pca+lda           = bob.bio.base.config.algorithm.pca_lda:algorithm',
diff --git a/version.txt b/version.txt
index 4b20305a..13cb7641 100644
--- a/version.txt
+++ b/version.txt
@@ -1 +1 @@
-2.0.5b0
\ No newline at end of file
+2.0.5b1
-- 
GitLab