Commit f41083f8 authored by Manuel Günther
Browse files

Added an algorithm that applies a simple distance function

parent fb9964be
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Gunther <>
import numpy
import scipy.spatial
from .Algorithm import Algorithm
import logging
logger = logging.getLogger("")
class Distance (Algorithm):
    """This class defines a simple distance measure between two features.

    Independent of the actual shape, each feature vector is treated as a one-dimensional vector, and the specified distance function is used to compute the distance between the two features.
    If the given ``distance_function`` actually computes a distance, we simply return its negative value (as all :py:class:`Algorithm`'s are supposed to return similarity values).
    If the ``distance_function`` computes similarities, the similarity value is returned unaltered.

    **Parameters:**

    ``distance_function`` : callable
      A function taking two 1D arrays and returning a ``float``

    ``is_distance_function`` : bool
      Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better)

    ``kwargs`` : ``key=value`` pairs
      A list of keyword arguments directly passed to the :py:class:`Algorithm` base class constructor.
    """

    def __init__(
        self,
        distance_function = scipy.spatial.distance.euclidean,
        is_distance_function = True,
        **kwargs  # parameters directly sent to the base class
    ):
        # call base class constructor and hand over the parameters of this algorithm;
        # the callable is passed on as its string representation
        # NOTE(review): the exact form of the base class call is assumed -- confirm against Algorithm.__init__
        Algorithm.__init__(
            self,
            distance_function = str(distance_function),
            is_distance_function = is_distance_function,
            **kwargs
        )

        self.distance_function = distance_function
        # distances are negated in score() so that higher values always mean "more similar"
        self.factor = -1. if is_distance_function else 1.

    def _check_feature(self, feature):
        """Checks that the features are appropriate.

        Raises a :py:class:`ValueError` if ``feature`` is not a :py:class:`numpy.ndarray`.
        """
        if not isinstance(feature, numpy.ndarray):
            raise ValueError("The given feature should be of type numpy.ndarray")

    def enroll(self, enroll_features):
        """enroll(enroll_features) -> model

        Enrolls the model by storing all given input vectors.

        **Parameters:**

        ``enroll_features`` : [:py:class:`numpy.ndarray`]
          The list of projected features to enroll the model from.

        **Returns:**

        ``model`` : 2D :py:class:`numpy.ndarray`
          The enrolled model.
        """
        assert len(enroll_features)
        for feature in enroll_features:
            self._check_feature(feature)
        # just store all the features: each one is flattened and becomes one row of the model
        return numpy.vstack([f.flatten() for f in enroll_features])

    def score(self, model, probe):
        """score(model, probe) -> float

        Computes the distance of the model to the probe using the distance function specified in the constructor.

        **Parameters:**

        ``model`` : 2D :py:class:`numpy.ndarray`
          The model storing all enrollment features

        ``probe`` : :py:class:`numpy.ndarray`
          The probe feature vector

        **Returns:**

        ``score`` : float
          A similarity value between ``model`` and ``probe``
        """
        # the probe is always compared as a one-dimensional vector
        probe = probe.flatten()
        if model.ndim == 2:
            # we have multiple models, so we use the multiple model scoring
            # NOTE(review): score_for_multiple_models is not defined in this class; presumably inherited from Algorithm
            return self.score_for_multiple_models(model, probe)
        # single model, single probe (multiple probes have already been handled);
        # the factor turns a distance into a similarity (or leaves a similarity untouched)
        return self.factor * self.distance_function(model, probe)

    # re-define unused functions, just so that they do not get documented
    def train_projector(*args,**kwargs): raise NotImplementedError()
    def load_projector(*args,**kwargs): pass
    def project(*args,**kwargs): raise NotImplementedError()
    def write_feature(*args,**kwargs): raise NotImplementedError()
    def read_feature(*args,**kwargs): raise NotImplementedError()
    def train_enroller(*args,**kwargs): raise NotImplementedError()
    def load_enroller(*args,**kwargs): pass
from .Algorithm import Algorithm
from .Distance import Distance
from .PCA import PCA
from .LDA import LDA
from .PLDA import PLDA
#!/usr/bin/env python
import scipy.spatial
algorithm =
distance_function = scipy.spatial.distance.cosine,
is_distance_function = True
#!/usr/bin/env python
import scipy.spatial
algorithm =
distance_function = scipy.spatial.distance.euclidean,
is_distance_function = True
......@@ -45,20 +45,30 @@ def _compare(data, reference, write_function =, read_function
assert numpy.allclose(data, read_function(reference), atol=1e-5)
def _gmm_stats(self, feature_file, count = 50, minimum = 0, maximum = 1):
# generate a random sequence of GMM-Stats features
train_set = []
f =
for i in range(count):
per_id = []
for j in range(count):
gmm_stats = bob.learn.em.GMMStats(f)
gmm_stats.sum_px = numpy.random.random(gmm_stats.sum_px.shape) * (maximum - minimum) + minimum
gmm_stats.sum_pxx = numpy.random.random(gmm_stats.sum_pxx.shape) * (maximum - minimum) + minimum
return train_set
def test_distance():
# test the two registered distance functions
# euclidean distance
euclidean ="distance-euclidean", "algorithm", preferred_package = '')
assert isinstance(euclidean,
assert isinstance(euclidean,
assert not euclidean.performs_projection
assert not euclidean.requires_projector_training
assert not euclidean.use_projected_features_for_enrollment
assert not euclidean.split_training_features_by_client
assert not euclidean.requires_enroller_training
# test distance computation
f1 = numpy.ones((20,10), numpy.float64)
f2 = numpy.ones((20,10), numpy.float64) * 2.
model = euclidean.enroll([f1, f1])
assert abs(euclidean.score_for_multiple_probes(model, [f2, f2]) + math.sqrt(200.)) < 1e-6, euclidean.score_for_multiple_probes(model, [f2, f2])
# test cosine distance
cosine ="distance-cosine", "algorithm", preferred_package = '')
model = cosine.enroll([f1, f1])
assert abs(cosine.score_for_multiple_probes(model, [f2, f2])) < 1e-8, cosine.score_for_multiple_probes(model, [f2, f2])
def test_pca():
......@@ -127,6 +127,8 @@ setup(
'': [
'dummy =', # for test purposes only
'distance-euclidean =',
'distance-cosine =',
'pca =',
'lda =',
'pca+lda =',
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment