class Distance (Algorithm):
    """Scores a probe against a model with a plain distance (or similarity) function.

    Independent of their actual shape, features are flattened to one-dimensional vectors before the configured ``distance_function`` is applied.
    If ``distance_function`` computes a distance, its negative value is returned (as all :py:class:`Algorithm`'s are supposed to return similarity values).
    If ``distance_function`` computes a similarity, the value is returned unaltered.

    **Parameters:**

    ``distance_function`` : callable
      A function taking two 1D arrays and returning a ``float``

    ``is_distance_function`` : bool
      Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better)

    ``kwargs`` : ``key=value`` pairs
      A list of keyword arguments directly passed to the :py:class:`Algorithm` base class constructor.
    """

    def __init__(
        self,
        distance_function = scipy.spatial.distance.euclidean,
        is_distance_function = True,
        **kwargs  # parameters directly sent to the base class
    ):

        # call base class constructor; the two settings are handed over as
        # keyword arguments (the function as its string representation).
        # No projection, projector training or enroller training is requested here.
        Algorithm.__init__(
            self,
            distance_function = str(distance_function),
            is_distance_function = is_distance_function,

            **kwargs
        )

        self.distance_function = distance_function
        # distances are negated so that higher scores always mean "more similar"
        self.factor = -1. if is_distance_function else 1.

    def _check_feature(self, feature):
        """Checks that the given feature is a :py:class:`numpy.ndarray`.

        **Parameters:**

        ``feature`` : object
          The feature to check.

        **Raises:**

        ``ValueError`` if ``feature`` is not a :py:class:`numpy.ndarray`.
        """
        if not isinstance(feature, numpy.ndarray):
            raise ValueError("The given feature should be of type numpy.ndarray")

    def enroll(self, enroll_features):
        """enroll(enroll_features) -> model

        Enrolls the model by storing all given input vectors.

        **Parameters:**

        ``enroll_features`` : [:py:class:`numpy.ndarray`]
          The list of features to enroll the model from; must not be empty.

        **Returns:**

        ``model`` : 2D :py:class:`numpy.ndarray`
          The enrolled model: one flattened enrollment feature per row.

        **Raises:**

        ``ValueError`` if no features are given, or if any feature is not a :py:class:`numpy.ndarray`.
        """
        # explicit check instead of ``assert``, which is stripped when running with ``python -O``
        if len(enroll_features) == 0:
            raise ValueError("At least one feature is required to enroll a model")
        for feature in enroll_features:
            self._check_feature(feature)
        # just store all the features, one flattened feature per row
        return numpy.vstack([feature.flatten() for feature in enroll_features])

    def score(self, model, probe):
        """score(model, probe) -> float

        Computes the similarity of the model to the probe using the distance function specified in the constructor.

        **Parameters:**

        ``model`` : 1D or 2D :py:class:`numpy.ndarray`
          The model storing all enrollment features (2D), or a single enrollment feature vector (1D)

        ``probe`` : :py:class:`numpy.ndarray`
          The probe feature vector

        **Returns:**

        ``score`` : float
          A similarity value between ``model`` and ``probe``

        **Raises:**

        ``ValueError`` if ``probe`` is not a :py:class:`numpy.ndarray`.
        """
        self._check_feature(probe)
        probe = probe.flatten()
        if model.ndim == 2:
            # the model holds several enrollment vectors; delegate to the multiple-model
            # scoring, which is expected to be provided by the base class
            return self.score_for_multiple_models(model, probe)
        else:
            # single model vector, single probe: negated distance, or unaltered similarity
            return self.factor * self.distance_function(model, probe)

    # re-define unused functions, just so that they do not get documented
    def train_projector(*args,**kwargs): raise NotImplementedError()
    def load_projector(*args,**kwargs): pass
    def project(*args,**kwargs): raise NotImplementedError()
    def write_feature(*args,**kwargs): raise NotImplementedError()
    def read_feature(*args,**kwargs): raise NotImplementedError()
    def train_enroller(*args,**kwargs): raise NotImplementedError()
    def load_enroller(*args,**kwargs): pass
def test_distance():
    """Checks the two registered distance algorithms (euclidean and cosine)."""

    # load the euclidean variant from the resource registry and verify its type
    euclidean = bob.bio.base.load_resource("distance-euclidean", "algorithm", preferred_package = 'bob.bio.base')
    assert isinstance(euclidean, bob.bio.base.algorithm.Distance)
    assert isinstance(euclidean, bob.bio.base.algorithm.Algorithm)

    # none of the training / projection flags may be set for this algorithm
    disabled_flags = (
        "performs_projection",
        "requires_projector_training",
        "use_projected_features_for_enrollment",
        "split_training_features_by_client",
        "requires_enroller_training",
    )
    for flag_name in disabled_flags:
        assert not getattr(euclidean, flag_name)

    # two constant features: all ones, and all twos
    shape = (20, 10)
    f1 = numpy.ones(shape, numpy.float64)
    f2 = numpy.ones(shape, numpy.float64) * 2.
    probes = [f2, f2]

    # euclidean: 200 elements, each differing by 1 -> distance sqrt(200), returned negated
    model = euclidean.enroll([f1, f1])
    euclidean_score = euclidean.score_for_multiple_probes(model, probes)
    assert abs(euclidean_score + math.sqrt(200.)) < 1e-6, euclidean_score

    # cosine: both features point in the same direction -> distance 0
    cosine = bob.bio.base.load_resource("distance-cosine", "algorithm", preferred_package = 'bob.bio.base')
    model = cosine.enroll([f1, f1])
    cosine_score = cosine.score_for_multiple_probes(model, probes)
    assert abs(cosine_score) < 1e-8, cosine_score