Commit f41083f8 by Manuel Günther

### Added an algorithm that applies a simple distance function

parent fb9964be
 #!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Gunther

import numpy
import scipy.spatial

from .Algorithm import Algorithm

import logging
logger = logging.getLogger("bob.bio.base")


class Distance (Algorithm):
    """This class defines a simple distance measure between two features.

    Independent of the actual shape, each feature is flattened into a
    one-dimensional vector, and the specified distance function is used to
    compute the distance between the two features.
    If the given ``distance_function`` actually computes a distance, its
    negative value is returned (as every :py:class:`Algorithm` is supposed to
    return similarity values).
    If the ``distance_function`` computes similarities, the similarity value
    is returned unaltered.

    **Parameters:**

    ``distance_function`` : callable
      A function taking two 1D arrays and returning a ``float``

    ``is_distance_function`` : bool
      Set this flag to ``False`` if the given ``distance_function`` computes a
      similarity value (i.e., higher values are better)

    ``kwargs`` : ``key=value`` pairs
      A list of keyword arguments directly passed to the :py:class:`Algorithm`
      base class constructor.
    """

    def __init__(
        self,
        distance_function=scipy.spatial.distance.euclidean,
        is_distance_function=True,
        **kwargs  # parameters directly sent to the base class
    ):
        # Call the base class constructor. No projection or training flags
        # are passed here, so whatever defaults the base class defines apply.
        # The callable is forwarded as its string representation only
        # (presumably so the base class can describe the configuration --
        # confirm against Algorithm); the callable itself is kept below.
        Algorithm.__init__(
            self,
            distance_function=str(distance_function),
            is_distance_function=is_distance_function,
            **kwargs
        )

        self.distance_function = distance_function
        # distances are negated so that higher scores always mean "more similar"
        self.factor = -1. if is_distance_function else 1.

    def _check_feature(self, feature):
        """Checks that the given feature is a :py:class:`numpy.ndarray`.

        **Raises:**

        ``ValueError`` if ``feature`` is of any other type.
        """
        if not isinstance(feature, numpy.ndarray):
            raise ValueError("The given feature should be of type numpy.ndarray")

    def enroll(self, enroll_features):
        """enroll(enroll_features) -> model

        Enrolls the model by storing all given input vectors.

        **Parameters:**

        ``enroll_features`` : [:py:class:`numpy.ndarray`]
          The list of projected features to enroll the model from.

        **Returns:**

        ``model`` : 2D :py:class:`numpy.ndarray`
          The enrolled model; one flattened enrollment feature per row.

        **Raises:**

        ``ValueError`` if no feature is given, or if any feature is not a
        :py:class:`numpy.ndarray`.
        """
        # Explicit check instead of ``assert``: assertions are removed when
        # Python runs with -O, which would defer the failure to numpy.vstack.
        if len(enroll_features) == 0:
            raise ValueError("At least one feature is required to enroll a model")

        for feature in enroll_features:
            self._check_feature(feature)

        # just store all the features, one flattened feature per row
        return numpy.vstack([feature.flatten() for feature in enroll_features])

    def score(self, model, probe):
        """score(model, probe) -> float

        Computes the distance of the model to the probe using the distance
        function specified in the constructor.

        **Parameters:**

        ``model`` : 2D :py:class:`numpy.ndarray`
          The model storing all enrollment features

        ``probe`` : :py:class:`numpy.ndarray`
          The probe feature vector

        **Returns:**

        ``score`` : float
          A similarity value between ``model`` and ``probe``

        **Raises:**

        ``ValueError`` if ``probe`` is not a :py:class:`numpy.ndarray`.
        """
        self._check_feature(probe)
        probe = probe.flatten()

        if model.ndim == 2:
            # The model holds several enrollment vectors; delegate to the
            # multiple-model scoring, which is not defined in this class
            # (presumably inherited from Algorithm and calling score() once
            # per row -- confirm against the base class).
            return self.score_for_multiple_models(model, probe)

        # single model, single probe (multiple probes have already been
        # handled): return the signed distance as a similarity measure
        return self.factor * self.distance_function(model, probe)

    # re-define unused functions, just so that they do not get documented
    def train_projector(*args, **kwargs): raise NotImplementedError()
    def load_projector(*args, **kwargs): pass
    def project(*args, **kwargs): raise NotImplementedError()
    def write_feature(*args, **kwargs): raise NotImplementedError()
    def read_feature(*args, **kwargs): raise NotImplementedError()
    def train_enroller(*args, **kwargs): raise NotImplementedError()
    def load_enroller(*args, **kwargs): pass
 from .Algorithm import Algorithm from .Distance import Distance from .PCA import PCA from .LDA import LDA from .PLDA import PLDA ... ...
 #!/usr/bin/env python
# Configuration of the Distance algorithm using the cosine distance.

import bob.bio.base
import scipy.spatial

# scipy's ``cosine`` is a distance (lower is better), hence the flag is True
# and the algorithm negates it to obtain a similarity score.
algorithm = bob.bio.base.algorithm.Distance(
    distance_function=scipy.spatial.distance.cosine,
    is_distance_function=True,
)
 #!/usr/bin/env python
# Configuration of the Distance algorithm using the Euclidean distance.

import bob.bio.base
import scipy.spatial

# ``euclidean`` is a true distance (lower is better), hence the flag is True
# and the algorithm negates it to obtain a similarity score.
algorithm = bob.bio.base.algorithm.Distance(
    distance_function=scipy.spatial.distance.euclidean,
    is_distance_function=True,
)
 ... ... @@ -45,20 +45,30 @@ def _compare(data, reference, write_function = bob.bio.base.save, read_function assert numpy.allclose(data, read_function(reference), atol=1e-5) def _gmm_stats(self, feature_file, count = 50, minimum = 0, maximum = 1): # generate a random sequence of GMM-Stats features numpy.random.seed(42) train_set = [] f = bob.io.base.HDF5File(feature_file) for i in range(count): per_id = [] for j in range(count): gmm_stats = bob.learn.em.GMMStats(f) gmm_stats.sum_px = numpy.random.random(gmm_stats.sum_px.shape) * (maximum - minimum) + minimum gmm_stats.sum_pxx = numpy.random.random(gmm_stats.sum_pxx.shape) * (maximum - minimum) + minimum per_id.append(gmm_stats) train_set.append(per_id) return train_set def test_distance(): # test the two registered distance functions # euclidean distance euclidean = bob.bio.base.load_resource("distance-euclidean", "algorithm", preferred_package = 'bob.bio.base') assert isinstance(euclidean, bob.bio.base.algorithm.Distance) assert isinstance(euclidean, bob.bio.base.algorithm.Algorithm) assert not euclidean.performs_projection assert not euclidean.requires_projector_training assert not euclidean.use_projected_features_for_enrollment assert not euclidean.split_training_features_by_client assert not euclidean.requires_enroller_training # test distance computation f1 = numpy.ones((20,10), numpy.float64) f2 = numpy.ones((20,10), numpy.float64) * 2. model = euclidean.enroll([f1, f1]) assert abs(euclidean.score_for_multiple_probes(model, [f2, f2]) + math.sqrt(200.)) < 1e-6, euclidean.score_for_multiple_probes(model, [f2, f2]) # test cosine distance cosine = bob.bio.base.load_resource("distance-cosine", "algorithm", preferred_package = 'bob.bio.base') model = cosine.enroll([f1, f1]) assert abs(cosine.score_for_multiple_probes(model, [f2, f2])) < 1e-8, cosine.score_for_multiple_probes(model, [f2, f2]) def test_pca(): ... ...
 ... ... @@ -127,6 +127,8 @@ setup( 'bob.bio.algorithm': [ 'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only 'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm', 'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm', 'pca = bob.bio.base.config.algorithm.pca:algorithm', 'lda = bob.bio.base.config.algorithm.lda:algorithm', 'pca+lda = bob.bio.base.config.algorithm.pca_lda:algorithm', ... ...
 2.0.5b0 \ No newline at end of file 2.0.5b1
