#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Guenther <Manuel.Guenther@idiap.ch>

import bob.learn.linear
import bob.io.base

import numpy
import scipy.spatial

from .Algorithm import Algorithm

import logging
logger = logging.getLogger("bob.bio.base")


class PCA(Algorithm):
    """Projects features into a PCA subspace and compares them with a distance function.

    The projection matrix is trained with ``bob.learn.linear.PCATrainer``.
    A model consists of all (projected) enrollment features stacked into one
    2D array; scoring applies the configured distance function.
    """

    def __init__(
        self,
        subspace_dimension,  # if int, number of subspace dimensions; if float, percentage of variance to keep
        distance_function=scipy.spatial.distance.euclidean,
        is_distance_function=True,
        uses_variances=False,
        **kwargs  # parameters directly sent to the base class
    ):
        """Initializes the PCA algorithm with the given setup.

        Parameters:
          subspace_dimension: ``int`` number of components to keep, or
            ``float`` fraction of the total variance to keep.
          distance_function: callable invoked as ``f(model, probe)`` or, when
            ``uses_variances`` is set, as ``f(model, probe, variances)``.
          is_distance_function: if true, scores are negated so that a smaller
            distance yields a higher score.
          uses_variances: pass the kept eigenvalues as third argument to
            ``distance_function``.
          kwargs: forwarded unchanged to the ``Algorithm`` constructor.
        """
        # call base class constructor and register that the tool performs a projection
        Algorithm.__init__(
            self,
            performs_projection=True,

            subspace_dimension=subspace_dimension,
            distance_function=str(distance_function),
            is_distance_function=is_distance_function,
            uses_variances=uses_variances,

            **kwargs
        )

        self.m_subspace_dim = subspace_dimension
        self.m_machine = None
        # eigenvalues of the kept components; filled by train_projector / load_projector
        self.m_variances = None
        self.m_distance_function = distance_function
        # distances are turned into similarities by negating them
        self.m_factor = -1. if is_distance_function else 1.
        self.m_uses_variances = uses_variances

    def train_projector(self, training_features, projector_file):
        """Trains the PCA projection and writes it to ``projector_file``.

        Each training feature is flattened and used as one row of the training
        data.  If the subspace dimension was given as a ``float``, it is
        replaced by the smallest number of components whose cumulated variance
        ratio exceeds that value (all components if none does).  The trimmed
        machine and its eigenvalues are stored in an HDF5 file.
        """
        # one flattened training feature per row
        data = numpy.vstack([feature.flatten() for feature in training_features])

        logger.info(" -> Training LinearMachine using PCA")
        trainer = bob.learn.linear.PCATrainer()
        self.m_machine, self.m_variances = trainer.train(data)
        # the in-place resize below is done on a private copy of the array
        self.m_variances = self.m_variances.copy()

        # compute variance percentage, if desired
        if isinstance(self.m_subspace_dim, float):
            cumulated = numpy.cumsum(self.m_variances) / numpy.sum(self.m_variances)
            # ``side="right"`` yields the first index whose cumulated ratio is
            # strictly greater than the threshold; that component itself has to
            # be kept, hence the ``+ 1``.  The count is capped at the number of
            # available components for thresholds that are never exceeded.
            first_above = int(numpy.searchsorted(cumulated, self.m_subspace_dim, side="right"))
            self.m_subspace_dim = min(first_above + 1, len(cumulated))

        logger.info(" ... Keeping %d PCA dimensions", self.m_subspace_dim)

        # re-shape machine and eigenvalues to the kept dimensions
        self.m_machine.resize(self.m_machine.shape[0], self.m_subspace_dim)
        self.m_variances.resize(self.m_subspace_dim)

        f = bob.io.base.HDF5File(projector_file, "w")
        f.set("Eigenvalues", self.m_variances)
        f.create_group("Machine")
        f.cd("/Machine")
        self.m_machine.save(f)

    def load_projector(self, projector_file):
        """Reads the eigenvalues and the PCA projection machine from ``projector_file``."""
        f = bob.io.base.HDF5File(projector_file)
        self.m_variances = f.read("Eigenvalues")
        f.cd("/Machine")
        self.m_machine = bob.learn.linear.Machine(f)

    def project(self, feature):
        """Projects ``feature`` with the trained machine and returns the result.

        A new array is returned on every call, so results of earlier calls are
        not overwritten by later ones, and the method works both after
        ``train_projector`` and after ``load_projector``.
        """
        # without an explicit output argument the machine allocates the result
        return self.m_machine(feature)

    def enroll(self, enroll_features):
        """Enrolls a model by stacking all given feature vectors.

        Returns a 2D ``float64`` array with one row per enrollment feature.
        No averaging is performed.

        Raises:
          ValueError: if ``enroll_features`` is empty.
        """
        if len(enroll_features) == 0:
            raise ValueError("At least one feature is required to enroll a model")

        # just store all the features, one per row
        model = numpy.zeros((len(enroll_features), enroll_features[0].shape[0]), numpy.float64)
        for row, feature in enumerate(enroll_features):
            model[row, :] = feature

        return model

    def score(self, model, probe):
        """Computes the score between ``model`` and ``probe`` with the configured distance function.

        A 2D ``model`` holds several enrolled features and is delegated to
        ``score_for_multiple_models`` (not defined in this class; expected to
        come from the ``Algorithm`` base class).  Otherwise the distance
        function is applied directly and multiplied by ``-1`` if it is a
        distance, so that higher values mean higher similarity.
        """
        if len(model.shape) == 2:
            # we have multiple models, so we use the multiple model scoring
            return self.score_for_multiple_models(model, probe)

        # single model, single probe (multiple probes have already been handled)
        if self.m_uses_variances:
            return self.m_factor * self.m_distance_function(model, probe, self.m_variances)
        return self.m_factor * self.m_distance_function(model, probe)
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Fri Oct 26 17:05:40 CEST 2012
#
# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from .Extractor import Extractor
import numpy


class Linearize(Extractor):
    """Feature extractor that flattens its input into a single 1D vector."""

    def __init__(self, dtype=None):
        """Stores the optional ``dtype`` that extracted features are converted to.

        The ``dtype`` is also passed on to the ``Extractor`` base class; with
        ``None`` the data type of the input is left untouched.
        """
        Extractor.__init__(self, dtype=dtype)
        self.dtype = dtype

    def __call__(self, data):
        """Returns ``data`` reshaped to one dimension with ``data.size`` elements.

        If a ``dtype`` was configured, the flattened array is converted to it.
        """
        flat = numpy.reshape(data, data.size)
        if self.dtype is None:
            # no data type enforced: hand back the reshaped array as it is
            return flat
        return flat.astype(self.dtype)
import bob.bio.base
import bob.io.base.test_utils
import os
import numpy

from . import utils


def _assert_linearized(extracted, data, dtype):
    """Asserts that ``extracted`` is a 1D array holding all elements of the 2D ``data`` with the given ``dtype``."""
    assert len(extracted.shape) == 1
    assert extracted.shape[0] == data.shape[0] * data.shape[1]
    assert extracted.dtype == dtype


def test_linearize():
    """Tests the ``linearize`` extractor resource: extraction, feature IO and dtype enforcement."""
    # load extractor
    extractor = bob.bio.base.load_resource("linearize", "extractor")

    # generate input: first element of a generated training set, expected to be 2D
    data = utils.random_training_set((10, 10), 1)[0]
    assert len(data.shape) == 2

    # extract features; without a configured dtype the input dtype must be preserved
    extracted = extractor(data)
    _assert_linearized(extracted, data, data.dtype)

    # test IO
    filename = bob.io.base.test_utils.temporary_filename()
    try:
        extractor.write_feature(extracted, filename)
        extracted2 = extractor.read_feature(filename)

        assert (extracted == extracted2).all()

    finally:
        # only remove the file if it exists, so that a failure while writing
        # is not masked by a second error raised during cleanup
        if os.path.exists(filename):
            os.remove(filename)

    # extract with dtype
    extractor = bob.bio.base.extractor.Linearize(dtype=numpy.complex128)
    extracted = extractor(data)
    _assert_linearized(extracted, data, numpy.complex128)