From e92c046c5a2d447085077a406586457901f4448b Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Tue, 10 Mar 2020 15:30:00 +0100 Subject: [PATCH] Reverted to legacy code --- bob/bio/base/algorithm/PCA.py | 230 +++++++++++++++++++++++----------- 1 file changed, 158 insertions(+), 72 deletions(-) diff --git a/bob/bio/base/algorithm/PCA.py b/bob/bio/base/algorithm/PCA.py index 425e0986..a856292d 100644 --- a/bob/bio/base/algorithm/PCA.py +++ b/bob/bio/base/algorithm/PCA.py @@ -1,20 +1,20 @@ #!/usr/bin/env python # vim: set fileencoding=utf-8 : -# Tiago de Freitas Pereira <tiago.pereira@idiap.ch> +# Manuel Guenther <Manuel.Guenther@idiap.ch> +import bob.learn.linear +import bob.io.base -from bob.bio.base.pipelines.vanilla_biometrics.blocks import VanillaBiometricsAlgoritm -import sklearn.decomposition -from scipy.spatial.distance import euclidean import numpy +import scipy.spatial -import logging +from .Algorithm import Algorithm +import logging logger = logging.getLogger("bob.bio.base") - -class PCA(VanillaBiometricsAlgoritm): - """Performs a principal component analysis (PCA) on the given data. +class PCA (Algorithm): + """Performs a principal component analysis (PCA) on the given data. This algorithm computes a PCA projection (:py:class:`bob.learn.linear.PCATrainer`) on the given training features, projects the features to eigenspace and computes the distance of two projected features in eigenspace. For example, the eigenface algorithm as proposed by [TP91]_ can be run with this class. @@ -29,95 +29,181 @@ class PCA(VanillaBiometricsAlgoritm): A function taking two parameters and returns a float. If ``uses_variances`` is set to ``True``, the function is provided with a third parameter, which is the vector of variances (aka. eigenvalues). - svd_solver: std - The way to solve the eigen value problem + is_distance_function : bool + Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better) - factor: float - Multiplication factor used for the scoring stage + use_variances : bool + If set to ``True``, the ``distance_function`` is provided with a third argument, which is the vector of variances (aka. eigenvalues). kwargs : ``key=value`` pairs A list of keyword arguments directly passed to the :py:class:`Algorithm` base class constructor. """ - def __init__( - self, - subspace_dimension, # if int, number of subspace dimensions; if float, percentage of variance to keep - distance_function=euclidean, - svd_solver="auto", - factor=-1, - **kwargs, # parameters directly sent to the base class - ): + def __init__( + self, + subspace_dimension, # if int, number of subspace dimensions; if float, percentage of variance to keep + distance_function = scipy.spatial.distance.euclidean, + is_distance_function = True, + uses_variances = False, + **kwargs # parameters directly sent to the base class + ): + + # call base class constructor and register that the algorithm performs a projection + super(PCA, self).__init__( + performs_projection = True, + + subspace_dimension = subspace_dimension, + distance_function = str(distance_function), + is_distance_function = is_distance_function, + uses_variances = uses_variances, + + **kwargs + ) + + self.subspace_dim = subspace_dimension + self.machine = None + self.distance_function = distance_function + self.factor = -1. if is_distance_function else 1. + self.uses_variances = uses_variances + + + def _check_feature(self, feature, projected=False): + """Checks that the features are appropriate""" + if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64: + raise ValueError("The given feature is not appropriate") + index = 1 if projected else 0 + if self.machine is not None and feature.shape[0] != self.machine.shape[index]: + raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.machine.shape[index], feature.shape[0])) + + + def train_projector(self, training_features, projector_file): + """Generates the PCA covariance matrix and writes it into the given projector_file. + + **Parameters:** + + training_features : [1D :py:class:`numpy.ndarray`] + A list of 1D training arrays (vectors) to train the PCA projection matrix with. + + projector_file : str + A writable file, into which the PCA projection matrix (as a :py:class:`bob.learn.linear.Machine`) and the eigenvalues will be written. + """ + # Assure that all data are 1D + [self._check_feature(feature) for feature in training_features] + + # Initializes the data + data = numpy.vstack(training_features) + logger.info(" -> Training LinearMachine using PCA") + t = bob.learn.linear.PCATrainer() + self.machine, self.variances = t.train(data) + # For re-shaping, we need to copy... + self.variances = self.variances.copy() + + # compute variance percentage, if desired + if isinstance(self.subspace_dim, float): + cummulated = numpy.cumsum(self.variances) / numpy.sum(self.variances) + for index in range(len(cummulated)): + if cummulated[index] > self.subspace_dim: + self.subspace_dim = index + break + self.subspace_dim = index + logger.info(" ... Keeping %d PCA dimensions", self.subspace_dim) + # re-shape machine + self.machine.resize(self.machine.shape[0], self.subspace_dim) + self.variances = numpy.resize(self.variances, (self.subspace_dim)) + + f = bob.io.base.HDF5File(projector_file, "w") + f.set("Eigenvalues", self.variances) + f.create_group("Machine") + f.cd("/Machine") + self.machine.save(f) + + + def load_projector(self, projector_file): + """Reads the PCA projection matrix and the eigenvalues from file. + + **Parameters:** + + projector_file : str + An existing file, from which the PCA projection matrix and the eigenvalues are read. + """ + # read PCA projector + f = bob.io.base.HDF5File(projector_file) + self.variances = f.read("Eigenvalues") + f.cd("/Machine") + self.machine = bob.learn.linear.Machine(f) - # call base class constructor and register that the algorithm performs a projection - super(PCA, self).__init__(performs_projection=True) - self.subspace_dim = subspace_dimension - self.distance_function = distance_function - self.svd_solver = svd_solver - self.factor = -1 + def project(self, feature): + """project(feature) -> projected - def fit(self, samplesets, checkpoints): - """ - This method should implement the sub-pipeline 0 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-0`. + Projects the given feature into eigenspace. - It represents the training of background models that an algorithm may need. + **Parameters:** + + feature : 1D :py:class:`numpy.ndarray` + The 1D feature to be projected. - Parameters - ---------- + **Returns:** - samplesets: :py:class:`bob.pipelines.sample.sample.SampleSet` - Set of samples used to train a background model + projected : 1D :py:class:`numpy.ndarray` + The ``feature`` projected into eigenspace. + """ + self._check_feature(feature) + # Projects the data + return self.machine(feature) - checkpoint: str - If provided, must the path leading to a location where this - model should be saved at (complete path without extension) - - currently, it needs to be provided because of existing - serialization requirements (see bob/bob.io.base#106), but - checkpointing will still work as expected. - - """ + def enroll(self, enroll_features): + """enroll(enroll_features) -> model - pca = sklearn.decomposition.PCA(self.subspace_dim, svd_solver=self.svd_solver) - samples_array = self._stack_samples_2_ndarray(samplesets) - logger.info( - "Training PCA with samples of shape {0}".format(samples_array.shape) - ) - pca.fit(samples_array) + Enrolls the model by storing all given input vectors. - # TODO: save the shit + **Parameters:** - return pca + enroll_features : [1D :py:class:`numpy.ndarray`] + The list of projected features to enroll the model from. - def project_one_sample(self, background_model, data): - if data.ndim == 1: - return background_model.transform(data.reshape(1, -1)) + **Returns:** - return background_model.transform(data) + model : 2D :py:class:`numpy.ndarray` + The enrolled model. + """ + assert len(enroll_features) + [self._check_feature(feature, True) for feature in enroll_features] + # just store all the features + return numpy.vstack(enroll_features) - def enroll_one_sample(self, data): - return numpy.mean(data, axis=0) - def score_one_sample(self, biometric_reference, data): - """It handles the score computation for one sample + def score(self, model, probe): + """score(model, probe) -> float - Parameters - ---------- + Computes the distance of the model to the probe using the distance function specified in the constructor. - biometric_reference : list - Biometric reference to be compared + **Parameters:** - data : list - Data to be compared + model : 2D :py:class:`numpy.ndarray` + The model storing all enrollment features. - Returns - ------- + probe : 1D :py:class:`numpy.ndarray` + The probe feature vector in eigenspace. - scores : list - For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the - relevant reference's subject identifiers. + **Returns:** - """ + score : float + A similarity value between ``model`` and ``probe`` + """ + self._check_feature(probe, True) + # return the negative distance (as a similarity measure) + if len(model.shape) == 2: + # we have multiple models, so we use the multiple model scoring + return self.score_for_multiple_models(model, probe) + elif self.uses_variances: + # single model, single probe (multiple probes have already been handled) + return self.factor * self.distance_function(model, probe, self.variances) + else: + # single model, single probe (multiple probes have already been handled) + return self.factor * self.distance_function(model, probe) - return self.factor * self.distance_function(biometric_reference, data) + # re-define unused functions, just so that they do not get documented + def train_enroller(*args,**kwargs): raise NotImplementedError() + def load_enroller(*args,**kwargs): pass -- GitLab