Skip to content
Snippets Groups Projects
Commit 9324175d authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Reverted to legacy code

parent 2467a283
No related branches found
No related tags found
2 merge requests!182New processor API,!180[dask] Preparing bob.bio.base for dask pipelines
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# Manuel Guenther <Manuel.Guenther@idiap.ch>
import bob.learn.linear
import bob.io.base
from bob.bio.base.pipelines.vanilla_biometrics.blocks import VanillaBiometricsAlgoritm
import sklearn.decomposition
from scipy.spatial.distance import euclidean
import numpy
import scipy.spatial
import logging
from .Algorithm import Algorithm
import logging
logger = logging.getLogger("bob.bio.base")
class PCA(VanillaBiometricsAlgoritm):
"""Performs a principal component analysis (PCA) on the given data.
class PCA (Algorithm):
"""Performs a principal component analysis (PCA) on the given data.
This algorithm computes a PCA projection (:py:class:`bob.learn.linear.PCATrainer`) on the given training features, projects the features to eigenspace and computes the distance of two projected features in eigenspace.
For example, the eigenface algorithm as proposed by [TP91]_ can be run with this class.
......@@ -29,95 +29,181 @@ class PCA(VanillaBiometricsAlgoritm):
A function that takes two parameters and returns a float.
If ``uses_variances`` is set to ``True``, the function is provided with a third parameter, which is the vector of variances (aka. eigenvalues).
svd_solver: str
The way to solve the eigenvalue problem
is_distance_function : bool
Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better)
factor: float
Multiplication factor used for the scoring stage
uses_variances : bool
If set to ``True``, the ``distance_function`` is provided with a third argument, which is the vector of variances (aka. eigenvalues).
kwargs : ``key=value`` pairs
A list of keyword arguments directly passed to the :py:class:`Algorithm` base class constructor.
"""
def __init__(
self,
subspace_dimension, # if int, number of subspace dimensions; if float, percentage of variance to keep
distance_function=euclidean,
svd_solver="auto",
factor=-1,
**kwargs, # parameters directly sent to the base class
):
def __init__(
    self,
    subspace_dimension,  # if int, number of subspace dimensions; if float, percentage of variance to keep
    distance_function = scipy.spatial.distance.euclidean,
    is_distance_function = True,
    uses_variances = False,
    **kwargs  # parameters directly sent to the base class
):
    """Stores the PCA configuration and registers it with the base class.

    **Parameters:**

    subspace_dimension : int or float
      If int, number of subspace dimensions; if float, percentage of variance to keep.

    distance_function : function
      Callable used to compare a model with a probe; stored unchanged on the instance.

    is_distance_function : bool
      When ``True`` the scoring factor is ``-1.``; otherwise it is ``1.``.

    uses_variances : bool
      Stored on the instance and forwarded to the base class constructor.

    kwargs : ``key=value`` pairs
      Forwarded untouched to the base class constructor.
    """
    # The base class receives a printable form of the distance function,
    # while the instance keeps the callable itself.
    super(PCA, self).__init__(
        performs_projection = True,
        subspace_dimension = subspace_dimension,
        distance_function = str(distance_function),
        is_distance_function = is_distance_function,
        uses_variances = uses_variances,
        **kwargs
    )

    # Sign applied to the distance function's result when scoring.
    if is_distance_function:
        self.factor = -1.
    else:
        self.factor = 1.

    self.distance_function = distance_function
    self.uses_variances = uses_variances
    self.subspace_dim = subspace_dimension
    # No projection matrix until the projector is trained or loaded.
    self.machine = None
def _check_feature(self, feature, projected=False):
    """Validates a feature vector and raises ``ValueError`` if it is unusable.

    The feature must be a 1D :py:class:`numpy.ndarray` of dtype ``float64``.
    When ``self.machine`` is set, its length must also equal
    ``self.machine.shape[1]`` (if ``projected``) or ``self.machine.shape[0]``.
    """
    well_formed = (
        isinstance(feature, numpy.ndarray)
        and feature.ndim == 1
        and feature.dtype == numpy.float64
    )
    if not well_formed:
        raise ValueError("The given feature is not appropriate")

    # Without a machine there is no reference size to compare against.
    if self.machine is None:
        return

    axis = 1 if projected else 0
    if feature.shape[0] != self.machine.shape[axis]:
        raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.machine.shape[axis], feature.shape[0]))
def train_projector(self, training_features, projector_file):
    """Trains the PCA projection and writes it into the given projector_file.

    When ``self.subspace_dim`` is a float it is interpreted as the fraction of
    the total variance to keep; it is replaced by the smallest number of
    leading components whose cumulated variance reaches that fraction.

    **Parameters:**

    training_features : [1D :py:class:`numpy.ndarray`]
      A list of 1D training arrays (vectors) to train the PCA projection matrix with.

    projector_file : str
      A writable file, into which the PCA projection matrix (as a :py:class:`bob.learn.linear.Machine`) and the eigenvalues will be written.
    """
    # Assure that all data are 1D
    for feature in training_features:
        self._check_feature(feature)

    # Initializes the data
    data = numpy.vstack(training_features)
    logger.info(" -> Training LinearMachine using PCA")
    trainer = bob.learn.linear.PCATrainer()
    self.machine, self.variances = trainer.train(data)
    # For re-shaping, we need to copy...
    self.variances = self.variances.copy()

    # compute variance percentage, if desired
    if isinstance(self.subspace_dim, float):
        cumulated = numpy.cumsum(self.variances) / numpy.sum(self.variances)
        # searchsorted returns the POSITION of the first component whose
        # cumulated fraction reaches the target; the number of components to
        # keep is that position plus one.  (Storing the bare position kept too
        # little variance and could even yield zero dimensions.)
        # Clamp to the available count, since rounding may leave the last
        # cumulated value marginally below 1.0.
        position = int(numpy.searchsorted(cumulated, self.subspace_dim))
        self.subspace_dim = min(position + 1, len(cumulated))
    logger.info(" ... Keeping %d PCA dimensions", self.subspace_dim)

    # re-shape machine
    self.machine.resize(self.machine.shape[0], self.subspace_dim)
    self.variances = numpy.resize(self.variances, (self.subspace_dim))

    f = bob.io.base.HDF5File(projector_file, "w")
    try:
        f.set("Eigenvalues", self.variances)
        f.create_group("Machine")
        f.cd("/Machine")
        self.machine.save(f)
    finally:
        # Flush and release the handle even if writing fails part-way.
        f.close()
def load_projector(self, projector_file):
    """Reads the PCA projection matrix and the eigenvalues from file.

    Sets ``self.variances`` from the ``Eigenvalues`` dataset and ``self.machine``
    from the ``/Machine`` group of the file.

    **Parameters:**

    projector_file : str
      An existing file, from which the PCA projection matrix and the eigenvalues are read.
    """
    # read PCA projector
    f = bob.io.base.HDF5File(projector_file)
    try:
        self.variances = f.read("Eigenvalues")
        f.cd("/Machine")
        self.machine = bob.learn.linear.Machine(f)
    finally:
        # Release the handle deterministically instead of waiting for
        # garbage collection.
        f.close()
# call base class constructor and register that the algorithm performs a projection
super(PCA, self).__init__(performs_projection=True)
self.subspace_dim = subspace_dimension
self.distance_function = distance_function
self.svd_solver = svd_solver
self.factor = -1
def project(self, feature):
"""project(feature) -> projected
def fit(self, samplesets, checkpoints):
"""
This method should implement the sub-pipeline 0 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-0`.
Projects the given feature into eigenspace.
It represents the training of background models that an algorithm may need.
**Parameters:**
feature : 1D :py:class:`numpy.ndarray`
The 1D feature to be projected.
Parameters
----------
**Returns:**
samplesets: :py:class:`bob.pipelines.sample.sample.SampleSet`
Set of samples used to train a background model
projected : 1D :py:class:`numpy.ndarray`
The ``feature`` projected into eigenspace.
"""
self._check_feature(feature)
# Projects the data
return self.machine(feature)
checkpoints: str
If provided, must be the path leading to a location where this
model should be saved at (complete path without extension) -
currently, it needs to be provided because of existing
serialization requirements (see bob/bob.io.base#106), but
checkpointing will still work as expected.
"""
def enroll(self, enroll_features):
"""enroll(enroll_features) -> model
pca = sklearn.decomposition.PCA(self.subspace_dim, svd_solver=self.svd_solver)
samples_array = self._stack_samples_2_ndarray(samplesets)
logger.info(
"Training PCA with samples of shape {0}".format(samples_array.shape)
)
pca.fit(samples_array)
Enrolls the model by storing all given input vectors.
# TODO: save the trained PCA model
**Parameters:**
return pca
enroll_features : [1D :py:class:`numpy.ndarray`]
The list of projected features to enroll the model from.
def project_one_sample(self, background_model, data):
if data.ndim == 1:
return background_model.transform(data.reshape(1, -1))
**Returns:**
return background_model.transform(data)
model : 2D :py:class:`numpy.ndarray`
The enrolled model.
"""
assert len(enroll_features)
[self._check_feature(feature, True) for feature in enroll_features]
# just store all the features
return numpy.vstack(enroll_features)
def enroll_one_sample(self, data):
    """Returns the mean of ``data`` taken along its first axis."""
    averaged = numpy.mean(data, axis=0)
    return averaged
def score_one_sample(self, biometric_reference, data):
"""It handles the score computation for one sample
def score(self, model, probe):
"""score(model, probe) -> float
Parameters
----------
Computes the distance of the model to the probe using the distance function specified in the constructor.
biometric_reference : list
Biometric reference to be compared
**Parameters:**
data : list
Data to be compared
model : 2D :py:class:`numpy.ndarray`
The model storing all enrollment features.
Returns
-------
probe : 1D :py:class:`numpy.ndarray`
The probe feature vector in eigenspace.
scores : list
For each sample in a probe, returns as many scores as there are
samples in the probe, together with the probe's and the
relevant reference's subject identifiers.
**Returns:**
"""
score : float
A similarity value between ``model`` and ``probe``
"""
self._check_feature(probe, True)
# return the negative distance (as a similarity measure)
if len(model.shape) == 2:
# we have multiple models, so we use the multiple model scoring
return self.score_for_multiple_models(model, probe)
elif self.uses_variances:
# single model, single probe (multiple probes have already been handled)
return self.factor * self.distance_function(model, probe, self.variances)
else:
# single model, single probe (multiple probes have already been handled)
return self.factor * self.distance_function(model, probe)
return self.factor * self.distance_function(biometric_reference, data)
# re-define unused functions, just so that they do not get documented
def train_enroller(*args, **kwargs):
    # Intentionally has no docstring so that it stays out of the generated
    # documentation; any call is rejected.
    raise NotImplementedError()
def load_enroller(*args, **kwargs):
    # Intentionally has no docstring so that it stays out of the generated
    # documentation; accepts anything and does nothing.
    return None
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment