PCA.py 7.08 KB
 Manuel Günther committed May 04, 2015 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ``````#!/usr/bin/env python # vim: set fileencoding=utf-8 : # Manuel Guenther import bob.learn.linear import bob.io.base import numpy import scipy.spatial from .Algorithm import Algorithm import logging logger = logging.getLogger("bob.bio.base") class PCA (Algorithm): `````` 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 `````` """Performs PCA on the given data. This algorithm computes a PCA projection (:py:class:`bob.learn.linear.PCATrainer`) on the given training features, projects the features to face space and computes the distance of two projected features in face space. For eaxmple, the eigenface algorithm as proposed by [TP91]_ can be run with this class. **Parameters:** subspace_dimension : int or float If specified as ``int``, defines the number of eigenvectors used in the PCA projection matrix. If specified as ``float`` (between 0 and 1), the number of eigenvectors is calculated such that the given percentage of variance is kept. distance_function : function A function taking two parameters and returns a float. If ``uses_variances`` is set to ``True``, the function is provided with a third parameter, which is the vector of variances (aka. eigenvalues). is_distance_function : bool Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better) use_variances : bool If set to ``True``, the ``distance_function`` is provided with a third argument, which is the vector of variances (aka. eigenvalues). """ `````` Manuel Günther committed May 04, 2015 39 40 41 42 43 44 45 46 47 48 `````` def __init__( self, subspace_dimension, # if int, number of subspace dimensions; if float, percentage of variance to keep distance_function = scipy.spatial.distance.euclidean, is_distance_function = True, uses_variances = False, **kwargs # parameters directly sent to the base class ): `````` 49 `````` # call base class constructor and register that the algorithm performs a projection `````` Manuel Günther committed May 04, 2015 50 51 52 53 54 55 56 57 58 59 60 61 `````` Algorithm.__init__( self, performs_projection = True, subspace_dimension = subspace_dimension, distance_function = str(distance_function), is_distance_function = is_distance_function, uses_variances = uses_variances, **kwargs ) `````` Manuel Günther committed May 05, 2015 62 63 64 65 66 67 68 `````` self.subspace_dim = subspace_dimension self.machine = None self.distance_function = distance_function self.factor = -1. if is_distance_function else 1. self.uses_variances = uses_variances `````` Manuel Günther committed May 06, 2015 69 `````` def _check_feature(self, feature, projected=False): `````` Manuel Günther committed May 06, 2015 70 `````` """Checks that the features are appropriate""" `````` Manuel Günther committed May 13, 2015 71 `````` if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64: `````` Manuel Günther committed May 05, 2015 72 `````` raise ValueError("The given feature is not appropriate") `````` Manuel Günther committed May 06, 2015 73 74 75 `````` index = 1 if projected else 0 if self.machine is not None and feature.shape[0] != self.machine.shape[index]: raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.machine.shape[index], feature.shape[0])) `````` Manuel Günther committed May 04, 2015 76 77 78 `````` def train_projector(self, training_features, projector_file): `````` 79 80 81 82 83 84 85 86 87 88 `````` """Generates the PCA covariance matrix and writes it into the given projector_file. **Parameters:** training_features : [1D :py:class:`numpy.ndarray`] A list of 1D training arrays (vectors) to train the PCA projection matrix with. projector_file : str A writable file, into which the PCA projection matrix (as a :py:class:`bob.learn.linear.Machine`) and the eigenvalues will be written. """ `````` Manuel Günther committed May 05, 2015 89 90 `````` # Assure that all data are 1D [self._check_feature(feature) for feature in training_features] `````` Manuel Günther committed May 04, 2015 91 `````` `````` Manuel Günther committed May 05, 2015 92 93 `````` # Initializes the data data = numpy.vstack(training_features) `````` Manuel Günther committed May 04, 2015 94 95 `````` logger.info(" -> Training LinearMachine using PCA") t = bob.learn.linear.PCATrainer() `````` Manuel Günther committed May 05, 2015 96 `````` self.machine, self.variances = t.train(data) `````` Manuel Günther committed May 04, 2015 97 `````` # For re-shaping, we need to copy... `````` Manuel Günther committed May 05, 2015 98 `````` self.variances = self.variances.copy() `````` Manuel Günther committed May 04, 2015 99 100 `````` # compute variance percentage, if desired `````` Manuel Günther committed May 05, 2015 101 102 `````` if isinstance(self.subspace_dim, float): cummulated = numpy.cumsum(self.variances) / numpy.sum(self.variances) `````` Manuel Günther committed May 04, 2015 103 `````` for index in range(len(cummulated)): `````` Manuel Günther committed May 05, 2015 104 105 `````` if cummulated[index] > self.subspace_dim: self.subspace_dim = index `````` Manuel Günther committed May 04, 2015 106 `````` break `````` Manuel Günther committed May 05, 2015 107 108 `````` self.subspace_dim = index logger.info(" ... Keeping %d PCA dimensions", self.subspace_dim) `````` Manuel Günther committed May 04, 2015 109 `````` # re-shape machine `````` Manuel Günther committed May 05, 2015 110 111 `````` self.machine.resize(self.machine.shape[0], self.subspace_dim) self.variances.resize(self.subspace_dim) `````` Manuel Günther committed May 04, 2015 112 113 `````` f = bob.io.base.HDF5File(projector_file, "w") `````` Manuel Günther committed May 05, 2015 114 `````` f.set("Eigenvalues", self.variances) `````` Manuel Günther committed May 04, 2015 115 116 `````` f.create_group("Machine") f.cd("/Machine") `````` Manuel Günther committed May 05, 2015 117 `````` self.machine.save(f) `````` Manuel Günther committed May 04, 2015 118 119 120 `````` def load_projector(self, projector_file): `````` 121 122 123 124 125 126 127 `````` """Reads the PCA projection matrix and the eigenvalues from file. **Parameters:** projector_file : str An existing file, from which the PCA projection matrix and the eigenvalues are read. """ `````` Manuel Günther committed May 04, 2015 128 129 `````` # read PCA projector f = bob.io.base.HDF5File(projector_file) `````` Manuel Günther committed May 05, 2015 130 `````` self.variances = f.read("Eigenvalues") `````` Manuel Günther committed May 04, 2015 131 `````` f.cd("/Machine") `````` Manuel Günther committed May 05, 2015 132 133 `````` self.machine = bob.learn.linear.Machine(f) `````` Manuel Günther committed May 04, 2015 134 135 `````` def project(self, feature): `````` 136 137 138 139 140 141 142 143 144 145 146 147 148 149 `````` """project(feature) -> projected Projects the given feature into eigenspace. **Parameters:** feature : 1D :py:class:`numpy.ndarray` The 1D feature to be projected. **Returns:** projected : 1D :py:class:`numpy.ndarray` The ``feature`` projected into eigenspace. """ `````` Manuel Günther committed May 05, 2015 150 `````` self._check_feature(feature) `````` Manuel Günther committed May 04, 2015 151 `````` # Projects the data `````` Manuel Günther committed May 05, 2015 152 153 `````` return self.machine(feature) `````` Manuel Günther committed May 04, 2015 154 155 `````` def enroll(self, enroll_features): `````` 156 157 158 159 160 161 162 163 164 165 166 167 168 169 `````` """enroll(enroll_features) -> model Enrolls the model by storing all given input vectors. **Parameters:** enroll_features : [1D :py:class:`numpy.ndarray`] The list of projected features to enroll the model from. **Returns:** model : 2D :py:class:`numpy.ndarray` The enrolled model. """ `````` Manuel Günther committed May 04, 2015 170 `````` assert len(enroll_features) `````` Manuel Günther committed May 06, 2015 171 `````` [self._check_feature(feature, True) for feature in enroll_features] `````` Manuel Günther committed May 04, 2015 172 `````` # just store all the features `````` Manuel Günther committed May 05, 2015 173 `````` return numpy.vstack(enroll_features) `````` Manuel Günther committed May 04, 2015 174 175 176 `````` def score(self, model, probe): `````` 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 `````` """score(model, probe) -> float Computes the distance of the model to the probe using the distance function specified in the constructor. **Parameters:** model : 2D :py:class:`numpy.ndarray` The model storing all enrollment features. probe : 1D :py:class:`numpy.ndarray` The probe feature vector in eigenspace. **Returns:** score : float A similarity value between ``model`` and ``probe`` """ `````` Manuel Günther committed May 06, 2015 195 `````` self._check_feature(probe, True) `````` Manuel Günther committed May 04, 2015 196 197 198 199 `````` # return the negative distance (as a similarity measure) if len(model.shape) == 2: # we have multiple models, so we use the multiple model scoring return self.score_for_multiple_models(model, probe) `````` Manuel Günther committed May 05, 2015 200 `````` elif self.uses_variances: `````` Manuel Günther committed May 04, 2015 201 `````` # single model, single probe (multiple probes have already been handled) `````` Manuel Günther committed May 05, 2015 202 `````` return self.factor * self.distance_function(model, probe, self.variances) `````` Manuel Günther committed May 04, 2015 203 204 `````` else: # single model, single probe (multiple probes have already been handled) `````` Manuel Günther committed May 05, 2015 205 `````` return self.factor * self.distance_function(model, probe) `````` Manuel Günther committed Jun 23, 2015 206 207 208 209 `````` # re-define unused functions, just so that they do not get documented def train_enroller(): raise NotImplementedError() def load_enroller(): raise NotImplementedError()``````