PCA.py 7.08 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Guenther <Manuel.Guenther@idiap.ch>

import bob.learn.linear
import bob.io.base

import numpy
import scipy.spatial

from .Algorithm import Algorithm

import logging
logger = logging.getLogger("bob.bio.base")

class PCA (Algorithm):
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
  """Performs PCA on the given data.

  This algorithm computes a PCA projection (:py:class:`bob.learn.linear.PCATrainer`) on the given training features, projects the features to face space and computes the distance of two projected features in face space.
  For example, the eigenface algorithm as proposed by [TP91]_ can be run with this class.

  **Parameters:**

  subspace_dimension : int or float
    If specified as ``int``, defines the number of eigenvectors used in the PCA projection matrix.
    If specified as ``float`` (between 0 and 1), the number of eigenvectors is calculated such that the given percentage of variance is kept.

  distance_function : function
    A function taking two parameters and returns a float.
    If ``uses_variances`` is set to ``True``, the function is provided with a third parameter, which is the vector of variances (aka. eigenvalues).

  is_distance_function : bool
    Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better)

  use_variances : bool
    If set to ``True``, the ``distance_function`` is provided with a third argument, which is the vector of variances (aka. eigenvalues).

  """
39
40
41
42
43
44
45
46
47
48

  def __init__(
      self,
      subspace_dimension,  # if int, number of subspace dimensions; if float, percentage of variance to keep
      distance_function = scipy.spatial.distance.euclidean,
      is_distance_function = True,
      uses_variances = False,
      **kwargs  # parameters directly sent to the base class
  ):
    """Stores the PCA configuration and initializes the base class.

    **Parameters:**

    subspace_dimension : int or float
      If ``int``, the number of subspace dimensions; if ``float``, the percentage of variance to keep.

    distance_function : function
      The function used in :py:meth:`score` to compare a model with a probe.

    is_distance_function : bool
      If ``True``, the value of ``distance_function`` is multiplied by ``-1`` in :py:meth:`score`; otherwise it is used unchanged.

    uses_variances : bool
      If ``True``, ``distance_function`` is called with the variances as third argument.

    kwargs : ``key=value`` pairs
      Passed on to the base class constructor unchanged.
    """
    # the base class is told that this algorithm performs a projection;
    # the distance function is handed over as its string representation
    Algorithm.__init__(
        self,
        performs_projection = True,
        subspace_dimension = subspace_dimension,
        distance_function = str(distance_function),
        is_distance_function = is_distance_function,
        uses_variances = uses_variances,
        **kwargs
    )

    # configuration that is used during training
    self.subspace_dim = subspace_dimension
    # the projection machine is only available after training or loading
    self.machine = None

    # configuration that is used during scoring
    self.distance_function = distance_function
    if is_distance_function:
      # distances are negated when scoring
      self.factor = -1.
    else:
      self.factor = 1.
    self.uses_variances = uses_variances


69
  def _check_feature(self, feature, projected=False):
70
    """Checks that the features are appropriate"""
71
    if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64:
Manuel Günther's avatar
Manuel Günther committed
72
      raise ValueError("The given feature is not appropriate")
73
74
75
    index = 1 if projected else 0
    if self.machine is not None and feature.shape[0] != self.machine.shape[index]:
      raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.machine.shape[index], feature.shape[0]))
76
77
78


  def train_projector(self, training_features, projector_file):
79
80
81
82
83
84
85
86
87
88
    """Generates the PCA covariance matrix and writes it into the given projector_file.

    **Parameters:**

    training_features : [1D :py:class:`numpy.ndarray`]
      A list of 1D training arrays (vectors) to train the PCA projection matrix with.

    projector_file : str
      A writable file, into which the PCA projection matrix (as a :py:class:`bob.learn.linear.Machine`) and the eigenvalues will be written.
    """
Manuel Günther's avatar
Manuel Günther committed
89
90
    # Assure that all data are 1D
    [self._check_feature(feature) for feature in training_features]
91

Manuel Günther's avatar
Manuel Günther committed
92
93
    # Initializes the data
    data = numpy.vstack(training_features)
94
95
    logger.info("  -> Training LinearMachine using PCA")
    t = bob.learn.linear.PCATrainer()
Manuel Günther's avatar
Manuel Günther committed
96
    self.machine, self.variances = t.train(data)
97
    # For re-shaping, we need to copy...
Manuel Günther's avatar
Manuel Günther committed
98
    self.variances = self.variances.copy()
99
100

    # compute variance percentage, if desired
Manuel Günther's avatar
Manuel Günther committed
101
102
    if isinstance(self.subspace_dim, float):
      cummulated = numpy.cumsum(self.variances) / numpy.sum(self.variances)
103
      for index in range(len(cummulated)):
Manuel Günther's avatar
Manuel Günther committed
104
105
        if cummulated[index] > self.subspace_dim:
          self.subspace_dim = index
106
          break
Manuel Günther's avatar
Manuel Günther committed
107
108
      self.subspace_dim = index
    logger.info("    ... Keeping %d PCA dimensions", self.subspace_dim)
109
    # re-shape machine
Manuel Günther's avatar
Manuel Günther committed
110
111
    self.machine.resize(self.machine.shape[0], self.subspace_dim)
    self.variances.resize(self.subspace_dim)
112
113

    f = bob.io.base.HDF5File(projector_file, "w")
Manuel Günther's avatar
Manuel Günther committed
114
    f.set("Eigenvalues", self.variances)
115
116
    f.create_group("Machine")
    f.cd("/Machine")
Manuel Günther's avatar
Manuel Günther committed
117
    self.machine.save(f)
118
119
120


  def load_projector(self, projector_file):
    """Loads the eigenvalues and the PCA projection machine from the given file.

    The eigenvalues are stored in ``self.variances`` and the machine in ``self.machine``.

    **Parameters:**

    projector_file : str
      An existing file, from which the PCA projection matrix and the eigenvalues are read.
    """
    # open the projector file
    hdf5 = bob.io.base.HDF5File(projector_file)
    # the eigenvalues are stored at top level ...
    self.variances = hdf5.read("Eigenvalues")
    # ... while the machine is read from its own group
    hdf5.cd("/Machine")
    self.machine = bob.learn.linear.Machine(hdf5)

134
135

  def project(self, feature):
136
137
138
139
140
141
142
143
144
145
146
147
148
149
    """project(feature) -> projected

    Projects the given feature into eigenspace.

    **Parameters:**

    feature : 1D :py:class:`numpy.ndarray`
      The 1D feature to be projected.

    **Returns:**

    projected : 1D :py:class:`numpy.ndarray`
      The ``feature`` projected into eigenspace.
    """
Manuel Günther's avatar
Manuel Günther committed
150
    self._check_feature(feature)
151
    # Projects the data
Manuel Günther's avatar
Manuel Günther committed
152
153
    return self.machine(feature)

154
155

  def enroll(self, enroll_features):
156
157
158
159
160
161
162
163
164
165
166
167
168
169
    """enroll(enroll_features) -> model

    Enrolls the model by storing all given input vectors.

    **Parameters:**

    enroll_features : [1D :py:class:`numpy.ndarray`]
      The list of projected features to enroll the model from.

    **Returns:**

    model : 2D :py:class:`numpy.ndarray`
      The enrolled model.
    """
170
    assert len(enroll_features)
171
    [self._check_feature(feature, True) for feature in enroll_features]
172
    # just store all the features
Manuel Günther's avatar
Manuel Günther committed
173
    return numpy.vstack(enroll_features)
174
175
176


  def score(self, model, probe):
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
    """score(model, probe) -> float

    Computes the distance of the model to the probe using the distance function specified in the constructor.

    **Parameters:**

    model : 2D :py:class:`numpy.ndarray`
      The model storing all enrollment features.

    probe : 1D :py:class:`numpy.ndarray`
      The probe feature vector in eigenspace.

    **Returns:**

    score : float
      A similarity value between ``model`` and ``probe``

    """
195
    self._check_feature(probe, True)
196
197
198
199
    # return the negative distance (as a similarity measure)
    if len(model.shape) == 2:
      # we have multiple models, so we use the multiple model scoring
      return self.score_for_multiple_models(model, probe)
Manuel Günther's avatar
Manuel Günther committed
200
    elif self.uses_variances:
201
      # single model, single probe (multiple probes have already been handled)
Manuel Günther's avatar
Manuel Günther committed
202
      return self.factor * self.distance_function(model, probe, self.variances)
203
204
    else:
      # single model, single probe (multiple probes have already been handled)
Manuel Günther's avatar
Manuel Günther committed
205
      return self.factor * self.distance_function(model, probe)
Manuel Günther's avatar
Manuel Günther committed
206
207
208
209

  # re-define unused functions, just so that they do not get documented
  def train_enroller(): raise NotImplementedError()
  def load_enroller(): raise NotImplementedError()