Commit 02b51788 authored by Manuel Günther

Added first algorithm (PCA) and extractor (Linearize)

parent faad4b36
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Manuel Guenther <Manuel.Guenther@idiap.ch>
import bob.learn.linear
import bob.io.base
import numpy
import scipy.spatial
from .Algorithm import Algorithm
import logging
logger = logging.getLogger("bob.bio.base")
class PCA (Algorithm):
  """Tool for computing eigenfaces"""

  def __init__(
      self,
      subspace_dimension,  # if int, number of subspace dimensions; if float, percentage of variance to keep
      distance_function = scipy.spatial.distance.euclidean,
      is_distance_function = True,
      uses_variances = False,
      **kwargs  # parameters directly sent to the base class
  ):
    """Initializes the PCA Algorithm with the given setup"""
    # call base class constructor and register that the tool performs a projection
    Algorithm.__init__(
        self,
        performs_projection = True,
        subspace_dimension = subspace_dimension,
        distance_function = str(distance_function),
        is_distance_function = is_distance_function,
        uses_variances = uses_variances,
        **kwargs
    )

    self.m_subspace_dim = subspace_dimension
    self.m_machine = None
    self.m_distance_function = distance_function
    self.m_factor = -1. if is_distance_function else 1.
    self.m_uses_variances = uses_variances
  def train_projector(self, training_features, projector_file):
    """Trains the PCA projection matrix and writes it to the given projector file"""
    # Initializes the data
    data = numpy.vstack([feature.flatten() for feature in training_features])

    logger.info(" -> Training LinearMachine using PCA")
    t = bob.learn.linear.PCATrainer()
    self.m_machine, self.m_variances = t.train(data)
    # For re-shaping, we need to copy...
    self.m_variances = self.m_variances.copy()

    # compute variance percentage, if desired
    if isinstance(self.m_subspace_dim, float):
      cummulated = numpy.cumsum(self.m_variances) / numpy.sum(self.m_variances)
      for index in range(len(cummulated)):
        if cummulated[index] > self.m_subspace_dim:
          self.m_subspace_dim = index
          break
      self.m_subspace_dim = index
      logger.info(" ... Keeping %d PCA dimensions", self.m_subspace_dim)

    # re-shape machine
    self.m_machine.resize(self.m_machine.shape[0], self.m_subspace_dim)
    self.m_variances.resize(self.m_subspace_dim)

    f = bob.io.base.HDF5File(projector_file, "w")
    f.set("Eigenvalues", self.m_variances)
    f.create_group("Machine")
    f.cd("/Machine")
    self.m_machine.save(f)
  def load_projector(self, projector_file):
    """Reads the PCA projection matrix from file"""
    # read PCA projector
    f = bob.io.base.HDF5File(projector_file)
    self.m_variances = f.read("Eigenvalues")
    f.cd("/Machine")
    self.m_machine = bob.learn.linear.Machine(f)
    # Allocates an array for the projected data
    self.m_projected_feature = numpy.ndarray(self.m_machine.shape[1], numpy.float64)

  def project(self, feature):
    """Projects the data using the stored projection matrix"""
    # Projects the data
    self.m_machine(feature, self.m_projected_feature)
    # return the projected data
    return self.m_projected_feature
  def enroll(self, enroll_features):
    """Enrolls the model by storing all given input vectors"""
    assert len(enroll_features)
    # just store all the features in a 2D array, one feature per row
    model = numpy.zeros((len(enroll_features), enroll_features[0].shape[0]), numpy.float64)
    for n, feature in enumerate(enroll_features):
      model[n,:] += feature[:]
    # return enrolled model
    return model
  def score(self, model, probe):
    """Computes the distance of the model to the probe using the distance function taken from the config file"""
    # return the negative distance (as a similarity measure)
    if len(model.shape) == 2:
      # we have multiple models, so we use the multiple model scoring
      return self.score_for_multiple_models(model, probe)
    elif self.m_uses_variances:
      # single model, single probe (multiple probes have already been handled)
      return self.m_factor * self.m_distance_function(model, probe, self.m_variances)
    else:
      # single model, single probe (multiple probes have already been handled)
      return self.m_factor * self.m_distance_function(model, probe)
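
Before the package imports and configuration files that follow, here is a minimal usage sketch of the PCA class above, run outside of a full bob.bio pipeline. It is not part of this commit: the random data, the feature sizes and the temporary file are illustrative assumptions; only the methods defined above are called.

import os
import tempfile
import numpy
from bob.bio.base.algorithm import PCA

numpy.random.seed(42)

# illustrative training set: 20 samples with 100 features each (sizes are assumptions)
training_features = [numpy.random.randn(100) for _ in range(20)]

pca = PCA(subspace_dimension = 5)

# train the projector and write it to a temporary HDF5 file
projector_file = os.path.join(tempfile.mkdtemp(), "Projector.hdf5")
pca.train_projector(training_features, projector_file)

# re-load the projector, then project enrollment and probe features
# (project() re-uses an internal buffer, hence the copies)
pca.load_projector(projector_file)
enroll_features = [pca.project(numpy.random.randn(100)).copy() for _ in range(3)]
probe = pca.project(numpy.random.randn(100)).copy()

# enroll a model (one row per feature) and score one of its rows against the probe;
# with the defaults this is the negative Euclidean distance
model = pca.enroll(enroll_features)
print(pca.score(model[0], probe))
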
from .Algorithm import Algorithm
from .PCA import PCA
#!/usr/bin/env python
import bob.bio.base
import scipy.spatial
algorithm = bob.bio.base.algorithm.PCA(
  subspace_dimension = 30,
  distance_function = scipy.spatial.distance.euclidean,
  is_distance_function = True
)
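
The configuration above keeps a fixed number of 30 eigenvectors. As noted in the constructor comment of the PCA class, subspace_dimension may also be given as a float, in which case it is interpreted as the fraction of total variance to keep. A hypothetical variant of this configuration file, not part of the commit, could therefore look as follows (the 0.95 threshold and the cosine distance are arbitrary illustrations):

#!/usr/bin/env python
import bob.bio.base
import scipy.spatial

# keep as many PCA dimensions as needed to explain 95% of the training variance,
# and score with the (negated) cosine distance instead of the Euclidean distance
algorithm = bob.bio.base.algorithm.PCA(
  subspace_dimension = 0.95,
  distance_function = scipy.spatial.distance.cosine,
  is_distance_function = True
)
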
#!/usr/bin/env python
import bob.bio.base
# Linearization of the data to a vector, no data type specified
extractor = bob.bio.base.extractor.Linearize()
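
The test further below retrieves this configuration with bob.bio.base.load_resource("linearize", "extractor"). For such a lookup, the configuration file has to be registered as a setuptools entry point of the package. The fragment below sketches what that registration could look like; the entry-point group names, module paths and version number are assumptions following the usual bob.bio layout and are not taken from this commit:

# hypothetical setup.py fragment -- names and paths are assumptions, not part of this commit
from setuptools import setup

setup(
  name = 'bob.bio.base',
  version = '0.0.1',  # illustrative version number
  packages = ['bob', 'bob.bio', 'bob.bio.base'],
  entry_points = {
    'bob.bio.extractor': [
      'linearize = bob.bio.base.config.extractor.linearize:extractor',
    ],
    'bob.bio.algorithm': [
      'pca = bob.bio.base.config.algorithm.pca:algorithm',
    ],
  },
)
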
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# @author: Manuel Guenther <Manuel.Guenther@idiap.ch>
# @date: Fri Oct 26 17:05:40 CEST 2012
#
# Copyright (C) 2011-2012 Idiap Research Institute, Martigny, Switzerland
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from .Extractor import Extractor
import numpy
class Linearize (Extractor):
  """Extracts features by simply concatenating all pixels of the data into one long vector"""

  def __init__(self, dtype=None):
    """If the ``dtype`` parameter is given, it specifies the data type that is enforced for the features."""
    Extractor.__init__(self, dtype = dtype)
    self.dtype = dtype

  def __call__(self, data):
    """Takes data of arbitrary dimensions and linearizes it into a 1D vector, enforcing the data type if desired"""
    linear = numpy.reshape(data, data.size)
    if self.dtype is not None:
      linear = linear.astype(self.dtype)
    return linear
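
As a quick illustration of the extractor above (not part of the commit; the array contents are arbitrary): flattening keeps the input data type unless a dtype is requested explicitly.

import numpy
from bob.bio.base.extractor import Linearize

image = numpy.arange(12, dtype = numpy.uint8).reshape(3, 4)  # arbitrary 2D "image"

# default: the pixels are concatenated into one long vector, the dtype is kept
flat = Linearize()(image)
assert flat.shape == (12,) and flat.dtype == numpy.uint8

# with dtype given, the flattened vector is additionally cast, here to float64
flat = Linearize(dtype = numpy.float64)(image)
assert flat.dtype == numpy.float64
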
@@ -5,3 +5,4 @@
"""Basic features for biometric recognition"""
from .Extractor import Extractor
from .Linearize import Linearize
import bob.bio.base
import bob.io.base.test_utils
import os
import numpy
from . import utils
def test_linearize():
  # load extractor
  extractor = bob.bio.base.load_resource("linearize", "extractor")

  # generate input
  data = utils.random_training_set((10,10), 1)[0]
  assert len(data.shape) == 2

  # extract features
  extracted = extractor(data)
  assert len(extracted.shape) == 1
  assert extracted.shape[0] == data.shape[0] * data.shape[1]
  assert extracted.dtype == data.dtype

  # test IO
  filename = bob.io.base.test_utils.temporary_filename()
  try:
    extractor.write_feature(extracted, filename)
    extracted2 = extractor.read_feature(filename)
    assert (extracted == extracted2).all()
  finally:
    os.remove(filename)

  # extract with dtype
  extractor = bob.bio.base.extractor.Linearize(dtype=numpy.complex128)
  extracted = extractor(data)
  assert len(extracted.shape) == 1
  assert extracted.shape[0] == data.shape[0] * data.shape[1]
  assert extracted.dtype == numpy.complex128