From e92c046c5a2d447085077a406586457901f4448b Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Tue, 10 Mar 2020 15:30:00 +0100
Subject: [PATCH] Reverted to legacy code

---
 bob/bio/base/algorithm/PCA.py | 230 +++++++++++++++++++++++-----------
 1 file changed, 158 insertions(+), 72 deletions(-)

diff --git a/bob/bio/base/algorithm/PCA.py b/bob/bio/base/algorithm/PCA.py
index 425e0986..a856292d 100644
--- a/bob/bio/base/algorithm/PCA.py
+++ b/bob/bio/base/algorithm/PCA.py
@@ -1,20 +1,20 @@
 #!/usr/bin/env python
 # vim: set fileencoding=utf-8 :
-# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+# Manuel Guenther <Manuel.Guenther@idiap.ch>
 
+import bob.learn.linear
+import bob.io.base
 
-from bob.bio.base.pipelines.vanilla_biometrics.blocks import VanillaBiometricsAlgoritm
-import sklearn.decomposition
-from scipy.spatial.distance import euclidean
 import numpy
+import scipy.spatial
 
-import logging
+from .Algorithm import Algorithm
 
+import logging
 logger = logging.getLogger("bob.bio.base")
 
-
-class PCA(VanillaBiometricsAlgoritm):
-    """Performs a principal component analysis (PCA) on the given data.
+class PCA (Algorithm):
+  """Performs a principal component analysis (PCA) on the given data.
 
   This algorithm computes a PCA projection (:py:class:`bob.learn.linear.PCATrainer`) on the given training features, projects the features to eigenspace and computes the distance of two projected features in eigenspace.
   For example, the eigenface algorithm as proposed by [TP91]_ can be run with this class.
@@ -29,95 +29,181 @@ class PCA(VanillaBiometricsAlgoritm):
     A function taking two parameters and returns a float.
     If ``uses_variances`` is set to ``True``, the function is provided with a third parameter, which is the vector of variances (aka. eigenvalues).
 
-  svd_solver: std
-    The way to solve the eigen value problem
+  is_distance_function : bool
+    Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better)
 
-  factor: float
-     Multiplication factor used for the scoring stage
+  uses_variances : bool
+    If set to ``True``, the ``distance_function`` is provided with a third argument, which is the vector of variances (aka. eigenvalues).
 
   kwargs : ``key=value`` pairs
     A list of keyword arguments directly passed to the :py:class:`Algorithm` base class constructor.
   """
 
-    def __init__(
-        self,
-        subspace_dimension,  # if int, number of subspace dimensions; if float, percentage of variance to keep
-        distance_function=euclidean,
-        svd_solver="auto",
-        factor=-1,
-        **kwargs,  # parameters directly sent to the base class
-    ):
+  def __init__(
+      self,
+      subspace_dimension,  # if int, number of subspace dimensions; if float, percentage of variance to keep
+      distance_function = scipy.spatial.distance.euclidean,
+      is_distance_function = True,
+      uses_variances = False,
+      **kwargs  # parameters directly sent to the base class
+  ):
+
+    # call base class constructor and register that the algorithm performs a projection
+    super(PCA, self).__init__(
+        performs_projection = True,
+
+        subspace_dimension = subspace_dimension,
+        distance_function = str(distance_function),
+        is_distance_function = is_distance_function,
+        uses_variances = uses_variances,
+
+        **kwargs
+    )
+
+    self.subspace_dim = subspace_dimension
+    self.machine = None
+    self.distance_function = distance_function
+    self.factor = -1. if is_distance_function else 1.
+    self.uses_variances = uses_variances
+
+
+  def _check_feature(self, feature, projected=False):
+    """Checks that the features are appropriate"""
+    if not isinstance(feature, numpy.ndarray) or feature.ndim != 1 or feature.dtype != numpy.float64:
+      raise ValueError("The given feature is not appropriate")
+    index = 1 if projected else 0
+    if self.machine is not None and feature.shape[0] != self.machine.shape[index]:
+      raise ValueError("The given feature is expected to have %d elements, but it has %d" % (self.machine.shape[index], feature.shape[0]))
+
+
+  def train_projector(self, training_features, projector_file):
+    """Trains the PCA projection matrix and writes it, together with the eigenvalues, into the given projector_file.
+
+    **Parameters:**
+
+    training_features : [1D :py:class:`numpy.ndarray`]
+      A list of 1D training arrays (vectors) to train the PCA projection matrix with.
+
+    projector_file : str
+      A writable file, into which the PCA projection matrix (as a :py:class:`bob.learn.linear.Machine`) and the eigenvalues will be written.
+    """
+    # Assure that all data are 1D
+    [self._check_feature(feature) for feature in training_features]
+
+    # Initializes the data
+    data = numpy.vstack(training_features)
+    logger.info("  -> Training LinearMachine using PCA")
+    t = bob.learn.linear.PCATrainer()
+    self.machine, self.variances = t.train(data)
+    # For re-shaping, we need to copy...
+    self.variances = self.variances.copy()
+
+    # compute variance percentage, if desired
+    if isinstance(self.subspace_dim, float):
+      cummulated = numpy.cumsum(self.variances) / numpy.sum(self.variances)
+      for index in range(len(cummulated)):
+        if cummulated[index] > self.subspace_dim:
+          self.subspace_dim = index
+          break
+      self.subspace_dim = index
+    logger.info("    ... Keeping %d PCA dimensions", self.subspace_dim)
+    # re-shape machine
+    self.machine.resize(self.machine.shape[0], self.subspace_dim)
+    self.variances = numpy.resize(self.variances, (self.subspace_dim))
+
+    f = bob.io.base.HDF5File(projector_file, "w")
+    f.set("Eigenvalues", self.variances)
+    f.create_group("Machine")
+    f.cd("/Machine")
+    self.machine.save(f)
+
+
+  def load_projector(self, projector_file):
+    """Reads the PCA projection matrix and the eigenvalues from file.
+
+    **Parameters:**
+
+    projector_file : str
+      An existing file, from which the PCA projection matrix and the eigenvalues are read.
+    """
+    # read PCA projector
+    f = bob.io.base.HDF5File(projector_file)
+    self.variances = f.read("Eigenvalues")
+    f.cd("/Machine")
+    self.machine = bob.learn.linear.Machine(f)
 
-        # call base class constructor and register that the algorithm performs a projection
-        super(PCA, self).__init__(performs_projection=True)
 
-        self.subspace_dim = subspace_dimension
-        self.distance_function = distance_function
-        self.svd_solver = svd_solver
-        self.factor = -1
+  def project(self, feature):
+    """project(feature) -> projected
 
-    def fit(self, samplesets, checkpoints):
-        """
-        This method should implement the sub-pipeline 0 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-0`.
+    Projects the given feature into eigenspace.
 
-        It represents the training of background models that an algorithm may need.
+    **Parameters:**
+
+    feature : 1D :py:class:`numpy.ndarray`
+      The 1D feature to be projected.
 
-        Parameters
-        ----------
+    **Returns:**
 
-            samplesets: :py:class:`bob.pipelines.sample.sample.SampleSet`
-                         Set of samples used to train a background model
+    projected : 1D :py:class:`numpy.ndarray`
+      The ``feature`` projected into eigenspace.
+    """
+    self._check_feature(feature)
+    # Projects the data
+    return self.machine(feature)
 
 
-            checkpoint: str
-                If provided, must the path leading to a location where this
-                model should be saved at (complete path without extension) -
-                currently, it needs to be provided because of existing
-                serialization requirements (see bob/bob.io.base#106), but
-                checkpointing will still work as expected.
-         
-        """
+  def enroll(self, enroll_features):
+    """enroll(enroll_features) -> model
 
-        pca = sklearn.decomposition.PCA(self.subspace_dim, svd_solver=self.svd_solver)
-        samples_array = self._stack_samples_2_ndarray(samplesets)
-        logger.info(
-            "Training PCA with samples of shape {0}".format(samples_array.shape)
-        )
-        pca.fit(samples_array)
+    Enrolls the model by storing all given input vectors.
 
-        # TODO: save the shit
+    **Parameters:**
 
-        return pca
+    enroll_features : [1D :py:class:`numpy.ndarray`]
+      The list of projected features to enroll the model from.
 
-    def project_one_sample(self, background_model, data):
-        if data.ndim == 1:
-            return background_model.transform(data.reshape(1, -1))
+    **Returns:**
 
-        return background_model.transform(data)
+    model : 2D :py:class:`numpy.ndarray`
+      The enrolled model.
+    """
+    assert len(enroll_features)
+    [self._check_feature(feature, True) for feature in enroll_features]
+    # just store all the features
+    return numpy.vstack(enroll_features)
 
-    def enroll_one_sample(self, data):
-        return numpy.mean(data, axis=0)
 
-    def score_one_sample(self, biometric_reference, data):
-        """It handles the score computation for one sample
+  def score(self, model, probe):
+    """score(model, probe) -> float
 
-        Parameters
-        ----------
+    Computes the distance of the model to the probe using the distance function specified in the constructor.
 
-            biometric_reference : list
-                Biometric reference to be compared
+    **Parameters:**
 
-            data : list
-                Data to be compared
+    model : 2D :py:class:`numpy.ndarray`
+      The model storing all enrollment features.
 
-        Returns
-        -------
+    probe : 1D :py:class:`numpy.ndarray`
+      The probe feature vector in eigenspace.
 
-            scores : list
-                For each sample in a probe, returns as many scores as there are
-                samples in the probe, together with the probe's and the
-                relevant reference's subject identifiers.
+    **Returns:**
 
-        """
+    score : float
+      A similarity value between ``model`` and ``probe``
+    """
+    self._check_feature(probe, True)
+    # return the negative distance (as a similarity measure)
+    if len(model.shape) == 2:
+      # we have multiple models, so we use the multiple model scoring
+      return self.score_for_multiple_models(model, probe)
+    elif self.uses_variances:
+      # single model, single probe (multiple probes have already been handled)
+      return self.factor * self.distance_function(model, probe, self.variances)
+    else:
+      # single model, single probe (multiple probes have already been handled)
+      return self.factor * self.distance_function(model, probe)
 
-        return self.factor * self.distance_function(biometric_reference, data)
+  # re-define unused functions, just so that they do not get documented
+  def train_enroller(*args,**kwargs): raise NotImplementedError()
+  def load_enroller(*args,**kwargs): pass
-- 
GitLab