From a8746ea4ee6791ef64a9990b1829b3bfe94b5e24 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Sun, 25 Feb 2018 20:57:44 +0100
Subject: [PATCH] Appended the metadata to the algorithm

[sphinx] Documented the metadata support

Fixed an issue in the projector metadata
---
 bob/bio/base/test/dummy/algorithm.py | 27 ++++++++++++++++++++++++++-
 bob/bio/base/test/test_scripts.py    |  2 +-
 bob/bio/base/tools/algorithm.py      | 30 ++++++++++++++++++++++------
 bob/bio/base/tools/scoring.py        | 12 ++++++++----
 doc/implementation.rst               | 21 +++++++++++++++++++++
 setup.py                             |  1 +
 6 files changed, 81 insertions(+), 12 deletions(-)

diff --git a/bob/bio/base/test/dummy/algorithm.py b/bob/bio/base/test/dummy/algorithm.py
index d1d25d48..ab427bae 100644
--- a/bob/bio/base/test/dummy/algorithm.py
+++ b/bob/bio/base/test/dummy/algorithm.py
@@ -1,6 +1,6 @@
 import scipy.spatial
 import bob.io.base
-
+import numpy
 from bob.bio.base.algorithm import Algorithm
 
 _data = [5., 6., 7., 8., 9.]
@@ -57,3 +57,28 @@ class DummyAlgorithm (Algorithm):
     return scipy.spatial.distance.euclidean(model, probe)
 
 algorithm = DummyAlgorithm()
+
+
+class DummyAlgorithmMetadata (DummyAlgorithm):
+
+  def train_projector(self, train_files, projector_file, metadata=None):
+    """Checks that the metadata is passed and delegates the training to the base class."""
+    assert metadata is not None
+    return super(DummyAlgorithmMetadata, self).train_projector(train_files, projector_file)
+
+  def enroll(self, enroll_features, metadata=None):
+    # Checking that all the metadata belong to the same client_id
+    assert numpy.alltrue([metadata[0].client_id == m.client_id for m in metadata])
+    #assert metadata is not None
+    return super(DummyAlgorithmMetadata, self).enroll(enroll_features)
+
+  def score(self, model, probe, metadata=None):
+    """Returns the Euclidean distance between model and probe"""
+    assert metadata is not None
+    return super(DummyAlgorithmMetadata, self).score(model, probe)
+
+  def project(self, feature, metadata=None):
+    assert metadata is not None
+    return super(DummyAlgorithmMetadata, self).project(feature)
+
+algorithm_metadata = DummyAlgorithmMetadata()
diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py
index 60948798..072a431b 100644
--- a/bob/bio/base/test/test_scripts.py
+++ b/bob/bio/base/test/test_scripts.py
@@ -150,7 +150,7 @@ def test_verify_resources_metadata():
       '-d', 'dummy',
       '-p', 'dummy_metadata',
       '-e', 'dummy_metadata',
-      '-a', 'dummy',
+      '-a', 'dummy_metadata',
       '--zt-norm',
       '--allow-missing-files',
       '-vs', 'test_resource',
diff --git a/bob/bio/base/tools/algorithm.py b/bob/bio/base/tools/algorithm.py
index 06a5a1be..ff2d69ff 100644
--- a/bob/bio/base/tools/algorithm.py
+++ b/bob/bio/base/tools/algorithm.py
@@ -2,6 +2,7 @@
 import bob.io.base
 import os
 import logging
+import inspect
 logger = logging.getLogger("bob.bio.base")
 
 from .FileSelector import FileSelector
@@ -45,6 +46,7 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
     bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file))
     # train projector
     logger.info("- Projection: loading training data")
+
     train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client)
     train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client, allow_missing_files)
     if algorithm.split_training_features_by_client:
@@ -53,8 +55,11 @@ def train_projector(algorithm, extractor, allow_missing_files = False, force = F
       logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files))
 
     # perform training
-    algorithm.train_projector(train_features, fs.projector_file)
-
+    if "metadata" in inspect.getargspec(algorithm.train_projector).args:
+      metadata = fs.database.training_files('train_projector', algorithm.split_training_features_by_client)
+      algorithm.train_projector(train_features, fs.projector_file, metadata=metadata)
+    else:
+      algorithm.train_projector(train_features, fs.projector_file)
 
 
 def project(algorithm, extractor, groups = None, indices = None, allow_missing_files = False, force = False):
@@ -99,6 +104,7 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
 
   feature_files = fs.feature_list(groups=groups)
   projected_files = fs.projected_list(groups=groups)
+  metadata = fs.original_data_list(groups=groups)
 
   # select a subset of indices to iterate
   if indices is not None:
@@ -129,8 +135,12 @@ def project(algorithm, extractor, groups = None, indices = None, allow_missing_f
       bob.io.base.create_directories_safe(os.path.dirname(projected_file))
       # load feature
       feature = extractor.read_feature(feature_file)
+
       # project feature
-      projected = algorithm.project(feature)
+      if "metadata" in inspect.getargspec(algorithm.project).args:
+        projected = algorithm.project(feature, metadata=metadata)
+      else:
+        projected = algorithm.project(feature)
 
       if projected is None:
         if allow_missing_files:
@@ -264,8 +274,8 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
 
       # Removes old file if required
       if not utils.check_file(model_file, force, algorithm.min_model_file_size):
-        enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
+        enroll_files = fs.enroll_files(model_id, group, 'projected' if algorithm.use_projected_features_for_enrollment else 'extracted')
 
         if allow_missing_files:
           enroll_files = utils.filter_missing_files(enroll_files)
           if not enroll_files:
@@ -280,7 +290,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
         # load all files into memory
         enroll_features = [reader.read_feature(enroll_file) for enroll_file in enroll_files]
 
-        model = algorithm.enroll(enroll_features)
+        if "metadata" in inspect.getargspec(algorithm.enroll).args:
+          metadata = fs.database.enroll_files(group=group, model_id=model_id)
+          model = algorithm.enroll(enroll_features, metadata=metadata)
+        else:
+          model = algorithm.enroll(enroll_features)
 
         if model is None:
           if allow_missing_files:
@@ -327,7 +341,11 @@ def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev
         # load all files into memory
         t_enroll_features = [reader.read_feature(t_enroll_file) for t_enroll_file in t_enroll_files]
 
-        t_model = algorithm.enroll(t_enroll_features)
+        if "metadata" in inspect.getargspec(algorithm.enroll).args:
+          metadata = fs.database.enroll_files(group=group, model_id=t_model_id)
+          t_model = algorithm.enroll(t_enroll_features, metadata=metadata)
+        else:
+          t_model = algorithm.enroll(t_enroll_features)
 
         if t_model is None:
           if allow_missing_files:
diff --git a/bob/bio/base/tools/scoring.py b/bob/bio/base/tools/scoring.py
index 1649add9..58f5d817 100644
--- a/bob/bio/base/tools/scoring.py
+++ b/bob/bio/base/tools/scoring.py
@@ -5,12 +5,12 @@
 import bob.measure
 import numpy
 import os, sys
 import tarfile
+import inspect
 import logging
 logger = logging.getLogger("bob.bio.base")
 
 from .FileSelector import FileSelector
-from .extractor import read_features
 from .. import utils
 
 def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
@@ -28,12 +28,12 @@
     return scores
 
   # Loops over the probe sets
-  for i, probe_element in enumerate(probes):
+  for i, probe_element, probe_metadata in zip(range(len(probes)), probes, probe_objects):
     if fs.uses_probe_file_sets():
       assert isinstance(probe_element, list)
       # filter missing files
       if allow_missing_files:
-        probe_element = utils.filter_missing_files(probe_element)
+        probe_element = utils.filter_missing_files(probe_element, probe_objects)
         if not probe_element:
           # we keep the NaN score
           continue
@@ -48,7 +48,11 @@ def _scores(algorithm, reader, model, probe_objects, allow_missing_files):
       # read probe
       probe = reader.read_feature(probe_element)
      # compute score
-      scores[0,i] = algorithm.score(model, probe)
+      if "metadata" in inspect.getargspec(algorithm.score).args:
+        scores[0, i] = algorithm.score(model, probe, metadata=probe_metadata)
+      else:
+        scores[0, i] = algorithm.score(model, probe)
+
 
   # Returns the scores
   return scores
diff --git a/doc/implementation.rst b/doc/implementation.rst
index 55908a9a..cccdb118 100644
--- a/doc/implementation.rst
+++ b/doc/implementation.rst
@@ -45,6 +45,13 @@ All of them implement the following two functions:
   .. note::
      When the database does not provide annotations, the ``annotations`` parameter might be ``None``.
 
+  .. note::
+     If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the preprocessor.
+     To enable this, add a keyword argument called **metadata** to the signature of ``__call__``.
+     Whenever this keyword is present in the signature, the corresponding :py:class:`bob.bio.base.database.BioFile`
+     instance is passed via this keyword argument.
+
+
 By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`.
 In that case, the base class IO functionality can be used.
 If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it overwrites further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class:
@@ -72,6 +79,13 @@ All extractor classes provide at least the functions:
 
 * ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data.
   By default, the returned feature should be a :py:class:`numpy.ndarray`.
 
+  .. note::
+     If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the extractor.
+     To enable this, add a keyword argument called **metadata** to the signature of ``__call__``.
+     Whenever this keyword is present in the signature, the corresponding :py:class:`bob.bio.base.database.BioFile`
+     instance is passed via this keyword argument.
+
+
 If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden.
 In this case, also the function to read that kind of features needs to be overridden:
@@ -180,6 +194,13 @@ These two functions are:
 * ``score_for_multiple_models(self, models, probe)``: In case your model store several features, **call** this function to compute the average (or min, max, ...) of the scores.
 * ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes are computed. **Override** this function in case you want different behavior.
 
+  .. note::
+     If necessary, an instance of :py:class:`bob.bio.base.database.BioFile` can be passed to the algorithm.
+     To enable this, add a keyword argument called **metadata** to the signature of ``train_projector``, ``project``, ``enroll`` or ``score``.
+     Whenever this keyword is present in a signature, the corresponding :py:class:`bob.bio.base.database.BioFile`
+     instances are passed via this keyword argument.
+
+
 
 Implemented Tools
 -----------------
diff --git a/setup.py b/setup.py
index 4ddf3c93..d33d06d3 100644
--- a/setup.py
+++ b/setup.py
@@ -106,6 +106,7 @@ setup(
 
       'bob.bio.algorithm': [
         'dummy = bob.bio.base.test.dummy.algorithm:algorithm', # for test purposes only
+        'dummy_metadata = bob.bio.base.test.dummy.algorithm:algorithm_metadata',
         'distance-euclidean = bob.bio.base.config.algorithm.distance_euclidean:algorithm',
         'distance-cosine = bob.bio.base.config.algorithm.distance_cosine:algorithm',
         'distance-hamming = bob.bio.base.config.algorithm.distance_hamming:algorithm',
-- 
GitLab
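For reviewers who want to see the new keyword from the user's side, the sketch below (not part of the patch) shows a custom algorithm that opts into the metadata argument documented above. Only the Algorithm base class, the metadata keyword, and the client_id attribute are taken from the patch itself; the class name MetadataAwareDistance and the averaging/scoring details are illustrative assumptions. The tools only pass the bob.bio.base.database.BioFile objects because the keyword appears in the method signatures (detected via inspect.getargspec).

    import numpy
    import scipy.spatial

    from bob.bio.base.algorithm import Algorithm


    class MetadataAwareDistance(Algorithm):
      """Illustrative algorithm: enrolls by averaging features and scores by
      Euclidean distance, receiving the optional ``metadata`` keyword."""

      def enroll(self, enroll_features, metadata=None):
        # 'metadata' is only filled because the keyword appears in this signature;
        # it then holds the BioFile objects of the enrollment samples.
        if metadata is not None:
          # same sanity check as the dummy algorithm: one client per model
          assert all(m.client_id == metadata[0].client_id for m in metadata)
        return numpy.mean(numpy.vstack(enroll_features), axis=0)

      def score(self, model, probe, metadata=None):
        # here 'metadata' describes the current probe object, when requested
        return scipy.spatial.distance.euclidean(model, probe)


    # registered like the dummy resource above, e.g. under a hypothetical
    # 'distance-metadata' entry point in setup.py
    algorithm = MetadataAwareDistance()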