From 8dbf58447d3d3be2ad07f73760073f8352a5c5e6 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Tue, 10 Mar 2020 15:30:38 +0100
Subject: [PATCH] Base code for the comparator

---
 .../vanilla_biometrics/comparator.py          | 269 ++++++++++++++++++
 .../pipelines/vanilla_biometrics/pipeline.py  |   2 -
 bob/bio/base/test/test_vanilla_biometrics.py  | 109 +++++++
 3 files changed, 378 insertions(+), 2 deletions(-)
 create mode 100644 bob/bio/base/pipelines/vanilla_biometrics/comparator.py
 create mode 100644 bob/bio/base/test/test_vanilla_biometrics.py

diff --git a/bob/bio/base/pipelines/vanilla_biometrics/comparator.py b/bob/bio/base/pipelines/vanilla_biometrics/comparator.py
new file mode 100644
index 00000000..9dafd257
--- /dev/null
+++ b/bob/bio/base/pipelines/vanilla_biometrics/comparator.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+# @author: Andre Anjos <andre.anjos@idiap.ch>
+
+from bob.pipelines.sample import Sample, SampleSet, DelayedSample
+import numpy
+import scipy.spatial.distance
+from sklearn.utils.validation import check_array
+
+
+class Comparator(object):
+    """Describes a base biometric comparator for the Vanilla Biometrics Pipeline :ref:`_bob.bio.base.struct_bio_rec_sys`_.
+
+    biometric model enrollement, via ``enroll()`` and scoring, with
+    ``score()``.
+
+    """
+
+    def __init__(self):
+        pass
+
+    def _stack_samples_2_ndarray(self, samplesets, stack_per_sampleset=False):
+        """
+        Stacks a set of :py:class:`bob.pipelines.sample.sample.SampleSet`
+        and converts them to a :py:class:`numpy.ndarray`
+
+        Parameters
+        ----------
+
+            samplesets: list
+                List of :py:class:`bob.pipelines.sample.sample.SampleSet` to be stacked
+
+            stack_per_sampleset: bool
+                If ``True``, returns a list of :py:class:`numpy.ndarray`, one per sample set
+
+        """
+
+        if stack_per_sampleset:
+            # TODO: Make it more efficient
+            all_data = []
+            for sampleset in samplesets:
+                all_data.append(
+                    numpy.array([sample.data for sample in sampleset.samples])
+                )
+            return all_data
+        else:
+            return numpy.array(
+                [
+                    sample.data
+                    for sampleset in samplesets
+                    for sample in sampleset.samples
+                ]
+            )
+
+
+    def enroll_samples(
+        self, references, background_model=None, checkpoint=None, *args, **kwargs
+    ):
+        """This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`.
+
+        It handles the creation of biometric references
+
+        Parameters
+        ----------
+            references : list
+                A list of :py:class:`SampleSet` objects to be used for
+                creating biometric references.  The sets must be identified
+                with a unique id and a path, for eventual checkpointing.
+
+            background_model : object
+                Object containing the background model
+
+            checkpoint : str, None
+                If passed and not ``None``, then it is considered to be the
+                path of a directory containing possible cached values for each
+                of the references in this experiment.  If that is the case, the
+                values are loaded from there and not recomputed.
+
+            *args, **kwargs :
+                Extra parameters that can be used to hook up processing-graph
+                dependencies, but are currently ignored.
+
+        """
+
+        retval = []
+        for k in references:
+            # compute on-the-fly
+            data = [s.data for s in k.samples]
+            retval.append(Sample(self.enroll(data), parent=k))
+
+        return retval
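+
+    # Illustrative sketch (assuming ``comparator`` is an instance of a concrete
+    # subclass implementing ``enroll``, e.g. ``DistanceComparator`` below):
+    #
+    #   models = comparator.enroll_samples(references)  # ``references``: list of SampleSet
+    #   models[0].data                                   # enrolled model of the first reference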
+
+    def write_biometric_reference(self, biometric_reference, filename):
+        """Writes the enrolled model to the given file.
+        In this base class implementation:
+
+        - If the given model has a 'save' attribute, it calls ``model.save(bob.io.base.HDF5File(model_file), 'w')``.
+          In this case, the given model_file might be either a file name or a :py:class:`bob.io.base.HDF5File`.
+        - Otherwise, it uses :py:func:`bob.io.base.save` to do that.
+
+        If you have a different format, please overwrite this function.
+
+        **Parameters:**
+
+        model : object
+          A model as returned by the :py:meth:`enroll` function, which should be written.
+
+        model_file : str or :py:class:`bob.io.base.HDF5File`
+          The file open for writing, or the file name to write to.
+        """
+        import h5py
+
+        with h5py.File(filename, "w") as f:
+            f.create_dataset("biometric_reference", data=biometric_reference)
+
+    def read_biometric_reference(self, filename):
+        """Reads a biometric reference written by :py:meth:`write_biometric_reference`."""
+        import h5py
+
+        with h5py.File(filename, "r") as f:
+            # ``Dataset.value`` is deprecated in h5py; read the full dataset with ``[()]``
+            data = f["biometric_reference"][()]
+        return data
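+
+    # Illustrative round-trip sketch (assuming ``comparator`` is an instance of
+    # this class and the target path is writable):
+    #
+    #   comparator.write_biometric_reference(numpy.arange(5.), "/tmp/ref.h5")
+    #   comparator.read_biometric_reference("/tmp/ref.h5")  # -> array([0., 1., 2., 3., 4.])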
+
+    def enroll(self, data, **kwargs):
+        """
+        It handles the creation of ONE biometric reference for the vanilla pipeline.
+
+        Parameters
+        ----------
+
+            data:
+                Data used for the creation of ONE biometric reference
+
+        """
+
+        raise NotImplementedError("Please, implement me")
+
+
+    def score_samples(self, probes, references, background_model=None, *args, **kwargs):
+        """Scores a new sample against multiple (potential) references
+
+        Parameters
+        ----------
+
+            probes : list
+                A list of :py:class:`SampleSet` objects to be used for
+                scoring the input references
+
+            references : list
+                A list of :py:class:`Sample` objects to be used for
+                scoring the input probes, must have an ``id`` attribute that
+                will be used to cross-reference which probes need to be scored.
+
+            background_model : str
+                Path pointing to the background model stored on disk
+
+            *args, **kwargs :
+                Extra parameters that can be used to hook up processing-graph
+                dependencies, but are currently ignored.
+
+
+        Returns
+        -------
+
+            scores : list
+                A list of :py:class:`SampleSet`, one per probe sample
+                (sub-probe), each containing one score for every reference the
+                sub-probe has to be compared against.
+
+        """
+
+        retval = []
+        for p in probes:
+            data = [s.data for s in p.samples]
+
+            for subprobe_id, (s, parent) in enumerate(zip(data, p.samples)):
+                # each sub-probe in the probe needs to be checked
+                subprobe_scores = []
+                for ref in [r for r in references if r.key in p.references]:
+                    subprobe_scores.append(
+                        Sample(self.score(ref.data, s), parent=ref)
+                    )
+                subprobe = SampleSet(subprobe_scores, parent=p)
+                subprobe.subprobe_id = subprobe_id
+                retval.append(subprobe)
+        return retval
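+
+    # Illustrative sketch of the returned structure (assuming ``comparator`` is a
+    # concrete subclass instance and ``probes``/``references`` are as documented above):
+    #
+    #   scores = comparator.score_samples(probes, references)
+    #   scores[i]                  # SampleSet with the scores of the i-th sub-probe
+    #   scores[i].samples[j].data  # score against the j-th relevant reference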
+
+
+    def score(self, biometric_reference, data, **kwargs):
+        """It handles the score computation for one sample.
+
+        Parameters
+        ----------
+
+            biometric_reference :
+                Biometric reference to be compared
+
+            data :
+                Data to be compared
+
+        Returns
+        -------
+
+            score : float
+                The similarity score between ``biometric_reference`` and ``data``
+
+        """
+        raise NotImplementedError("Please, implement me")
+
+
+class DistanceComparator(Comparator):
+    """Compares a probe to a model (the mean of the enrollment features) using
+    ``distance_function``.  The distance is multiplied by ``factor`` (-1 by
+    default, so that the returned score behaves as a similarity)."""
+
+    def __init__(self, distance_function=scipy.spatial.distance.euclidean, factor=-1):
+        self.distance_function = distance_function
+        self.factor = factor
+
+
+    def enroll(self, enroll_features, **kwargs):
+        """enroll(enroll_features) -> model
+
+        Enrolls the model as the mean of the given input vectors.
+
+        Parameters:
+        -----------
+
+        ``enroll_features`` : [:py:class:`numpy.ndarray`]
+          The list of projected features to enroll the model from.
+
+        Returns:
+        --------
+
+        ``model`` : :py:class:`numpy.ndarray`
+          The enrolled model (mean feature vector).
+        """
+
+        enroll_features = check_array(enroll_features, allow_nd=True)
+
+        return numpy.mean(enroll_features, axis=0)
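+
+    # Worked example (illustrative): enrolling two 1D feature vectors yields
+    # their element-wise mean:
+    #
+    #   DistanceComparator().enroll([[1., 2.], [3., 4.]])  # -> array([2., 3.])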
+
+
+    def score(self, model, probe, **kwargs):
+        """score(model, probe) -> float
+
+        Computes the distance between the model and the probe using the distance
+        function specified in the constructor, scaled by ``factor``.
+
+        Parameters:
+        -----------
+
+        ``model`` : 2D :py:class:`numpy.ndarray`
+          The model storing all enrollment features
+
+        ``probe`` : :py:class:`numpy.ndarray`
+          The probe feature vector
+
+        Returns:
+        --------
+
+        ``score`` : float
+          A similarity value between ``model`` and ``probe``
+        """
+
+        probe = probe.flatten()
+        # return the negative distance (as a similarity measure)
+        return self.factor * self.distance_function(model, probe)
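+
+
+# Minimal end-to-end sketch (illustrative only; ``refs`` and ``probes`` are
+# hypothetical SampleSets built as in bob/bio/base/test/test_vanilla_biometrics.py):
+#
+#   comparator = DistanceComparator()
+#   models = comparator.enroll_samples(refs)           # one Sample (mean vector) per reference
+#   scores = comparator.score_samples(probes, models)  # one SampleSet of scores per sub-probe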
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
index 121608d1..767c1f64 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/pipeline.py
@@ -364,5 +364,3 @@ def compute_scores(
     ## for each model and then associate them here.
     all_references = dask.delayed(list)(references)
     return db.map_partitions(algorithm.score, all_references, background_model, checkpoints.get("probes", {}).get("scores")  )
-
-
diff --git a/bob/bio/base/test/test_vanilla_biometrics.py b/bob/bio/base/test/test_vanilla_biometrics.py
new file mode 100644
index 00000000..08262242
--- /dev/null
+++ b/bob/bio/base/test/test_vanilla_biometrics.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+
+from bob.pipelines.sample import Sample, SampleSet, DelayedSample
+import os
+import numpy
+import tempfile
+from sklearn.utils.validation import check_is_fitted
+
+
+#from bob.bio.base.processor import Linearize, SampleLinearize, CheckpointSampleLinearize
+
+
+class DummyDatabase:
+    """Generates random sample sets mimicking a vanilla-biometrics database."""
+
+    def __init__(self, delayed=False, n_references=10, n_probes=10, dim=10, one_d=True):
+        self.delayed = delayed
+        self.dim = dim
+        self.n_references = n_references
+        self.n_probes = n_probes
+        self.one_d = one_d
+
+
+    def _create_random_1dsamples(self, n_samples, offset, dim):
+        return [Sample(numpy.random.rand(dim), key=i) for i in range(offset, offset + n_samples)]
+
+    def _create_random_2dsamples(self, n_samples, offset, dim):
+        return [Sample(numpy.random.rand(dim, dim), key=i) for i in range(offset, offset + n_samples)]
+
+    def _create_random_sample_set(self, n_sample_set=10, n_samples=2):
+
+        # Just generate random samples
+        sample_set = [SampleSet(samples=[], key=i) for i in range(n_sample_set)]
+
+        offset = 0
+        for s in sample_set:
+            if self.one_d:
+                s.samples = self._create_random_1dsamples(n_samples, offset, self.dim)
+            else:
+                s.samples = self._create_random_2dsamples(n_samples, offset, self.dim)
+
+            offset += n_samples
+
+        return sample_set
+
+
+    def background_model_samples(self):
+        return self._create_random_sample_set()
+
+
+    def references(self):
+        return self._create_random_sample_set(self.n_references, self.dim)
+
+
+    def probes(self):
+        probes = self._create_random_sample_set(self.n_probes, self.dim)
+        for p in probes:
+            p.references = list(range(self.n_references))
+        return probes
+
+
+from bob.bio.base.pipelines.vanilla_biometrics.comparator import DistanceComparator
+def test_distance_comparator():
+
+    n_references = 10
+    dim = 10
+    n_probes = 10
+    database = DummyDatabase(delayed=False, n_references=n_references, n_probes=n_probes, dim=dim, one_d=True)
+    references = database.references()
+
+    comparator = DistanceComparator()
+    references = comparator.enroll_samples(references)
+    assert len(references) == n_references
+    assert references[0].data.shape == (dim,)
+
+    probes = database.probes()
+    scores = comparator.score_samples(probes, references)
+
+    assert len(scores) == n_probes * n_references
+    assert len(scores[0].samples) == n_references
+
+    ## Test the transformer only
+    #transformer = Linearize()
+    #X = numpy.zeros(shape=(10,10))
+    #X_tr = transformer.transform(X)
+    #assert X_tr.shape == (100,)
+
+
+    ## Test wrapped in to a Sample
+    #sample = Sample(X, key="1")
+    #transformer = SampleLinearize()
+    #X_tr = transformer.transform([sample])
+    #assert X_tr[0].data.shape == (100,)
+
+    ## Test checkpoint    
+    #with tempfile.TemporaryDirectory() as d:
+        #transformer = CheckpointSampleLinearize(features_dir=d)
+        #X_tr =  transformer.transform([sample])
+        #assert X_tr[0].data.shape == (100,)
+        #assert os.path.exists(os.path.join(d, "1.h5"))
+
+
-- 
GitLab