Changed the scoring function to dump one score per sampleset.

d458eba4 · Tiago de Freitas Pereira · c95c92d6 · d458eba4 · d458eba4 · d458eba4
Commit d458eba4 authored 4 years ago by Tiago de Freitas Pereira
--- a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py
-from .pipelines import VanillaBiometricsPipeline
+from .pipelines import VanillaBiometricsPipeline, ZNormVanillaBiometricsPipeline
+
 from .biometric_algorithms import Distance
 from .score_writers import FourColumnsScoreWriter, CSVScoreWriter
 from .wrappers import BioAlgorithmCheckpointWrapper, BioAlgorithmDaskWrapper, dask_vanilla_biometrics

--- a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
@@ -5,6 +5,15 @@
 from abc import ABCMeta, abstractmethod
 from bob.pipelines.sample import Sample, SampleSet, DelayedSample
 import functools
+import numpy as np
+
+
+def average_scores(scores):
+    """
+    Default score reduction: averages ``scores`` along its first axis.
+
+    In ``BioAlgorithm.score_sampleset`` the input holds one entry per probe
+    sample of a sampleset, so the result has one averaged score per
+    biometric reference.
+
+    Parameters
+    ----------
+
+        scores:
+           Array-like object accepted by :any:`numpy.mean`
+
+    Returns
+    -------
+
+        The output of ``numpy.mean(scores, axis=0)``
+    """
+    return np.mean(scores, axis=0)


 class BioAlgorithm(metaclass=ABCMeta):
@@ -13,10 +22,17 @@ class BioAlgorithm(metaclass=ABCMeta):
    biometric model enrollement, via ``enroll()`` and scoring, with
    ``score()``.

+    Parameters
+    ----------
+
+        score_reduction_operation: ``collections.callable``
+           Callable containing the score reduction function to be applied in the samples in a sampleset
+
    """

-    def __init__(self, **kwargs):
+    def __init__(self, score_reduction_operation=average_scores, **kwargs):
        self.stacked_biometric_references = None
+        # Store the caller-supplied reducer so that custom reductions are
+        # actually used by `score_sampleset` (defaults to `average_scores`)
+        self.score_reduction_operation = score_reduction_operation

    def enroll_samples(self, biometric_references):
        """This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`.
@@ -113,53 +129,63 @@ class BioAlgorithm(metaclass=ABCMeta):
        biometric_references,
        allow_scoring_with_all_biometric_references,
    ):
-        """Given a sampleset for probing, compute the scores and returns a sample set with the scores
+        """Given one sampleset for probing, compute the scores and returns a sample set with the scores
        """

-        # Stacking the samples from a sampleset
-        data = [s.data for s in sampleset.samples]
-
-        # Compute scores for each sample inside of the sample set
-        # TODO: In some cases we want to compute 1 score per sampleset (IJB-C)
-        # We should add an aggregator function here so we can properly aggregator samples from
-        # a sampleset either after or before scoring.
-        # To be honest, this should be the default behavior
-        retval = []
-        for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)):
-            # Creating one sample per comparison
-            subprobe_scores = []
+        scores_biometric_references = []
+        if allow_scoring_with_all_biometric_references:
+            # Optimized scoring
+            # This is useful when you scoring function can be compared with a
+            # static batch of biometric references
+            total_scores = []
+            for probe_sample in sampleset:

-            if allow_scoring_with_all_biometric_references:
                # Multiple scoring
                if self.stacked_biometric_references is None:
                    self.stacked_biometric_references = [
                        ref.data for ref in biometric_references
                    ]
                scores = self.score_multiple_biometric_references(
-                    self.stacked_biometric_references, s
+                    self.stacked_biometric_references, probe_sample.data
                )
-
-                # Wrapping the scores in samples
-                for ref, score in zip(biometric_references, scores):
-                    subprobe_scores.append(Sample(score, parent=ref))
-            else:
-
+                total_scores.append(scores)
+
+            # Reducing them
+            total_scores = self.score_reduction_operation(total_scores)
+
+            # Wrapping the scores in samples
+            for ref, score in zip(biometric_references, total_scores):
+                scores_biometric_references.append(Sample(score, parent=ref))
+
+        else:
+            # Non optimizing scoring
+            # There are some protocols where each probe has
+            # to be scored with a specific list of biometric_references
+            total_scores = []
+            for probe_sample in sampleset:
+                scores = []
                for ref in [
-                    r for r in biometric_references if r.key in sampleset.references
+                    r for r in biometric_references if str(r.subject) in sampleset.references
                ]:
-                    score = self.score(ref.data, s)
-                    subprobe_scores.append(Sample(score, parent=ref))
-
-            # Fetching metadata from the probe
-            kwargs = dict(
-                (metadata, sampleset.__dict__[metadata])
-                for metadata in sampleset.__dict__.keys()
-                if metadata not in ["samples", "key", "data", "load", "_data"]
-            )
-            subprobe = SampleSet(subprobe_scores, parent=parent, **kwargs)
-            retval.append(subprobe)
+                    scores.append(self.score(ref.data, probe_sample.data))
+                total_scores.append(scores)
+
+            total_scores = self.score_reduction_operation(np.array(total_scores))
+
+            for ref, score in zip([
+                r for r in biometric_references if str(r.subject) in sampleset.references
+            ], total_scores):
+
+                scores_biometric_references.append(Sample(score, parent=ref))
+
+        # Fetching metadata from the probe
+        kwargs = dict(
+            (metadata, sampleset.__dict__[metadata])
+            for metadata in sampleset.__dict__.keys()
+            if metadata not in ["samples", "key", "data", "load", "_data"]
+        )
+        return SampleSet(scores_biometric_references, parent=sampleset, **kwargs)

-        return retval

    @abstractmethod
    def score(self, biometric_reference, data):

--- a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
@@ -120,7 +120,7 @@ class VanillaBiometricsPipeline(object):
        biometric_reference_features = self.transformer.transform(
            biometric_reference_samples
        )
-        
+
        biometric_references = self.biometric_algorithm.enroll_samples(
            biometric_reference_features
        )
@@ -137,7 +137,7 @@ class VanillaBiometricsPipeline(object):

        # probes is a list of SampleSets
        probe_features = self.transformer.transform(probe_samples)
-        
+
        scores = self.biometric_algorithm.score_samples(
            probe_features,
            biometric_references,
@@ -146,3 +146,69 @@ class VanillaBiometricsPipeline(object):

        # scores is a list of Samples
        return scores
+
+
+class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline):
+    """
+    Wraps a vanilla biometrics pipeline and adds a z-probe scoring step.
+
+    Every stage (background model training, enrollment, scoring) is
+    delegated to the wrapped pipeline.
+
+    .. todo::
+       The normalization itself is not implemented yet:
+       ``compute_znorm_scores`` scores the z-probes and returns ``None``.
+
+    Parameters
+    ----------
+
+        vanilla_biometrics_pipeline:
+           Pipeline object providing ``train_background_model``,
+           ``create_biometric_reference``, ``compute_scores`` and ``__call__``
+
+    """
+
+    def __init__(self, vanilla_biometrics_pipeline):
+        # NOTE(review): the parent constructor is not called; this object only
+        # keeps a handle to the wrapped pipeline. Confirm this is intended.
+        self.vanilla_biometrics_pipeline = vanilla_biometrics_pipeline
+
+    def __call__(
+        self,
+        background_model_samples,
+        biometric_reference_samples,
+        probe_samples,
+        zprobe_samples,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+        """
+        Runs the wrapped pipeline on the probes and then scores the z-probes.
+
+        Returns whatever ``compute_znorm_scores`` returns (currently ``None``).
+        """
+
+        self.transformer = self.train_background_model(background_model_samples)
+
+        # Create biometric samples
+        biometric_references = self.create_biometric_reference(
+            biometric_reference_samples
+        )
+
+        # NOTE(review): the wrapped pipeline is called with the same background
+        # and reference samples used above, so it presumably repeats the
+        # training/enrollment steps; confirm
+        raw_scores = self.vanilla_biometrics_pipeline(
+            background_model_samples,
+            biometric_reference_samples,
+            probe_samples,
+            allow_scoring_with_all_biometric_references,
+        )
+
+        return self.compute_znorm_scores(
+            zprobe_samples, raw_scores, biometric_references
+        )
+
+    def train_background_model(self, background_model_samples):
+        """Delegates to ``train_background_model`` of the wrapped pipeline"""
+        return self.vanilla_biometrics_pipeline.train_background_model(
+            background_model_samples
+        )
+
+    def create_biometric_reference(self, biometric_reference_samples):
+        """Delegates to ``create_biometric_reference`` of the wrapped pipeline"""
+        return self.vanilla_biometrics_pipeline.create_biometric_reference(
+            biometric_reference_samples
+        )
+
+    def compute_scores(
+        self,
+        probe_samples,
+        biometric_references,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+        """Delegates to ``compute_scores`` of the wrapped pipeline"""
+
+        return self.vanilla_biometrics_pipeline.compute_scores(
+            probe_samples,
+            biometric_references,
+            allow_scoring_with_all_biometric_references,
+        )
+
+    def compute_znorm_scores(self, zprobe_samples, probe_scores, biometric_references):
+        """
+        Scores the z-probes against the biometric references.
+
+        .. todo::
+           Use ``z_scores`` to normalize ``probe_scores``. For now the
+           z-probe scores are computed and discarded, and ``None`` is returned.
+        """
+
+        # No debugger breakpoint here: library code must run unattended
+        z_scores = self.vanilla_biometrics_pipeline.compute_scores(
+            zprobe_samples, biometric_references
+        )
+
+        # TODO: normalize `probe_scores` with `z_scores` and return the result
+        return None
--- a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py
@@ -20,33 +20,30 @@ class FourColumnsScoreWriter(ScoreWriter):
        Write scores and returns a :any:`bob.pipelines.DelayedSample` containing
        the instruction to open the score file
        """
+
        os.makedirs(path, exist_ok=True)
        checkpointed_scores = []

-        for probe in probe_sampleset:
-
-            lines = [
-                "{0} {1} {2} {3}\n".format(
-                    biometric_reference.subject,
-                    probe.subject,
-                    probe.key,
-                    biometric_reference.data,
-                )
-                for biometric_reference in probe
-            ]
-            filename = os.path.join(path, str(probe.subject)) + ".txt"
-            open(filename, "w").writelines(lines)
-            checkpointed_scores.append(
-                SampleSet(
-                    [
-                        DelayedSample(
-                            functools.partial(self.read, filename), parent=probe
-                        )
-                    ],
-                    parent=probe,
-                )
+        lines = [
+            "{0} {1} {2} {3}\n".format(
+                biometric_reference.subject,
+                probe_sampleset.subject,
+                probe_sampleset.key,
+                biometric_reference.data,
            )
-        return checkpointed_scores
+            for biometric_reference in probe_sampleset
+        ]
+        filename = os.path.join(path, str(probe_sampleset.subject)) + ".txt"
+        open(filename, "w").writelines(lines)
+
+        return SampleSet(
+            [
+                DelayedSample(
+                    functools.partial(self.read, filename), parent=probe_sampleset
+                )
+            ],
+            parent=probe_sampleset,
+        )

    def read(self, path):
        """
@@ -54,16 +51,15 @@ class FourColumnsScoreWriter(ScoreWriter):
        """
        return open(path).readlines()

-    def concatenate_write_scores(self, samplesets_list, filename):
+    def concatenate_write_scores(self, samplesets, filename):
-        """
-        Given a list of samplsets, write them all in a single file
-        """
+        """
+        Given a list of samplesets, write them all in a single file
+
+        Parameters
+        ----------
+
+            samplesets:
+               Iterable of samplesets; the ``data`` of each of their samples
+               is written with ``writelines``
+
+            filename: str
+               Output file; its parent directory is created if needed
+        """
        os.makedirs(os.path.dirname(filename), exist_ok=True)
-        f = open(filename, "w")
-        for samplesets in samplesets_list:
-            for sset in samplesets:
-                for s in sset:
-                    f.writelines(s.data)
+        # The context manager closes (and flushes) the file even if a write fails
+        with open(filename, "w") as f:
+            for sset in samplesets:
+                for scores in sset:
+                    f.writelines(scores.data)


 class CSVScoreWriter(ScoreWriter):
@@ -115,40 +111,36 @@ class CSVScoreWriter(ScoreWriter):
        os.makedirs(path, exist_ok=True)
        checkpointed_scores = []

-        header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0])
+        header, probe_dict, bioref_dict = create_csv_header(probe_sampleset)

-        for probe in probe_sampleset:
-            filename = os.path.join(path, str(probe.subject)) + ".csv"
-            with open(filename, "w") as f:
+        filename = os.path.join(path, str(probe_sampleset.subject)) + ".csv"
+        with open(filename, "w") as f:

-                csv_write = csv.writer(f)
-                csv_write.writerow(header)
+            csv_write = csv.writer(f)
+            csv_write.writerow(header)

-                rows = []
-                probe_row = [str(probe.key)] + [
-                    str(probe.__dict__[k]) for k in probe_dict.keys()
+            rows = []
+            probe_row = [str(probe_sampleset.key)] + [
+                str(probe_sampleset.__dict__[k]) for k in probe_dict.keys()
+            ]
+
+            for biometric_reference in probe_sampleset:
+                bio_ref_row = [
+                    str(biometric_reference.__dict__[k])
+                    for k in list(bioref_dict.keys()) + ["data"]
                ]

-                for biometric_reference in probe:
-                    bio_ref_row = [
-                        str(biometric_reference.__dict__[k])
-                        for k in list(bioref_dict.keys()) + ["data"]
-                    ]
-
-                    rows.append(probe_row + bio_ref_row)
-
-                csv_write.writerows(rows)
-                checkpointed_scores.append(
-                    SampleSet(
-                        [
-                            DelayedSample(
-                                functools.partial(self.read, filename), parent=probe
-                            )
-                        ],
-                        parent=probe,
+                rows.append(probe_row + bio_ref_row)
+
+            csv_write.writerows(rows)
+            return SampleSet(
+                [
+                    DelayedSample(
+                        functools.partial(self.read, filename), parent=probe_sampleset
                    )
-                )
-        return checkpointed_scores
+                ],
+                parent=probe_sampleset,
+            )

    def read(self, path):
        """
@@ -156,7 +148,7 @@ class CSVScoreWriter(ScoreWriter):
        """
        return open(path).readlines()

-    def concatenate_write_scores(self, samplesets_list, filename):
+    def concatenate_write_scores(self, samplesets, filename):
        """
        Given a list of samplsets, write them all in a single file
        """
@@ -167,8 +159,8 @@ class CSVScoreWriter(ScoreWriter):
        base_dir = os.path.splitext(filename)[0]
        os.makedirs(base_dir, exist_ok=True)
        f = None
-        for i, samplesets in enumerate(samplesets_list):
-            if i% self.n_sample_sets==0:
+        for i, sset in enumerate(samplesets):
+            if i % self.n_sample_sets == 0:
                if f is not None:
                    f.close()
                    del f
@@ -176,10 +168,9 @@ class CSVScoreWriter(ScoreWriter):
                filename = os.path.join(base_dir, f"chunk_{i}.csv")
                f = open(filename, "w")

-            for sset in samplesets:
-                for s in sset:
-                    if i==0:
-                        f.writelines(s.data)
-                    else:
-                        f.writelines(s.data[1:])
-            samplesets_list[i] = None
\ No newline at end of file
+            for scores in sset:
+                if i == 0:
+                    f.writelines(scores.data)
+                else:
+                    f.writelines(scores.data[1:])
+            sset.samples = None
--- a/bob/bio/base/test/test_vanilla_biometrics.py
+++ b/bob/bio/base/test/test_vanilla_biometrics.py
@@ -61,10 +61,10 @@ class DummyDatabase:
            for i in range(offset, offset + n_samples)
        ]

-    def _create_random_sample_set(self, n_sample_set=10, n_samples=2):
+    def _create_random_sample_set(self, n_sample_set=10, n_samples=2, seed=10):

        # Just generate random samples
-        np.random.seed(10)
+        np.random.seed(seed)
        sample_set = [
            SampleSet(
                samples=[],
@@ -89,21 +89,30 @@ class DummyDatabase:
        return sample_set

    def background_model_samples(self):
+        """Returns a flat list with the samples of all the random SampleSets generated with seed 10"""
-        samples = [sset.samples for sset in self._create_random_sample_set()]
+        samples = [sset.samples for sset in self._create_random_sample_set(seed=10)]
        return list(itertools.chain(*samples))

    def references(self):
+        """Returns the random SampleSets generated with ``n_sample_set=self.n_references``, ``n_samples=self.dim`` and seed 11"""
-        return self._create_random_sample_set(self.n_references, self.dim)
+        return self._create_random_sample_set(self.n_references, self.dim, seed=11)

    def probes(self):
+        """
+        Returns the random probe SampleSets (``n_sample_set=10``, ``n_samples=1``,
+        seed 12); each one lists the stringified indexes of all the references
+        in its ``references`` attribute
+        """
        probes = []

-        probes = self._create_random_sample_set(n_sample_set=10, n_samples=1)
+        probes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=12)
        for p in probes:
-            p.references = list(range(self.n_references))
+            # Strings, because the scoring tests `str(r.subject) in sampleset.references`
+            p.references = [str(r) for r in list(range(self.n_references))]

        return probes

+    def zprobes(self):
+        """
+        Returns the random z-probe SampleSets (``n_sample_set=10``, ``n_samples=1``,
+        seed 13); each one lists the stringified indexes of all the references
+        in its ``references`` attribute
+        """
+        zprobes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=13)
+        for p in zprobes:
+            # Strings, because the scoring tests `str(r.subject) in sampleset.references`.
+            # A fresh list per sampleset avoids sharing one mutable object.
+            p.references = [str(r) for r in range(self.n_references)]
+
+        return zprobes
+
    @property
    def allow_scoring_with_all_biometric_references(self):
        return True
@@ -140,7 +149,7 @@ def test_on_memory():

    with tempfile.TemporaryDirectory() as dir_name:

-        def run_pipeline(with_dask):
+        def run_pipeline(with_dask, allow_scoring_with_all_biometric_references):
            database = DummyDatabase()

            transformer = _make_transformer(dir_name)
@@ -160,23 +169,24 @@ def test_on_memory():
                database.background_model_samples(),
                database.references(),
                database.probes(),
-                allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references,
+                allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
            )

            if with_dask:
                scores = scores.compute(scheduler="single-threaded")

            assert len(scores) == 10
-            for probe_ssets in scores:
-                for probe in probe_ssets:
-                    assert len(probe) == 10
+            for sample_scores in scores:
+                assert len(sample_scores) == 10
+                for score in sample_scores:
+                    assert isinstance(score.data, float)

-        run_pipeline(False)
-        run_pipeline(False)  # Testing checkpoint
+        run_pipeline(False, True)
+        run_pipeline(False, False)  # Testing checkpoint
        shutil.rmtree(dir_name)  # Deleting the cache so it runs again from scratch
        os.makedirs(dir_name, exist_ok=True)
-        run_pipeline(True)
-        run_pipeline(True)  # Testing checkpoint
+        run_pipeline(True, True)
+        run_pipeline(True, True)  # Testing checkpoint


 def test_checkpoint_bioalg_as_transformer():
@@ -207,6 +217,7 @@ def test_checkpoint_bioalg_as_transformer():
                database.probes(),
                allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references,
            )
+
            if with_dask:
                scores = scores.compute(scheduler="single-threaded")