From f1473028d3d24ed47c96a01cc4dccc52ac888a78 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Wed, 6 May 2020 19:00:43 +0200
Subject: [PATCH] Implemented ZT-Normalization. More tests should be
 implemented.

---
 .../pipelines/vanilla_biometrics/__init__.py  |   2 +-
 .../vanilla_biometrics/abstract_classes.py    |   1 +
 .../pipelines/vanilla_biometrics/pipelines.py | 158 ++++++++++++++++--
 .../pipelines/vanilla_biometrics/wrappers.py  |  95 +++++++++--
 bob/bio/base/test/test_transformers.py        |   2 +-
 bob/bio/base/test/test_vanilla_biometrics.py  |  10 +-
 .../test_vanilla_biometrics_score_norm.py     |  75 +++++++++
 7 files changed, 313 insertions(+), 30 deletions(-)
 create mode 100644 bob/bio/base/test/test_vanilla_biometrics_score_norm.py

diff --git a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py
index 66dfd01e..dcd7b9e9 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py
@@ -1,4 +1,4 @@
-from .pipelines import VanillaBiometricsPipeline, ZNormVanillaBiometricsPipeline
+from .pipelines import VanillaBiometricsPipeline, ZTNormVanillaBiometricsPipeline
 
 from .biometric_algorithms import Distance
 from .score_writers import FourColumnsScoreWriter, CSVScoreWriter
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
index 94dabb87..56c64bca 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
@@ -121,6 +121,7 @@ class BioAlgorithm(metaclass=ABCMeta):
                     allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
                 )
             )
+
         return retval
 
     def _score_sample_set(
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
index 50326721..e8f430a0 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
@@ -11,6 +11,7 @@ for bob.bio experiments
 import logging
 import numpy
 from .score_writers import FourColumnsScoreWriter
+from .wrappers import BioAlgorithmZTNormWrapper
 
 
 logger = logging.getLogger(__name__)
@@ -107,7 +108,7 @@ class VanillaBiometricsPipeline(object):
         )
 
         # Scores all probes
-        scores = self.compute_scores(
+        scores, _ = self.compute_scores(
             probe_samples,
             biometric_references,
             allow_scoring_with_all_biometric_references,
@@ -160,15 +161,55 @@ class VanillaBiometricsPipeline(object):
         )
 
         # scores is a list of Samples
-        return scores
+        return scores, probe_features
 
     def write_scores(self, scores):
         return self.score_writer.write(scores)
 
 
-class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline):
-    def __init__(self, vanilla_biometrics_pipeline):
+class ZTNormVanillaBiometricsPipeline(object):
+    """
+    Apply Z, T or ZT Score normalization on top of VanillaBiometric Pipeline
+
+    Reference bibliography from: A Generative Model for Score Normalization in Speaker Recognition
+    https://arxiv.org/pdf/1709.09868.pdf
+
+
+    Example
+    -------
+       >>> transformer = make_pipeline([])
+       >>> biometric_algorithm = Distance()
+       >>> vanilla_biometrics_pipeline = VanillaBiometricsPipeline(transformer, biometric_algorithm)
+       >>> zt_pipeline = ZTNormVanillaBiometricsPipeline(vanilla_biometrics_pipeline)
+       >>> zt_pipeline(...)
+
+    Parameters
+    ----------
+
+        vanilla_biometrics_pipeline: :any:`VanillaBiometricsPipeline`
+          An instance of :any:`VanillaBiometricsPipeline` to be wrapped with score normalization
+
+        z_norm: bool
+          If True, applies ZScore normalization on top of raw scores.
+
+        t_norm: bool
+          If True, applies TScore normalization on top of raw scores.
+          If both z_norm and t_norm are True, it applies ZT-score normalization
+
+    """
+
+
+    def __init__(self, vanilla_biometrics_pipeline, z_norm=True, t_norm=True):
+        # Validate first: the wrapping below mutates the caller's pipeline, so a rejected call must leave it untouched
+        if not z_norm and not t_norm:
+            raise ValueError("Both z_norm and t_norm are False. No normalization will be applied")
         self.vanilla_biometrics_pipeline = vanilla_biometrics_pipeline
+        # Replace the pipeline's algorithm with a wrapper exposing compute_norm_scores
+        self.vanilla_biometrics_pipeline.biometric_algorithm = BioAlgorithmZTNormWrapper(
+            self.vanilla_biometrics_pipeline.biometric_algorithm
+        )
+        self.z_norm = z_norm
+        self.t_norm = t_norm
 
     def __call__(
         self,
@@ -176,6 +217,7 @@ class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline):
         biometric_reference_samples,
         probe_samples,
         zprobe_samples,
+        t_biometric_reference_samples,
         allow_scoring_with_all_biometric_references=False,
     ):
 
@@ -186,17 +228,45 @@ class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline):
             biometric_reference_samples
         )
 
-        raw_scores = self.vanilla_biometrics_pipeline(
-            background_model_samples,
-            biometric_reference_samples,
+        raw_scores, probe_features = self.compute_scores(
             probe_samples,
+            biometric_references,
+            allow_scoring_with_all_biometric_references,
+        )
+
+        # Z NORM
+        if self.z_norm:
+            z_normed_scores, z_probe_features = self.compute_znorm_scores(
+                zprobe_samples,
+                raw_scores,
+                biometric_references,
+                allow_scoring_with_all_biometric_references,
+            )
+        if not self.t_norm:
+            return z_normed_scores
+
+        # T NORM
+        t_normed_scores, t_biometric_references = self.compute_tnorm_scores(
+            t_biometric_reference_samples,
+            probe_features,
+            raw_scores,
             allow_scoring_with_all_biometric_references,
         )
+        if not self.z_norm:
+            return t_normed_scores
 
-        return self.compute_znorm_scores(
-            zprobe_samples, raw_scores, biometric_references
+
+        # ZT NORM
+        zt_normed_scores = self.compute_ztnorm_scores(
+            z_probe_features,
+            t_biometric_references,
+            z_normed_scores,
+            t_normed_scores,
+            allow_scoring_with_all_biometric_references,
         )
 
+        return zt_normed_scores
+
     def train_background_model(self, background_model_samples):
         return self.vanilla_biometrics_pipeline.train_background_model(
             background_model_samples
@@ -220,10 +290,74 @@ class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline):
             allow_scoring_with_all_biometric_references,
         )
 
-    def compute_znorm_scores(self, zprobe_samples, probe_scores, biometric_references):
+    def compute_znorm_scores(
+        self,
+        zprobe_samples,
+        probe_scores,
+        biometric_references,
+        allow_scoring_with_all_biometric_references=False,
+    ):
 
-        z_scores = self.vanilla_biometrics_pipeline.compute_scores(
-            zprobe_samples, biometric_references
+        # Forward the flag so Z-probes are scored under the same policy as the regular probes
+        z_scores, z_probe_features = self.compute_scores(
+            zprobe_samples, biometric_references, allow_scoring_with_all_biometric_references,
         )
 
-        pass
+        z_normed_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.compute_norm_scores(
+            z_scores, probe_scores, allow_scoring_with_all_biometric_references,
+        )
+        return z_normed_scores, z_probe_features
+
+    def compute_tnorm_scores(
+        self,
+        t_biometric_reference_samples,
+        probe_features,
+        probe_scores,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+        """Normalize ``probe_scores`` with the scores of ``probe_features`` against the T references; also returns those references."""
+        t_biometric_references = self.create_biometric_reference(
+            t_biometric_reference_samples
+        )
+
+        # Score the already-extracted probe features directly against the T references
+        t_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.score_samples(
+            probe_features,
+            t_biometric_references,
+            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
+        )
+
+        t_normed_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.compute_norm_scores(
+            t_scores, probe_scores, allow_scoring_with_all_biometric_references,
+        )
+
+        return t_normed_scores, t_biometric_references
+
+    def compute_ztnorm_scores(self,
+            z_probe_features,
+            t_biometric_references,
+            z_normed_scores,
+            t_normed_scores,
+            allow_scoring_with_all_biometric_references=False
+            ):
+        """Score the Z-probe features against the T references, then chain two ``compute_norm_scores`` calls to get ZT-normed scores."""
+
+        # Reusing the zprobe_features and t_biometric_references
+        zt_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.score_samples(
+            z_probe_features,
+            t_biometric_references,
+            allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
+        )
+
+        # Normalize the T-normed scores with the statistics of the Z-probe x T-reference scores
+        z_normed_t_normed = self.vanilla_biometrics_pipeline.biometric_algorithm.compute_norm_scores(
+            zt_scores, t_normed_scores, allow_scoring_with_all_biometric_references,
+        )
+
+        # Normalize the Z-normed scores with the statistics of the scores obtained just above
+        zt_normed_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.compute_norm_scores(
+            z_normed_t_normed, z_normed_scores, allow_scoring_with_all_biometric_references,
+        )
+
+
+        return zt_normed_scores
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py
index 3ab3d83f..00f5a1f2 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py
@@ -1,4 +1,4 @@
-from bob.pipelines import DelayedSample, SampleSet
+from bob.pipelines import DelayedSample, SampleSet, Sample
 import bob.io.base
 import os
 import dask
@@ -7,6 +7,7 @@ from .score_writers import FourColumnsScoreWriter
 from .abstract_classes import BioAlgorithm
 import pickle
 import bob.pipelines as mario
+import numpy as np
 
 
 class BioAlgorithmCheckpointWrapper(BioAlgorithm):
@@ -35,13 +36,7 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm):
 
     """
 
-    def __init__(
-        self,
-        biometric_algorithm,
-        base_dir,
-        force=False,
-        **kwargs
-    ):
+    def __init__(self, biometric_algorithm, base_dir, force=False, **kwargs):
         super().__init__(**kwargs)
 
         self.biometric_reference_dir = os.path.join(base_dir, "biometric_references")
@@ -116,11 +111,7 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm):
             )
             self.write_scores(scored_sample_set.samples, path)
             scored_sample_set = SampleSet(
-                [
-                    DelayedSample(
-                        functools.partial(_load, path), parent=sampleset
-                    )
-                ],
+                [DelayedSample(functools.partial(_load, path), parent=sampleset)],
                 parent=sampleset,
             )
         else:
@@ -199,8 +190,82 @@ def dask_vanilla_biometrics(vanila_biometrics_pipeline, npartitions=None):
 
     def _write_scores(scores):
         return scores.map_partitions(vanila_biometrics_pipeline.write_scores_on_dask)
-    vanila_biometrics_pipeline.write_scores_on_dask = vanila_biometrics_pipeline.write_scores
-    vanila_biometrics_pipeline.write_scores = _write_scores
 
+    vanila_biometrics_pipeline.write_scores_on_dask = (
+        vanila_biometrics_pipeline.write_scores
+    )
+    vanila_biometrics_pipeline.write_scores = _write_scores
 
     return vanila_biometrics_pipeline
+
+
+class BioAlgorithmZTNormWrapper(BioAlgorithm):
+    """
+    Wraps a biometric algorithm: enroll/score are delegated to it, and score-normalization helpers are added on top
+    """
+
+    def __init__(self, biometric_algorithm, **kwargs):
+
+        self.biometric_algorithm = biometric_algorithm
+        super().__init__(**kwargs)
+
+    def enroll(self, enroll_features):
+        return self.biometric_algorithm.enroll(enroll_features)
+
+    def score(self, biometric_reference, data):
+        return self.biometric_algorithm.score(biometric_reference, data)
+
+    def score_multiple_biometric_references(self, biometric_references, data):
+        return self.biometric_algorithm.score_multiple_biometric_references(
+            biometric_references, data
+        )
+
+    def compute_norm_scores(
+        self,
+        base_norm_scores,
+        probe_scores,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+        """
+        Return ``probe_scores`` rescaled as (score - mu) / std, with mu and std taken over ALL scores in ``base_norm_scores``
+        """
+
+        def _norm(score, mu, std):
+            return (score - mu) / std
+
+        score_floats = np.array([s.data for sset in base_norm_scores for s in sset])
+        mu = np.mean(score_floats)
+        std = np.std(score_floats)
+        # NOTE(review): std is 0 when every base score is equal, so _norm would then divide by zero
+        # Normalizing
+        normed_score_samples = []
+        for probe in probe_scores:
+            sampleset = SampleSet([], parent=probe)
+            for biometric_reference_score in probe:
+                score = _norm(biometric_reference_score.data, mu, std)
+                new_sample = Sample(score, parent=biometric_reference_score)
+                sampleset.samples.append(new_sample)
+            normed_score_samples.append(sampleset)
+
+        return normed_score_samples
+
+
+    def compute_ztnorm_scores(
+        self,
+        z_probe_features,
+        t_biometrics_references,
+        z_scores,
+        t_scores,
+        probe_scores,
+        allow_scoring_with_all_biometric_references=False
+    ):
+        """Unfinished stub: scores the Z-probe features against the T references, discards the result and returns None."""
+        # TxZ scores
+        txz_scores_sset = self.biometric_algorithm.score_samples(
+            z_probe_features,
+            t_biometrics_references,
+            allow_scoring_with_all_biometric_references,
+        )
+
+
+        pass
diff --git a/bob/bio/base/test/test_transformers.py b/bob/bio/base/test/test_transformers.py
index 0bd43dca..730c679a 100644
--- a/bob/bio/base/test/test_transformers.py
+++ b/bob/bio/base/test/test_transformers.py
@@ -33,7 +33,7 @@ class FakePreprocesor(Preprocessor):
 
 class FakeExtractor(Extractor):
     def __call__(self, data):
-        return data.flatten()
+        return data.flatten()[0:10] # Selecting the first 10 features
 
 
 class FakeExtractorFittable(Extractor):
diff --git a/bob/bio/base/test/test_vanilla_biometrics.py b/bob/bio/base/test/test_vanilla_biometrics.py
index b8751e88..a4a6d889 100644
--- a/bob/bio/base/test/test_vanilla_biometrics.py
+++ b/bob/bio/base/test/test_vanilla_biometrics.py
@@ -107,12 +107,20 @@ class DummyDatabase:
     def zprobes(self):
         zprobes = []
 
-        zprobes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=13)
+        zprobes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=14)
         for p in zprobes:
+            p.subject = "z-" + str(p.subject)
             p.references = [str(r) for r in list(range(self.n_references))]
 
         return zprobes
 
+
+    def treferences(self):
+        t_sset = self._create_random_sample_set(self.n_references, self.dim, seed=15)
+        for t in t_sset:
+            t.subject = "t_" + str(t.subject)
+        return t_sset
+
     @property
     def allow_scoring_with_all_biometric_references(self):
         return True
diff --git a/bob/bio/base/test/test_vanilla_biometrics_score_norm.py b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py
new file mode 100644
index 00000000..eab3ed4a
--- /dev/null
+++ b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+
+from bob.pipelines import Sample, SampleSet, DelayedSample
+import os
+import numpy as np
+import tempfile
+from sklearn.pipeline import make_pipeline
+from bob.bio.base.wrappers import wrap_bob_legacy
+
+from bob.bio.base.test.test_transformers import (
+    FakePreprocesor,
+    FakeExtractor,
+    FakeAlgorithm,
+)
+from bob.bio.base.test.test_vanilla_biometrics import DummyDatabase, _make_transformer
+
+
+from bob.bio.base.pipelines.vanilla_biometrics import (
+    Distance,
+    VanillaBiometricsPipeline,
+    ZTNormVanillaBiometricsPipeline,
+    BioAlgorithmCheckpointWrapper,
+    dask_vanilla_biometrics,
+    BioAlgorithmLegacy,
+)
+
+import bob.pipelines as mario
+import uuid
+import shutil
+import itertools
+
+
+def test_znorm_on_memory():
+    """Smoke test: run the pipeline with its default flags (z_norm and t_norm both True), without dask, and check the result has length 10."""
+    with tempfile.TemporaryDirectory() as dir_name:
+
+        def run_pipeline(with_dask):
+
+            database = DummyDatabase(one_d=False)
+
+            transformer = _make_transformer(dir_name)
+
+            biometric_algorithm = Distance()
+
+            vanilla_biometrics_pipeline = ZTNormVanillaBiometricsPipeline(
+                VanillaBiometricsPipeline(transformer, biometric_algorithm)
+            )
+
+            if with_dask:
+                vanilla_biometrics_pipeline = dask_vanilla_biometrics(
+                    vanilla_biometrics_pipeline, npartitions=2
+                )
+
+            scores = vanilla_biometrics_pipeline(
+                database.background_model_samples(),
+                database.references(),
+                database.probes(),
+                database.zprobes(),
+                database.treferences(),
+                allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references,
+            )
+
+            if with_dask:
+                scores = scores.compute(scheduler="single-threaded")
+
+            assert len(scores) == 10
+
+        run_pipeline(False)
+        #run_pipeline(False)  # Testing checkpoint
+        # shutil.rmtree(dir_name)  # Deleting the cache so it runs again from scratch
+        # os.makedirs(dir_name, exist_ok=True)
+        # run_pipeline(True)
+        # run_pipeline(True)  # Testing checkpoint
-- 
GitLab