From d9f5534b840e7b87cdccbe68eadb3d8209c9ab07 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Fri, 22 May 2020 22:00:52 +0200
Subject: [PATCH] Polishing ZT-Norm

---
 .../pipelines/vanilla_biometrics/zt_norm.py   | 62 ++++---------------
 bob/bio/base/script/vanilla_biometrics.py     | 15 ++---
 .../test_vanilla_biometrics_score_norm.py     | 20 ++----
 3 files changed, 27 insertions(+), 70 deletions(-)

diff --git a/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py b/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py
index 0de212ec..0b58fb88 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py
@@ -10,7 +10,7 @@ from bob.pipelines import DelayedSample, Sample, SampleSet
 import numpy as np
 import dask
 import functools
-import pickle
+import cloudpickle
 import os
 from .score_writers import FourColumnsScoreWriter
 import logging
@@ -78,6 +78,7 @@ class ZTNormPipeline(object):
         allow_scoring_with_all_biometric_references=False,
     ):
 
+
         self.transformer = self.train_background_model(background_model_samples)
 
         # Create biometric samples
@@ -128,11 +129,6 @@ class ZTNormPipeline(object):
             allow_scoring_with_all_biometric_references,
         )
         
-
-        # TODO: Do the score write
-        #if self.vanilla_biometrics_pipeline.score_writer is not None:
-        #    return self.write_scores(scores)
-
         return raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores
 
     def train_background_model(self, background_model_samples):
@@ -166,8 +162,6 @@ class ZTNormPipeline(object):
         allow_scoring_with_all_biometric_references=False,
     ):
 
-        # zprobe_samples = self._inject_references(zprobe_samples, biometric_references)
-
         z_scores, z_probe_features = self.compute_scores(
             zprobe_samples, biometric_references
         )
@@ -214,10 +208,6 @@ class ZTNormPipeline(object):
         allow_scoring_with_all_biometric_references=False,
     ):
 
-        # z_probe_features = self._inject_references(
-        #    z_probe_features, t_biometric_references
-        # )
-
         # Reusing the zprobe_features and t_biometric_references
         zt_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.score_samples(
             z_probe_features,
@@ -258,20 +248,6 @@ class ZTNorm(object):
         # Axi1 1=TNORM
         return (score - mu) / std
 
-        """
-        if axis == 1:
-            return (
-                score
-                - np.tile(mu.reshape(N, 1), (1, score.shape[1]))
-            ) / np.tile(std.reshape(N, 1), (1, score.shape[1]))
-        else:
-            return (
-                score
-                - np.tile(mu.reshape(1, N), (score.shape[0], 1))
-            ) / np.tile(std.reshape(1, N), (score.shape[0], 1))
-        """
-        
-
     def _compute_std(self, mu, norm_base_scores, axis=1):
         # Reference: https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/master/bob/learn/em/test/test_ztnorm.py
         # Axis 0=ZNORM
@@ -317,16 +293,10 @@ class ZTNorm(object):
         axis=0 computes CORRECTLY the statistics for ZNorm
         axis=1 computes CORRECTLY the statistics for TNorm
         """
-
         # Dumping all scores
-        if isinstance(sampleset_for_norm[0][0], DelayedSample):
-            score_floats = np.array(
-                [f.data for sset in sampleset_for_norm for s in sset for f in s.data]
-            )
-        else:
-            score_floats = np.array(
-                [s.data for sset in sampleset_for_norm for s in sset]
-            )
+        score_floats = np.array(
+              [s.data for sset in sampleset_for_norm for s in sset]
+        )
 
         # Reshaping in PROBE vs BIOMETRIC_REFERENCES
         n_probes = len(sampleset_for_norm)
@@ -334,23 +304,18 @@ class ZTNorm(object):
         score_floats = score_floats.reshape((n_probes, n_references))
 
         # AXIS ON THE MODELS
-        big_mu = np.mean(score_floats, axis=axis)
-        #big_std = np.std(score_floats, axis=axis)
+        big_mu = np.mean(score_floats, axis=axis)        
         big_std = self._compute_std(big_mu, score_floats, axis=axis)
 
         # Creating statistics structure with subject id as the key
         stats = {}
         if axis == 0:
-            # TODO: NEED TO SOLVE THIS FETCHING.
-            # IT SHOULD BE TRANSPARENT
-            if isinstance(sampleset_for_norm[0][0], DelayedSample):
-                sset = sampleset_for_norm[0].samples[0].data
-            else:
-                sset = sampleset_for_norm[0]
-
-            for mu, std, s in zip(big_mu, big_std, sset):
-                stats[s.subject] = {"big_mu": mu, "big_std": std}
+            # Z-Norm is one statistic per biometric reference
+            biometric_reference_subjects = [br.subject for br in sampleset_for_norm[0]]
+            for mu, std, s in zip(big_mu, big_std, biometric_reference_subjects):
+                stats[s] = {"big_mu": mu, "big_std": std}
         else:
+            # T-Norm is one statistic per probe
             for mu, std, sset in zip(big_mu, big_std, sampleset_for_norm):
                 stats[sset.subject] = {"big_mu": mu, "big_std": std}
 
@@ -359,7 +324,6 @@ class ZTNorm(object):
     def _znorm_samplesets(self, probe_scores, stats):
         # Normalizing
         # TODO: THIS TENDS TO BE EXTREMLY SLOW
-
         z_normed_score_samples = []
         for probe_score in probe_scores:
             z_normed_score_samples.append(self._apply_znorm(probe_score, stats))
@@ -497,10 +461,10 @@ class ZTNormCheckpointWrapper(object):
 
     def _write_scores(self, samples, path):
         os.makedirs(os.path.dirname(path), exist_ok=True)
-        open(path, "wb").write(pickle.dumps(samples))
+        open(path, "wb").write(cloudpickle.dumps(samples))
 
     def _load(self, path):
-        return pickle.loads(open(path, "rb").read())
+        return cloudpickle.loads(open(path, "rb").read())
 
     def _apply_znorm(self, probe_score, stats):
 
diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py
index 89516f79..f54d3f64 100644
--- a/bob/bio/base/script/vanilla_biometrics.py
+++ b/bob/bio/base/script/vanilla_biometrics.py
@@ -157,7 +157,7 @@ def vanilla_biometrics(
     """
 
     def _compute_scores(result, dask_client):
-        if isinstance(result, Delayed):
+        if isinstance(result, Delayed) or isinstance(result, dask.bag.Bag):
             if dask_client is not None:
                 result = result.compute(scheduler=dask_client)
             else:
@@ -254,11 +254,12 @@ def vanilla_biometrics(
             def _build_filename(score_file_name, suffix):
                 return os.path.join(score_file_name, suffix)
 
-            # Running RAW_SCORES
+            # Running RAW_SCORES
             raw_scores = _post_process_scores(
                 pipeline, raw_scores, _build_filename(score_file_name, "raw_scores")
             )
-            _compute_scores(raw_scores, dask_client)
+
+            _ = _compute_scores(raw_scores, dask_client)
 
             # Z-SCORES
             z_normed_scores = _post_process_scores(
@@ -266,7 +267,7 @@ def vanilla_biometrics(
                 z_normed_scores,
                 _build_filename(score_file_name, "z_normed_scores"),
             )
-            _compute_scores(z_normed_scores, dask_client)
+            _ = _compute_scores(z_normed_scores, dask_client)
 
             # T-SCORES
             t_normed_scores = _post_process_scores(
@@ -274,7 +275,7 @@ def vanilla_biometrics(
                 t_normed_scores,
                 _build_filename(score_file_name, "t_normed_scores"),
             )
-            _compute_scores(t_normed_scores, dask_client)
+            _ = _compute_scores(t_normed_scores, dask_client)
 
             # ZT-SCORES
             zt_normed_scores = _post_process_scores(
@@ -282,7 +283,7 @@ def vanilla_biometrics(
                 zt_normed_scores,
                 _build_filename(score_file_name, "zt_normed_scores"),
             )
-            _compute_scores(zt_normed_scores, dask_client)
+            _ = _compute_scores(zt_normed_scores, dask_client)
 
         else:
 
@@ -296,7 +297,7 @@ def vanilla_biometrics(
             post_processed_scores = _post_process_scores(
                 pipeline, result, score_file_name
             )
-            _compute_scores(post_processed_scores, dask_client)
+            _ = _compute_scores(post_processed_scores, dask_client)
 
     if dask_client is not None:
         dask_client.shutdown()
diff --git a/bob/bio/base/test/test_vanilla_biometrics_score_norm.py b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py
index 99b57dba..eb9f1593 100644
--- a/bob/bio/base/test/test_vanilla_biometrics_score_norm.py
+++ b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py
@@ -41,9 +41,9 @@ import copy
 def zt_norm_stubs(references, probes, t_references, z_probes):
     def _norm(scores, norm_base_scores, axis=1):
         mu = np.mean(norm_base_scores, axis=axis)
-        
-        #old = True
-        #if old:
+
+        # old = True
+        # if old:
         #    std = np.std(norm_base_scores, axis=axis)
         #    if axis == 1:
         #        return ((scores.T - mu) / std).T
@@ -96,7 +96,6 @@ def zt_norm_stubs(references, probes, t_references, z_probes):
                 )
             ) / np.tile(std.reshape(1, norm_base_scores.shape[1]), (scores.shape[0], 1))
 
-
     n_reference = references.shape[0]
     n_probes = probes.shape[0]
     n_t_references = t_references.shape[0]
@@ -159,14 +158,7 @@ def test_norm_mechanics():
     def _dump_scores_from_samples(scores, shape):
         # We have to transpose because the tests are BIOMETRIC_REFERENCES vs PROBES
         # and bob.bio.base is PROBES vs BIOMETRIC_REFERENCES
-        if isinstance(scores[0][0], DelayedSample):
-            return (
-                np.array([f.data for sset in scores for s in sset for f in s.data])
-                .reshape(shape)
-                .T
-            )
-        else:
-            return np.array([s.data for sset in scores for s in sset]).reshape(shape).T
+        return np.array([s.data for sset in scores for s in sset]).reshape(shape).T
 
     with tempfile.TemporaryDirectory() as dir_name:
 
@@ -394,8 +386,8 @@ def test_norm_mechanics():
     # With checkpoing
     run(False, with_checkpoint=True)
     run(False, with_checkpoint=True)
-    # shutil.rmtree(dir_name)  # Deleting the cache so it runs again from scratch
-    # os.makedirs(dir_name, exist_ok=True)
+    shutil.rmtree(dir_name)  # Deleting the cache so it runs again from scratch
+    os.makedirs(dir_name, exist_ok=True)
 
     # With dask
     run(True)  # On memory
-- 
GitLab