From d9f5534b840e7b87cdccbe68eadb3d8209c9ab07 Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Fri, 22 May 2020 22:00:52 +0200 Subject: [PATCH] Polishing ZT-Norm --- .../pipelines/vanilla_biometrics/zt_norm.py | 62 ++++--------------- bob/bio/base/script/vanilla_biometrics.py | 15 ++--- .../test_vanilla_biometrics_score_norm.py | 20 ++---- 3 files changed, 27 insertions(+), 70 deletions(-) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py b/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py index 0de212ec..0b58fb88 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py @@ -10,7 +10,7 @@ from bob.pipelines import DelayedSample, Sample, SampleSet import numpy as np import dask import functools -import pickle +import cloudpickle import os from .score_writers import FourColumnsScoreWriter import logging @@ -78,6 +78,7 @@ class ZTNormPipeline(object): allow_scoring_with_all_biometric_references=False, ): + self.transformer = self.train_background_model(background_model_samples) # Create biometric samples @@ -128,11 +129,6 @@ class ZTNormPipeline(object): allow_scoring_with_all_biometric_references, ) - - # TODO: Do the score write - #if self.vanilla_biometrics_pipeline.score_writer is not None: - # return self.write_scores(scores) - return raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores def train_background_model(self, background_model_samples): @@ -166,8 +162,6 @@ class ZTNormPipeline(object): allow_scoring_with_all_biometric_references=False, ): - # zprobe_samples = self._inject_references(zprobe_samples, biometric_references) - z_scores, z_probe_features = self.compute_scores( zprobe_samples, biometric_references ) @@ -214,10 +208,6 @@ class ZTNormPipeline(object): allow_scoring_with_all_biometric_references=False, ): - # z_probe_features = self._inject_references( - # z_probe_features, t_biometric_references - # ) - # Reusing the zprobe_features and t_biometric_references zt_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.score_samples( z_probe_features, @@ -258,20 +248,6 @@ class ZTNorm(object): # Axi1 1=TNORM return (score - mu) / std - """ - if axis == 1: - return ( - score - - np.tile(mu.reshape(N, 1), (1, score.shape[1])) - ) / np.tile(std.reshape(N, 1), (1, score.shape[1])) - else: - return ( - score - - np.tile(mu.reshape(1, N), (score.shape[0], 1)) - ) / np.tile(std.reshape(1, N), (score.shape[0], 1)) - """ - - def _compute_std(self, mu, norm_base_scores, axis=1): # Reference: https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/master/bob/learn/em/test/test_ztnorm.py # Axis 0=ZNORM @@ -317,16 +293,10 @@ class ZTNorm(object): axis=0 computes CORRECTLY the statistics for ZNorm axis=1 computes CORRECTLY the statistics for TNorm """ - # Dumping all scores - if isinstance(sampleset_for_norm[0][0], DelayedSample): - score_floats = np.array( - [f.data for sset in sampleset_for_norm for s in sset for f in s.data] - ) - else: - score_floats = np.array( - [s.data for sset in sampleset_for_norm for s in sset] - ) + score_floats = np.array( + [s.data for sset in sampleset_for_norm for s in sset] + ) # Reshaping in PROBE vs BIOMETRIC_REFERENCES n_probes = len(sampleset_for_norm) @@ -334,23 +304,18 @@ class ZTNorm(object): score_floats = score_floats.reshape((n_probes, n_references)) # AXIS ON THE MODELS - big_mu = np.mean(score_floats, axis=axis) - #big_std = np.std(score_floats, axis=axis) + big_mu = np.mean(score_floats, axis=axis) big_std = self._compute_std(big_mu, score_floats, axis=axis) # Creating statistics structure with subject id as the key stats = {} if axis == 0: - # TODO: NEED TO SOLVE THIS FETCHING. - # IT SHOULD BE TRANSPARENT - if isinstance(sampleset_for_norm[0][0], DelayedSample): - sset = sampleset_for_norm[0].samples[0].data - else: - sset = sampleset_for_norm[0] - - for mu, std, s in zip(big_mu, big_std, sset): - stats[s.subject] = {"big_mu": mu, "big_std": std} + # Z-Norm is one statistic per biometric references + biometric_reference_subjects = [br.subject for br in sampleset_for_norm[0]] + for mu, std, s in zip(big_mu, big_std, biometric_reference_subjects): + stats[s] = {"big_mu": mu, "big_std": std} else: + # T-Norm is one statistic per probe for mu, std, sset in zip(big_mu, big_std, sampleset_for_norm): stats[sset.subject] = {"big_mu": mu, "big_std": std} @@ -359,7 +324,6 @@ class ZTNorm(object): def _znorm_samplesets(self, probe_scores, stats): # Normalizing # TODO: THIS TENDS TO BE EXTREMLY SLOW - z_normed_score_samples = [] for probe_score in probe_scores: z_normed_score_samples.append(self._apply_znorm(probe_score, stats)) @@ -497,10 +461,10 @@ class ZTNormCheckpointWrapper(object): def _write_scores(self, samples, path): os.makedirs(os.path.dirname(path), exist_ok=True) - open(path, "wb").write(pickle.dumps(samples)) + open(path, "wb").write(cloudpickle.dumps(samples)) def _load(self, path): - return pickle.loads(open(path, "rb").read()) + return cloudpickle.loads(open(path, "rb").read()) def _apply_znorm(self, probe_score, stats): diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py index 89516f79..f54d3f64 100644 --- a/bob/bio/base/script/vanilla_biometrics.py +++ b/bob/bio/base/script/vanilla_biometrics.py @@ -157,7 +157,7 @@ def vanilla_biometrics( """ def _compute_scores(result, dask_client): - if isinstance(result, Delayed): + if isinstance(result, Delayed) or isinstance(result, dask.bag.Bag): if dask_client is not None: result = result.compute(scheduler=dask_client) else: @@ -254,11 +254,12 @@ def vanilla_biometrics( def _build_filename(score_file_name, suffix): return os.path.join(score_file_name, suffix) - # Running RAW_SCORES + # Running RAW_SCORES raw_scores = _post_process_scores( pipeline, raw_scores, _build_filename(score_file_name, "raw_scores") ) - _compute_scores(raw_scores, dask_client) + + _ = _compute_scores(raw_scores, dask_client) # Z-SCORES z_normed_scores = _post_process_scores( @@ -266,7 +267,7 @@ def vanilla_biometrics( z_normed_scores, _build_filename(score_file_name, "z_normed_scores"), ) - _compute_scores(z_normed_scores, dask_client) + _ = _compute_scores(z_normed_scores, dask_client) # T-SCORES t_normed_scores = _post_process_scores( @@ -274,7 +275,7 @@ def vanilla_biometrics( t_normed_scores, _build_filename(score_file_name, "t_normed_scores"), ) - _compute_scores(t_normed_scores, dask_client) + _ = _compute_scores(t_normed_scores, dask_client) # ZT-SCORES zt_normed_scores = _post_process_scores( @@ -282,7 +283,7 @@ def vanilla_biometrics( zt_normed_scores, _build_filename(score_file_name, "zt_normed_scores"), ) - _compute_scores(zt_normed_scores, dask_client) + _ = _compute_scores(zt_normed_scores, dask_client) else: @@ -296,7 +297,7 @@ def vanilla_biometrics( post_processed_scores = _post_process_scores( pipeline, result, score_file_name ) - _compute_scores(post_processed_scores, dask_client) + _ = _compute_scores(post_processed_scores, dask_client) if dask_client is not None: dask_client.shutdown() diff --git a/bob/bio/base/test/test_vanilla_biometrics_score_norm.py b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py index 99b57dba..eb9f1593 100644 --- a/bob/bio/base/test/test_vanilla_biometrics_score_norm.py +++ b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py @@ -41,9 +41,9 @@ import copy def zt_norm_stubs(references, probes, t_references, z_probes): def _norm(scores, norm_base_scores, axis=1): mu = np.mean(norm_base_scores, axis=axis) - - #old = True - #if old: + + # old = True + # if old: # std = np.std(norm_base_scores, axis=axis) # if axis == 1: # return ((scores.T - mu) / std).T @@ -96,7 +96,6 @@ def zt_norm_stubs(references, probes, t_references, z_probes): ) ) / np.tile(std.reshape(1, norm_base_scores.shape[1]), (scores.shape[0], 1)) - n_reference = references.shape[0] n_probes = probes.shape[0] n_t_references = t_references.shape[0] @@ -159,14 +158,7 @@ def test_norm_mechanics(): def _dump_scores_from_samples(scores, shape): # We have to transpose because the tests are BIOMETRIC_REFERENCES vs PROBES # and bob.bio.base is PROBES vs BIOMETRIC_REFERENCES - if isinstance(scores[0][0], DelayedSample): - return ( - np.array([f.data for sset in scores for s in sset for f in s.data]) - .reshape(shape) - .T - ) - else: - return np.array([s.data for sset in scores for s in sset]).reshape(shape).T + return np.array([s.data for sset in scores for s in sset]).reshape(shape).T with tempfile.TemporaryDirectory() as dir_name: @@ -394,8 +386,8 @@ def test_norm_mechanics(): # With checkpoing run(False, with_checkpoint=True) run(False, with_checkpoint=True) - # shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch - # os.makedirs(dir_name, exist_ok=True) + shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch + os.makedirs(dir_name, exist_ok=True) # With dask run(True) # On memory -- GitLab