From f1473028d3d24ed47c96a01cc4dccc52ac888a78 Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Wed, 6 May 2020 19:00:43 +0200 Subject: [PATCH] Implemented ZT-Normalization. More tests should be implemented. --- .../pipelines/vanilla_biometrics/__init__.py | 2 +- .../vanilla_biometrics/abstract_classes.py | 1 + .../pipelines/vanilla_biometrics/pipelines.py | 158 ++++++++++++++++-- .../pipelines/vanilla_biometrics/wrappers.py | 95 +++++++++-- bob/bio/base/test/test_transformers.py | 2 +- bob/bio/base/test/test_vanilla_biometrics.py | 10 +- .../test_vanilla_biometrics_score_norm.py | 75 +++++++++ 7 files changed, 313 insertions(+), 30 deletions(-) create mode 100644 bob/bio/base/test/test_vanilla_biometrics_score_norm.py diff --git a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py index 66dfd01e..dcd7b9e9 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py @@ -1,4 +1,4 @@ -from .pipelines import VanillaBiometricsPipeline, ZNormVanillaBiometricsPipeline +from .pipelines import VanillaBiometricsPipeline, ZTNormVanillaBiometricsPipeline from .biometric_algorithms import Distance from .score_writers import FourColumnsScoreWriter, CSVScoreWriter diff --git a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py index 94dabb87..56c64bca 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py @@ -121,6 +121,7 @@ class BioAlgorithm(metaclass=ABCMeta): allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, ) ) + return retval def _score_sample_set( diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py index 50326721..e8f430a0 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py @@ -11,6 +11,7 @@ for bob.bio experiments import logging import numpy from .score_writers import FourColumnsScoreWriter +from .wrappers import BioAlgorithmZTNormWrapper logger = logging.getLogger(__name__) @@ -107,7 +108,7 @@ class VanillaBiometricsPipeline(object): ) # Scores all probes - scores = self.compute_scores( + scores, _ = self.compute_scores( probe_samples, biometric_references, allow_scoring_with_all_biometric_references, @@ -160,15 +161,55 @@ class VanillaBiometricsPipeline(object): ) # scores is a list of Samples - return scores + return scores, probe_features def write_scores(self, scores): return self.score_writer.write(scores) -class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline): - def __init__(self, vanilla_biometrics_pipeline): +class ZTNormVanillaBiometricsPipeline(object): + """ + Apply Z, T or ZT Score normalization on top of VanillaBiometric Pipeline + + Reference bibliography from: A Generative Model for Score Normalization in Speaker Recognition + https://arxiv.org/pdf/1709.09868.pdf + + + Example + ------- + >>> transformer = make_pipeline([]) + >>> biometric_algorithm = Distance() + >>> vanilla_biometrics_pipeline = VanillaBiometricsPipeline(transformer, biometric_algorithm) + >>> zt_pipeline = ZTNormVanillaBiometricsPipeline(vanilla_biometrics_pipeline) + >>> zt_pipeline(...) + + Parameters + ---------- + + vanilla_biometrics_pipeline: :any:`VanillaBiometricsPipeline` + An instance :any:`VanillaBiometricsPipeline` to the wrapped with score normalization + + z_norm: bool + If True, applies ZScore normalization on top of raw scores. + + t_norm: bool + If True, applies TScore normalization on top of raw scores. + If both, z_norm and t_norm are true, it applies score normalization + + """ + + + def __init__(self, vanilla_biometrics_pipeline, z_norm=True, t_norm=True): self.vanilla_biometrics_pipeline = vanilla_biometrics_pipeline + # Wrapping with ZTNorm + self.vanilla_biometrics_pipeline.biometric_algorithm = BioAlgorithmZTNormWrapper( + self.vanilla_biometrics_pipeline.biometric_algorithm + ) + self.z_norm = z_norm + self.t_norm = t_norm + + if not z_norm and not t_norm: + raise ValueError("Both z_norm and t_norm are False. No normalization will be applied") def __call__( self, @@ -176,6 +217,7 @@ class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline): biometric_reference_samples, probe_samples, zprobe_samples, + t_biometric_reference_samples, allow_scoring_with_all_biometric_references=False, ): @@ -186,17 +228,45 @@ class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline): biometric_reference_samples ) - raw_scores = self.vanilla_biometrics_pipeline( - background_model_samples, - biometric_reference_samples, + raw_scores, probe_features = self.compute_scores( probe_samples, + biometric_references, + allow_scoring_with_all_biometric_references, + ) + + # Z NORM + if self.z_norm: + z_normed_scores, z_probe_features = self.compute_znorm_scores( + zprobe_samples, + raw_scores, + biometric_references, + allow_scoring_with_all_biometric_references, + ) + if not self.t_norm: + return z_normed_scores + + # T NORM + t_normed_scores, t_biometric_references = self.compute_tnorm_scores( + t_biometric_reference_samples, + probe_features, + raw_scores, allow_scoring_with_all_biometric_references, ) + if not self.z_norm: + return t_normed_scores - return self.compute_znorm_scores( - zprobe_samples, raw_scores, biometric_references + + # ZT NORM + zt_normed_scores = self.compute_ztnorm_scores( + z_probe_features, + t_biometric_references, + z_normed_scores, + t_normed_scores, + allow_scoring_with_all_biometric_references, ) + return zt_normed_scores + def train_background_model(self, background_model_samples): return self.vanilla_biometrics_pipeline.train_background_model( background_model_samples @@ -220,10 +290,74 @@ class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline): allow_scoring_with_all_biometric_references, ) - def compute_znorm_scores(self, zprobe_samples, probe_scores, biometric_references): + def compute_znorm_scores( + self, + zprobe_samples, + probe_scores, + biometric_references, + allow_scoring_with_all_biometric_references=False, + ): - z_scores = self.vanilla_biometrics_pipeline.compute_scores( + z_scores, z_probe_features = self.compute_scores( zprobe_samples, biometric_references ) - pass + z_normed_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.compute_norm_scores( + z_scores, probe_scores, allow_scoring_with_all_biometric_references, + ) + + return z_normed_scores, z_probe_features + + def compute_tnorm_scores( + self, + t_biometric_reference_samples, + probe_features, + probe_scores, + allow_scoring_with_all_biometric_references=False, + ): + + t_biometric_references = self.create_biometric_reference( + t_biometric_reference_samples + ) + + # Reusing the probe features + t_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.score_samples( + probe_features, + t_biometric_references, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + + t_normed_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.compute_norm_scores( + t_scores, probe_scores, allow_scoring_with_all_biometric_references, + ) + + return t_normed_scores, t_biometric_references + + def compute_ztnorm_scores(self, + z_probe_features, + t_biometric_references, + z_normed_scores, + t_normed_scores, + allow_scoring_with_all_biometric_references=False + ): + + + # Reusing the zprobe_features and t_biometric_references + zt_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.score_samples( + z_probe_features, + t_biometric_references, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + + # Z Normalizing the T-normed scores + z_normed_t_normed = self.vanilla_biometrics_pipeline.biometric_algorithm.compute_norm_scores( + zt_scores, t_normed_scores, allow_scoring_with_all_biometric_references, + ) + + # (Z Normalizing the T-normed scores) the Z normed scores + zt_normed_scores = self.vanilla_biometrics_pipeline.biometric_algorithm.compute_norm_scores( + z_normed_t_normed, z_normed_scores, allow_scoring_with_all_biometric_references, + ) + + + return zt_normed_scores diff --git a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py index 3ab3d83f..00f5a1f2 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py @@ -1,4 +1,4 @@ -from bob.pipelines import DelayedSample, SampleSet +from bob.pipelines import DelayedSample, SampleSet, Sample import bob.io.base import os import dask @@ -7,6 +7,7 @@ from .score_writers import FourColumnsScoreWriter from .abstract_classes import BioAlgorithm import pickle import bob.pipelines as mario +import numpy as np class BioAlgorithmCheckpointWrapper(BioAlgorithm): @@ -35,13 +36,7 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm): """ - def __init__( - self, - biometric_algorithm, - base_dir, - force=False, - **kwargs - ): + def __init__(self, biometric_algorithm, base_dir, force=False, **kwargs): super().__init__(**kwargs) self.biometric_reference_dir = os.path.join(base_dir, "biometric_references") @@ -116,11 +111,7 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm): ) self.write_scores(scored_sample_set.samples, path) scored_sample_set = SampleSet( - [ - DelayedSample( - functools.partial(_load, path), parent=sampleset - ) - ], + [DelayedSample(functools.partial(_load, path), parent=sampleset)], parent=sampleset, ) else: @@ -199,8 +190,82 @@ def dask_vanilla_biometrics(vanila_biometrics_pipeline, npartitions=None): def _write_scores(scores): return scores.map_partitions(vanila_biometrics_pipeline.write_scores_on_dask) - vanila_biometrics_pipeline.write_scores_on_dask = vanila_biometrics_pipeline.write_scores - vanila_biometrics_pipeline.write_scores = _write_scores + vanila_biometrics_pipeline.write_scores_on_dask = ( + vanila_biometrics_pipeline.write_scores + ) + vanila_biometrics_pipeline.write_scores = _write_scores return vanila_biometrics_pipeline + + +class BioAlgorithmZTNormWrapper(BioAlgorithm): + """ + Wraps an algorithm with Z-Norm scores + """ + + def __init__(self, biometric_algorithm, **kwargs): + + self.biometric_algorithm = biometric_algorithm + super().__init__(**kwargs) + + def enroll(self, enroll_features): + return self.biometric_algorithm.enroll(enroll_features) + + def score(self, biometric_reference, data): + return self.biometric_algorithm.score(biometric_reference, data) + + def score_multiple_biometric_references(self, biometric_references, data): + return self.biometric_algorithm.score_multiple_biometric_references( + biometric_references, data + ) + + def compute_norm_scores( + self, + base_norm_scores, + probe_scores, + allow_scoring_with_all_biometric_references=False, + ): + """ + Base normalization function + """ + + def _norm(score, mu, std): + return (score - mu) / std + + score_floats = np.array([s.data for sset in base_norm_scores for s in sset]) + mu = np.mean(score_floats) + std = np.std(score_floats) + + # Normalizing + normed_score_samples = [] + for probe in probe_scores: + sampleset = SampleSet([], parent=probe) + for biometric_reference_score in probe: + score = _norm(biometric_reference_score.data, mu, std) + new_sample = Sample(score, parent=biometric_reference_score) + sampleset.samples.append(new_sample) + normed_score_samples.append(sampleset) + + return normed_score_samples + + + def compute_ztnorm_scores( + self, + z_probe_features, + t_biometrics_references, + z_scores, + t_scores, + probe_scores, + allow_scoring_with_all_biometric_references=False + ): + + # TxZ scores + txz_scores_sset = self.biometric_algorithm.score_samples( + z_probe_features, + t_biometrics_references, + allow_scoring_with_all_biometric_references, + ) + + + pass diff --git a/bob/bio/base/test/test_transformers.py b/bob/bio/base/test/test_transformers.py index 0bd43dca..730c679a 100644 --- a/bob/bio/base/test/test_transformers.py +++ b/bob/bio/base/test/test_transformers.py @@ -33,7 +33,7 @@ class FakePreprocesor(Preprocessor): class FakeExtractor(Extractor): def __call__(self, data): - return data.flatten() + return data.flatten()[0:10] # Selecting the first 10 features class FakeExtractorFittable(Extractor): diff --git a/bob/bio/base/test/test_vanilla_biometrics.py b/bob/bio/base/test/test_vanilla_biometrics.py index b8751e88..a4a6d889 100644 --- a/bob/bio/base/test/test_vanilla_biometrics.py +++ b/bob/bio/base/test/test_vanilla_biometrics.py @@ -107,12 +107,20 @@ class DummyDatabase: def zprobes(self): zprobes = [] - zprobes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=13) + zprobes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=14) for p in zprobes: + p.subject = "z-" + str(p.subject) p.references = [str(r) for r in list(range(self.n_references))] return zprobes + + def treferences(self): + t_sset = self._create_random_sample_set(self.n_references, self.dim, seed=15) + for t in t_sset: + t.subject = "t_" + str(t.subject) + return t_sset + @property def allow_scoring_with_all_biometric_references(self): return True diff --git a/bob/bio/base/test/test_vanilla_biometrics_score_norm.py b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py new file mode 100644 index 00000000..eab3ed4a --- /dev/null +++ b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch> + +from bob.pipelines import Sample, SampleSet, DelayedSample +import os +import numpy as np +import tempfile +from sklearn.pipeline import make_pipeline +from bob.bio.base.wrappers import wrap_bob_legacy + +from bob.bio.base.test.test_transformers import ( + FakePreprocesor, + FakeExtractor, + FakeAlgorithm, +) +from bob.bio.base.test.test_vanilla_biometrics import DummyDatabase, _make_transformer + + +from bob.bio.base.pipelines.vanilla_biometrics import ( + Distance, + VanillaBiometricsPipeline, + ZTNormVanillaBiometricsPipeline, + BioAlgorithmCheckpointWrapper, + dask_vanilla_biometrics, + BioAlgorithmLegacy, +) + +import bob.pipelines as mario +import uuid +import shutil +import itertools + + +def test_znorm_on_memory(): + + with tempfile.TemporaryDirectory() as dir_name: + + def run_pipeline(with_dask): + + database = DummyDatabase(one_d=False) + + transformer = _make_transformer(dir_name) + + biometric_algorithm = Distance() + + vanilla_biometrics_pipeline = ZTNormVanillaBiometricsPipeline( + VanillaBiometricsPipeline(transformer, biometric_algorithm) + ) + + if with_dask: + vanilla_biometrics_pipeline = dask_vanilla_biometrics( + vanilla_biometrics_pipeline, npartitions=2 + ) + + scores = vanilla_biometrics_pipeline( + database.background_model_samples(), + database.references(), + database.probes(), + database.zprobes(), + database.treferences(), + allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references, + ) + + if with_dask: + scores = scores.compute(scheduler="single-threaded") + + assert len(scores) == 10 + + run_pipeline(False) + #run_pipeline(False) # Testing checkpoint + # shutil.rmtree(dir_name) # Deleting the cache so it runs again from scratch + # os.makedirs(dir_name, exist_ok=True) + # run_pipeline(True) + # run_pipeline(True) # Testing checkpoint -- GitLab