diff --git a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py index 157c0def30ece6b451796f480306906f821526ad..fed15da90ea07dfff467b338e193da49378eeec5 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py @@ -38,16 +38,24 @@ class BioAlgorithmCheckpointWrapper(BioAlgorithm): """ - def __init__(self, biometric_algorithm, base_dir, force=False, **kwargs): + def __init__(self, biometric_algorithm, base_dir, group=None, force=False, **kwargs): super().__init__(**kwargs) - self.biometric_reference_dir = os.path.join(base_dir, "biometric_references") - self.score_dir = os.path.join(base_dir, "scores") + self.base_dir = base_dir + self.set_score_references_path(group) + self.biometric_algorithm = biometric_algorithm self.force = force self._biometric_reference_extension = ".hdf5" - self._score_extension = ".pkl" - self.base_dir = base_dir + self._score_extension = ".pkl" + + def set_score_references_path(self, group): + if group is None: + self.biometric_reference_dir = os.path.join(self.base_dir, "biometric_references") + self.score_dir = os.path.join(self.base_dir, "scores") + else: + self.biometric_reference_dir = os.path.join(self.base_dir, group, "biometric_references") + self.score_dir = os.path.join(self.base_dir, group, "scores") def enroll(self, enroll_features): return self.biometric_algorithm.enroll(enroll_features) @@ -190,6 +198,9 @@ class BioAlgorithmDaskWrapper(BioAlgorithm): biometric_references, data ) + def set_score_references_path(self, group): + self.biometric_algorithm.set_score_references_path(group) + def dask_vanilla_biometrics(pipeline, npartitions=None, partition_size=None): """ diff --git a/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py b/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py index 0b58fb88e0a93115f5cb5aa95c5b289409fdfa79..38b66e73b65f2758479d3a1be05c693ecf4a3760 100644 --- 
a/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py @@ -47,6 +47,17 @@ class ZTNormPipeline(object): If True, applies TScore normalization on top of raw scores. If both, z_norm and t_norm are true, it applies score normalization + score_writer: + + adaptive_score_fraction: float + Set the proportion of the impostor scores used to compute :math:`\mu` and :math:`\sigma` for the T normalization + This is also known as adaptive T-Norm (https://ieeexplore.ieee.org/document/1415220) or + Top-Norm (https://ieeexplore.ieee.org/document/4013533) + + adaptive_score_descending_sort: bool + If True, during the Top-norm statistics computations, sort the scores in descending order + + """ def __init__( @@ -55,9 +66,13 @@ class ZTNormPipeline(object): z_norm=True, t_norm=True, score_writer=FourColumnsScoreWriter("./scores.txt"), + adaptive_score_fraction=1.0, + adaptive_score_descending_sort=True, ): self.vanilla_biometrics_pipeline = vanilla_biometrics_pipeline - self.ztnorm_solver = ZTNorm() + self.ztnorm_solver = ZTNorm( + adaptive_score_fraction, adaptive_score_descending_sort + ) self.z_norm = z_norm self.t_norm = t_norm @@ -78,7 +93,6 @@ class ZTNormPipeline(object): allow_scoring_with_all_biometric_references=False, ): - self.transformer = self.train_background_model(background_model_samples) # Create biometric samples @@ -110,6 +124,7 @@ class ZTNormPipeline(object): return z_normed_scores # T NORM + t_normed_scores, t_scores, t_biometric_references = self.compute_tnorm_scores( t_biometric_reference_samples, probe_features, @@ -128,8 +143,17 @@ class ZTNormPipeline(object): t_scores, allow_scoring_with_all_biometric_references, ) - - return raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores + + # S-norm + s_normed_scores = self.compute_snorm_scores(z_normed_scores, t_normed_scores) + + return ( + raw_scores, + z_normed_scores, + t_normed_scores, + zt_normed_scores, + s_normed_scores, + ) def 
train_background_model(self, background_model_samples): return self.vanilla_biometrics_pipeline.train_background_model( @@ -194,7 +218,7 @@ class ZTNormPipeline(object): ) t_normed_scores = self.ztnorm_solver.compute_tnorm_scores( - probe_scores, t_scores, t_biometric_references + probe_scores, t_scores, t_biometric_references, ) return t_normed_scores, t_scores, t_biometric_references @@ -217,22 +241,31 @@ class ZTNormPipeline(object): # Z Normalizing the T-normed scores z_normed_t_normed = self.ztnorm_solver.compute_znorm_scores( - t_scores, zt_scores, t_biometric_references + t_scores, zt_scores, t_biometric_references, ) # (Z Normalizing the T-normed scores) the Z normed scores zt_normed_scores = self.ztnorm_solver.compute_tnorm_scores( - z_normed_scores, z_normed_t_normed, t_biometric_references + z_normed_scores, z_normed_t_normed, t_biometric_references, ) return zt_normed_scores + def compute_snorm_scores(self, znormed_scores, tnormed_scores): + + s_normed_scores = self.ztnorm_solver.compute_snorm_scores( + znormed_scores, tnormed_scores + ) + + return s_normed_scores + def write_scores(self, scores): return self.vanilla_biometrics_pipeline.write_scores(scores) def post_process(self, score_paths, filename): return self.vanilla_biometrics_pipeline.post_process(score_paths, filename) + class ZTNorm(object): """ Computes Z, T and ZT Score Normalization of a :any:`BioAlgorithm` @@ -240,8 +273,24 @@ class ZTNorm(object): Reference bibliography from: A Generative Model for Score Normalization in Speaker Recognition https://arxiv.org/pdf/1709.09868.pdf + + Parameters + ---------- + + adaptive_score_fraction: float + Set the proportion of the impostor scores used to compute :math:`\mu` and :math:`\sigma` for the T normalization + This is also known as adaptive T-Norm (https://ieeexplore.ieee.org/document/1415220) or + Top-Norm (https://ieeexplore.ieee.org/document/4013533) + + adaptive_score_descending_sort: bool + If True, during the Top-norm statistics 
computations, sort the scores in descending order + """ + def __init__(self, adaptive_score_fraction, adaptive_score_descending_sort): + self.adaptive_score_fraction = adaptive_score_fraction + self.adaptive_score_descending_sort = adaptive_score_descending_sort + def _norm(self, score, mu, std): # Reference: https://gitlab.idiap.ch/bob/bob.learn.em/-/blob/master/bob/learn/em/test/test_ztnorm.py # Axis 0=ZNORM @@ -294,9 +343,7 @@ class ZTNorm(object): axis=1 computes CORRECTLY the statistics for TNorm """ # Dumping all scores - score_floats = np.array( - [s.data for sset in sampleset_for_norm for s in sset] - ) + score_floats = np.array([s.data for sset in sampleset_for_norm for s in sset]) # Reshaping in PROBE vs BIOMETRIC_REFERENCES n_probes = len(sampleset_for_norm) @@ -304,8 +351,25 @@ class ZTNorm(object): score_floats = score_floats.reshape((n_probes, n_references)) # AXIS ON THE MODELS - big_mu = np.mean(score_floats, axis=axis) - big_std = self._compute_std(big_mu, score_floats, axis=axis) + + proportion = int( + np.floor(score_floats.shape[axis] * self.adaptive_score_fraction) + ) + + + sorted_scores = ( + -np.sort(-score_floats, axis=axis) + if self.adaptive_score_descending_sort + else np.sort(score_floats, axis=axis) + ) + + if axis == 0: + top_scores = sorted_scores[0:proportion, :] + else: + top_scores = sorted_scores[:, 0:proportion] + + big_mu = np.mean(top_scores, axis=axis) + big_std = self._compute_std(big_mu, top_scores, axis=axis) # Creating statistics structure with subject id as the key stats = {} @@ -390,10 +454,33 @@ class ZTNorm(object): Base T-normalization function """ - stats = self._compute_stats(sampleset_for_tnorm, t_biometric_references, axis=1) + stats = self._compute_stats( + sampleset_for_tnorm, t_biometric_references, axis=1, + ) return self._tnorm_samplesets(probe_scores, stats) + def _snorm(self, z_score, t_score): + return 0.5 * (z_score + t_score) + + def _snorm_samplesets(self, znormed_scores, tnormed_scores): + + 
s_normed_samplesets = [] + for z, t in zip(znormed_scores, tnormed_scores): + s_normed_scores = SampleSet([], parent=z) + for b_z, b_t in zip(z, t): + score = self._snorm(b_z.data, b_t.data) + + new_sample = Sample(score, parent=b_z) + s_normed_scores.samples.append(new_sample) + s_normed_samplesets.append(s_normed_scores) + + return s_normed_samplesets + + def compute_snorm_scores(self, znormed_scores, tnormed_scores): + + return self._snorm_samplesets(znormed_scores, tnormed_scores) + class ZTNormDaskWrapper(object): """ @@ -435,6 +522,11 @@ class ZTNormDaskWrapper(object): return probe_scores.map_partitions(self.ztnorm._tnorm_samplesets, stats) + def compute_snorm_scores(self, znormed_scores, tnormed_scores): + return znormed_scores.map_partitions( + self.ztnorm._snorm_samplesets, tnormed_scores + ) + class ZTNormCheckpointWrapper(object): """ @@ -458,7 +550,6 @@ class ZTNormCheckpointWrapper(object): self.force = force self.base_dir = base_dir - def _write_scores(self, samples, path): os.makedirs(os.path.dirname(path), exist_ok=True) open(path, "wb").write(cloudpickle.dumps(samples)) @@ -488,6 +579,28 @@ class ZTNormCheckpointWrapper(object): return z_normed_score + def _apply_tnorm(self, probe_score, stats): + + path = os.path.join(self.tnorm_score_path, str(probe_score.key) + ".pkl") + + if self.force or not os.path.exists(path): + t_normed_score = self.ztnorm._apply_tnorm(probe_score, stats) + + self._write_scores(t_normed_score.samples, path) + + t_normed_score = SampleSet( + [ + DelayedSample( + functools.partial(self._load, path), parent=probe_score + ) + ], + parent=probe_score, + ) + else: + t_normed_score = SampleSet(self._load(path), parent=probe_score) + + return t_normed_score + def compute_znorm_scores( self, probe_scores, sampleset_for_znorm, biometric_references ): @@ -499,11 +612,13 @@ class ZTNormCheckpointWrapper(object): def compute_tnorm_scores( self, probe_scores, sampleset_for_tnorm, t_biometric_references ): - return 
self.ztnorm.compute_tnorm_scores( probe_scores, sampleset_for_tnorm, t_biometric_references ) + def compute_snorm_scores(self, znormed_scores, tnormed_scores): + return self.ztnorm.compute_snorm_scores(znormed_scores, tnormed_scores) + def _compute_stats(self, sampleset_for_norm, biometric_references, axis=0): return self.ztnorm._compute_stats( sampleset_for_norm, biometric_references, axis=axis @@ -514,3 +629,6 @@ class ZTNormCheckpointWrapper(object): def _tnorm_samplesets(self, probe_scores, stats): return self.ztnorm._tnorm_samplesets(probe_scores, stats) + + def _snorm_samplesets(self, probe_scores, stats): + return self.ztnorm._snorm_samplesets(probe_scores, stats) diff --git a/bob/bio/base/test/test_vanilla_biometrics_score_norm.py b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py index eb9f1593ce7eb3e01f4d6ec35ed2dee5bcb77ae7..3871f641cd3a72ec6f007c83fee8722576e880d4 100644 --- a/bob/bio/base/test/test_vanilla_biometrics_score_norm.py +++ b/bob/bio/base/test/test_vanilla_biometrics_score_norm.py @@ -127,7 +127,11 @@ def zt_norm_stubs(references, probes, t_references, z_probes): zt_normed_scores = _norm(z_normed_scores, z_t_scores, axis=0) assert zt_normed_scores.shape == (n_reference, n_probes) - return raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores + s_normed_scores = (z_normed_scores+t_normed_scores)*0.5 + assert s_normed_scores.shape == (n_reference, n_probes) + + + return raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores, s_normed_scores def test_norm_mechanics(): @@ -191,6 +195,7 @@ def test_norm_mechanics(): z_normed_scores_ref, t_normed_scores_ref, zt_normed_scores_ref, + s_normed_scores_ref, ) = zt_norm_stubs(references, probes, t_references, z_probes) ############ @@ -249,7 +254,7 @@ def test_norm_mechanics(): ############# z_vanilla_pipeline = ZTNormPipeline( - vanilla_pipeline, z_norm=True, t_norm=False, + vanilla_pipeline, z_norm=True, t_norm=False ) if with_checkpoint: @@ -280,6 +285,7 @@ def 
test_norm_mechanics(): ) assert np.allclose(z_normed_scores, z_normed_scores_ref) + ############ # TESTING T-NORM ############# @@ -313,7 +319,7 @@ def test_norm_mechanics(): t_normed_scores = _dump_scores_from_samples( t_normed_score_samples, shape=(n_probes, n_references) - ) + ) assert np.allclose(t_normed_scores, t_normed_scores_ref) ############ @@ -338,6 +344,7 @@ def test_norm_mechanics(): z_normed_score_samples, t_normed_score_samples, zt_normed_score_samples, + s_normed_score_samples, ) = zt_vanilla_pipeline( [], biometric_reference_sample_sets, @@ -360,6 +367,11 @@ def test_norm_mechanics(): scheduler="single-threaded" ) + s_normed_score_samples = s_normed_score_samples.compute( + scheduler="single-threaded" + ) + + raw_scores = _dump_scores_from_samples( raw_score_samples, shape=(n_probes, n_references) ) @@ -380,6 +392,14 @@ def test_norm_mechanics(): ) assert np.allclose(zt_normed_scores, zt_normed_scores_ref) + s_normed_scores = _dump_scores_from_samples( + s_normed_score_samples, shape=(n_probes, n_references) + ) + assert np.allclose(s_normed_scores, s_normed_scores_ref) + + + + # No dask run(False) # On memory @@ -418,7 +438,7 @@ def test_znorm_on_memory(): vanilla_biometrics_pipeline, npartitions=2 ) - raw_scores, z_scores, t_scores, zt_scores = vanilla_biometrics_pipeline( + raw_scores, z_scores, t_scores, zt_scores, s_scores = vanilla_biometrics_pipeline( database.background_model_samples(), database.references(), database.probes(), @@ -448,15 +468,22 @@ def test_znorm_on_memory(): t_scores = _concatenate( vanilla_biometrics_pipeline, t_scores, "scores-dev_tscores" ) + zt_scores = _concatenate( vanilla_biometrics_pipeline, zt_scores, "scores-dev_ztscores" ) + s_scores = _concatenate( + vanilla_biometrics_pipeline, s_scores, "scores-dev_sscores" + ) + if with_dask: raw_scores = raw_scores.compute(scheduler="single-threaded") z_scores = z_scores.compute(scheduler="single-threaded") t_scores = t_scores.compute(scheduler="single-threaded") zt_scores 
= zt_scores.compute(scheduler="single-threaded") + s_scores = s_scores.compute(scheduler="single-threaded") + if isinstance(score_writer, CSVScoreWriter): n_lines = 51 if with_dask else 101 @@ -465,12 +492,14 @@ def test_znorm_on_memory(): assert len(open(z_scores[0], "r").readlines()) == n_lines assert len(open(t_scores[0], "r").readlines()) == n_lines assert len(open(zt_scores[0], "r").readlines()) == n_lines + assert len(open(s_scores[0], "r").readlines()) == n_lines else: assert len(raw_scores) == 10 assert len(z_scores) == 10 assert len(t_scores) == 10 assert len(zt_scores) == 10 + assert len(s_scores) == 10 run_pipeline(False) run_pipeline(False) # Testing checkpoint diff --git a/bob/bio/base/transformers/preprocessor.py b/bob/bio/base/transformers/preprocessor.py index 1b1e15c2a2c6d4552e791a70b642db6d0653faeb..f91c563cfd8c060531d814e6a40cf13a56cfb7d0 100644 --- a/bob/bio/base/transformers/preprocessor.py +++ b/bob/bio/base/transformers/preprocessor.py @@ -37,3 +37,6 @@ class PreprocessorTransformer(TransformerMixin, BaseEstimator): def _more_tags(self): return {"stateless": True, "requires_fit": False} + + def fit(self, X, y=None): + return self