diff --git a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py
index 5f6a882001b489ec6708465963389bf0d2e32380..66dfd01e419e6dc27dc8b7122c18327d080dce1a 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/__init__.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/__init__.py
@@ -1,4 +1,5 @@
-from .pipelines import VanillaBiometricsPipeline
+from .pipelines import VanillaBiometricsPipeline, ZNormVanillaBiometricsPipeline
+
 from .biometric_algorithms import Distance
 from .score_writers import FourColumnsScoreWriter, CSVScoreWriter
 from .wrappers import BioAlgorithmCheckpointWrapper, BioAlgorithmDaskWrapper, dask_vanilla_biometrics
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
index 7e99ec91d6617db8390e5e7e3b5e5b1e364c2bc5..be36a7a5b94229fc52d84d38d7f3cebbfc61cec9 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
@@ -5,6 +5,15 @@
 from abc import ABCMeta, abstractmethod
 from bob.pipelines.sample import Sample, SampleSet, DelayedSample
 import functools
+import numpy as np
+
+
+def average_scores(scores):
+    """
+    Given a :any:`numpy.ndarray` of scores coming from multiple probes,
+    average them along the probe axis
+    """
+    return np.mean(scores, axis=0)
 
 
 class BioAlgorithm(metaclass=ABCMeta):
@@ -13,10 +22,17 @@
     biometric model enrollement, via ``enroll()`` and
     scoring, with ``score()``.
 
+    Parameters
+    ----------
+
+    score_reduction_operation: ``collections.abc.Callable``
+        Callable containing the score reduction function to be applied to the samples in a sampleset
+
     """
 
-    def __init__(self, **kwargs):
+    def __init__(self, score_reduction_operation=average_scores, **kwargs):
         self.stacked_biometric_references = None
+        self.score_reduction_operation = score_reduction_operation
 
     def enroll_samples(self, biometric_references):
         """This method should implement the sub-pipeline 1 of the Vanilla Biometrics Pipeline :ref:`_vanilla-pipeline-1`.
@@ -113,53 +129,63 @@
         biometric_references,
         allow_scoring_with_all_biometric_references,
     ):
-        """Given a sampleset for probing, compute the scores and returns a sample set with the scores
+        """Given one sampleset for probing, compute the scores and return a sample set with the scores
         """
 
-        # Stacking the samples from a sampleset
-        data = [s.data for s in sampleset.samples]
-
-        # Compute scores for each sample inside of the sample set
-        # TODO: In some cases we want to compute 1 score per sampleset (IJB-C)
-        # We should add an aggregator function here so we can properly aggregator samples from
-        # a sampleset either after or before scoring.
-        # To be honest, this should be the default behavior
-        retval = []
-        for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)):
-            # Creating one sample per comparison
-            subprobe_scores = []
+        scores_biometric_references = []
+        if allow_scoring_with_all_biometric_references:
+            # Optimized scoring
+            # This is useful when your scoring function can compare one probe
+            # against a static batch of biometric references
+            total_scores = []
+            for probe_sample in sampleset:
 
-            if allow_scoring_with_all_biometric_references:
                 # Multiple scoring
                 if self.stacked_biometric_references is None:
                     self.stacked_biometric_references = [
                         ref.data for ref in biometric_references
                     ]
                 scores = self.score_multiple_biometric_references(
-                    self.stacked_biometric_references, s
+                    self.stacked_biometric_references, probe_sample.data
                 )
-
-                # Wrapping the scores in samples
-                for ref, score in zip(biometric_references, scores):
-                    subprobe_scores.append(Sample(score, parent=ref))
-            else:
-
+                total_scores.append(scores)
+
+            # Reducing the scores of the multiple probes into one score per reference
+            total_scores = self.score_reduction_operation(total_scores)
+
+            # Wrapping the scores in samples
+            for ref, score in zip(biometric_references, total_scores):
+                scores_biometric_references.append(Sample(score, parent=ref))
+
+        else:
+            # Non-optimized scoring
+            # There are some protocols where each probe has
+            # to be scored with a specific list of biometric_references
+            total_scores = []
+            for probe_sample in sampleset:
+                scores = []
                 for ref in [
-                    r for r in biometric_references if r.key in sampleset.references
+                    r for r in biometric_references if str(r.subject) in sampleset.references
                 ]:
-                    score = self.score(ref.data, s)
-                    subprobe_scores.append(Sample(score, parent=ref))
-
-            # Fetching metadata from the probe
-            kwargs = dict(
-                (metadata, sampleset.__dict__[metadata])
-                for metadata in sampleset.__dict__.keys()
-                if metadata not in ["samples", "key", "data", "load", "_data"]
-            )
-            subprobe = SampleSet(subprobe_scores, parent=parent, **kwargs)
-            retval.append(subprobe)
+                    scores.append(self.score(ref.data, probe_sample.data))
+                total_scores.append(scores)
+
+            total_scores = self.score_reduction_operation(np.array(total_scores))
+
+            for ref, score in zip(
+                [r for r in biometric_references if str(r.subject) in sampleset.references],
+                total_scores,
+            ):
+                scores_biometric_references.append(Sample(score, parent=ref))
+
+        # Fetching metadata from the probe
+        kwargs = dict(
+            (metadata, sampleset.__dict__[metadata])
+            for metadata in sampleset.__dict__.keys()
+            if metadata not in ["samples", "key", "data", "load", "_data"]
+        )
+        return SampleSet(scores_biometric_references, parent=sampleset, **kwargs)
 
-        return retval
 
     @abstractmethod
     def score(self, biometric_reference, data):
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
index 434c5208cb168d0d02094438e8454f95d17674b6..5a852a188df6c2c21c12315025e316374f798c5f 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
@@ -120,7 +120,7 @@
         biometric_reference_features = self.transformer.transform(
             biometric_reference_samples
         )
-        
+
         biometric_references = self.biometric_algorithm.enroll_samples(
             biometric_reference_features
         )
@@ -137,7 +137,7 @@
         # probes is a list of SampleSets
 
         probe_features = self.transformer.transform(probe_samples)
-        
+
         scores = self.biometric_algorithm.score_samples(
             probe_features,
             biometric_references,
@@ -146,3 +146,69 @@
 
         # scores is a list of Samples
         return scores
+
+
+class ZNormVanillaBiometricsPipeline(VanillaBiometricsPipeline):
+    def __init__(self, vanilla_biometrics_pipeline):
+        self.vanilla_biometrics_pipeline = vanilla_biometrics_pipeline
+
+    def __call__(
+        self,
+        background_model_samples,
+        biometric_reference_samples,
+        probe_samples,
+        zprobe_samples,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+
+        self.transformer = self.train_background_model(background_model_samples)
+
+        # Create the biometric references
+        biometric_references = self.create_biometric_reference(
+            biometric_reference_samples
+        )
+
+        raw_scores = self.vanilla_biometrics_pipeline(
+            background_model_samples,
+            biometric_reference_samples,
+            probe_samples,
+            allow_scoring_with_all_biometric_references,
+        )
+
+        return self.compute_znorm_scores(
+            zprobe_samples, raw_scores, biometric_references
+        )
+
+    def train_background_model(self, background_model_samples):
+        return self.vanilla_biometrics_pipeline.train_background_model(
+            background_model_samples
+        )
+
+    def create_biometric_reference(self, biometric_reference_samples):
+        return self.vanilla_biometrics_pipeline.create_biometric_reference(
+            biometric_reference_samples
+        )
+
+    def compute_scores(
+        self,
+        probe_samples,
+        biometric_references,
+        allow_scoring_with_all_biometric_references=False,
+    ):
+
+        return self.vanilla_biometrics_pipeline.compute_scores(
+            probe_samples,
+            biometric_references,
+            allow_scoring_with_all_biometric_references,
+        )
+
+    def compute_znorm_scores(self, zprobe_samples, probe_scores, biometric_references):
+
+        z_scores = self.vanilla_biometrics_pipeline.compute_scores(
+            zprobe_samples, biometric_references
+        )
+
+        # TODO: Z-normalize `probe_scores` using the statistics (mean and
+        # standard deviation per biometric reference) of `z_scores`.
+        # For now this method is a placeholder.
+        pass
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py
index 2574406cbce7211d22d17560c843afaeaf19a400..2c41d99fcb881a6ef7e7b27dcc4e567d2ffa49d6 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py
@@ -20,33 +20,30 @@ class FourColumnsScoreWriter(ScoreWriter):
         Write scores and returns a :any:`bob.pipelines.DelayedSample` containing the
         instruction to open the score file
         """
+        os.makedirs(path, exist_ok=True)
         checkpointed_scores = []
-        for probe in probe_sampleset:
-
-            lines = [
-                "{0} {1} {2} {3}\n".format(
-                    biometric_reference.subject,
-                    probe.subject,
-                    probe.key,
-                    biometric_reference.data,
-                )
-                for biometric_reference in probe
-            ]
-            filename = os.path.join(path, str(probe.subject)) + ".txt"
-            open(filename, "w").writelines(lines)
-            checkpointed_scores.append(
-                SampleSet(
-                    [
-                        DelayedSample(
-                            functools.partial(self.read, filename), parent=probe
-                        )
-                    ],
-                    parent=probe,
-                )
+        lines = [
+            "{0} {1} {2} {3}\n".format(
+                biometric_reference.subject,
+                probe_sampleset.subject,
+                probe_sampleset.key,
+                biometric_reference.data,
             )
-        return checkpointed_scores
+            for biometric_reference in probe_sampleset
+        ]
+        filename = os.path.join(path, str(probe_sampleset.subject)) + ".txt"
+        open(filename, "w").writelines(lines)
+
+        return SampleSet(
+            [
+                DelayedSample(
+                    functools.partial(self.read, filename), parent=probe_sampleset
+                )
+            ],
+            parent=probe_sampleset,
+        )
 
     def read(self, path):
         """
@@ -54,16 +51,15 @@
         """
         return open(path).readlines()
 
-    def concatenate_write_scores(self, samplesets_list, filename):
+    def concatenate_write_scores(self, samplesets, filename):
         """
         Given a list of samplsets, write them all in a single file
         """
         os.makedirs(os.path.dirname(filename), exist_ok=True)
         f = open(filename, "w")
-        for samplesets in samplesets_list:
-            for sset in samplesets:
-                for s in sset:
-                    f.writelines(s.data)
+        for sset in samplesets:
+            for scores in sset:
+                f.writelines(scores.data)
 
 
 class CSVScoreWriter(ScoreWriter):
@@ -115,40 +111,36 @@
         os.makedirs(path, exist_ok=True)
         checkpointed_scores = []
 
-        header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0])
+        header, probe_dict, bioref_dict = create_csv_header(probe_sampleset)
 
-        for probe in probe_sampleset:
-            filename = os.path.join(path, str(probe.subject)) + ".csv"
-            with open(filename, "w") as f:
+        filename = os.path.join(path, str(probe_sampleset.subject)) + ".csv"
+        with open(filename, "w") as f:
 
-                csv_write = csv.writer(f)
-                csv_write.writerow(header)
+            csv_write = csv.writer(f)
+            csv_write.writerow(header)
 
-                rows = []
-                probe_row = [str(probe.key)] + [
-                    str(probe.__dict__[k]) for k in probe_dict.keys()
+            rows = []
+            probe_row = [str(probe_sampleset.key)] + [
+                str(probe_sampleset.__dict__[k]) for k in probe_dict.keys()
+            ]
+
+            for biometric_reference in probe_sampleset:
+                bio_ref_row = [
+                    str(biometric_reference.__dict__[k])
+                    for k in list(bioref_dict.keys()) + ["data"]
                 ]
 
-                for biometric_reference in probe:
-                    bio_ref_row = [
-                        str(biometric_reference.__dict__[k])
-                        for k in list(bioref_dict.keys()) + ["data"]
-                    ]
-
-                    rows.append(probe_row + bio_ref_row)
-
-                csv_write.writerows(rows)
-                checkpointed_scores.append(
-                    SampleSet(
-                        [
-                            DelayedSample(
-                                functools.partial(self.read, filename), parent=probe
-                            )
-                        ],
-                        parent=probe,
+                rows.append(probe_row + bio_ref_row)
+
+            csv_write.writerows(rows)
+            return SampleSet(
+                [
+                    DelayedSample(
+                        functools.partial(self.read, filename), parent=probe_sampleset
                     )
-                )
-        return checkpointed_scores
+                ],
+                parent=probe_sampleset,
+            )
 
     def read(self, path):
         """
@@ -156,7 +148,7 @@
         """
         return open(path).readlines()
 
-    def concatenate_write_scores(self, samplesets_list, filename):
+    def concatenate_write_scores(self, samplesets, filename):
         """
         Given a list of samplsets, write them all in a single file
         """
@@ -167,8 +159,8 @@
         base_dir = os.path.splitext(filename)[0]
         os.makedirs(base_dir, exist_ok=True)
         f = None
-        for i, samplesets in enumerate(samplesets_list):
-            if i% self.n_sample_sets==0:
+        for i, sset in enumerate(samplesets):
+            if i % self.n_sample_sets == 0:
                 if f is not None:
                     f.close()
                     del f
@@ -176,10 +168,9 @@
                 filename = os.path.join(base_dir, f"chunk_{i}.csv")
                 f = open(filename, "w")
 
-            for sset in samplesets:
-                for s in sset:
-                    if i==0:
-                        f.writelines(s.data)
-                    else:
-                        f.writelines(s.data[1:])
-            samplesets_list[i] = None
\ No newline at end of file
+            for scores in sset:
+                if i == 0:
+                    f.writelines(scores.data)
+                else:
+                    f.writelines(scores.data[1:])
+            sset.samples = None
diff --git a/bob/bio/base/test/test_vanilla_biometrics.py b/bob/bio/base/test/test_vanilla_biometrics.py
index ff525ab9e06c428852da76ab889af0b3a3cbff14..6ed9450a9a540c7b2bb5dae602be81d9eb1f28ce 100644
--- a/bob/bio/base/test/test_vanilla_biometrics.py
+++ b/bob/bio/base/test/test_vanilla_biometrics.py
@@ -61,10 +61,10 @@
             for i in range(offset, offset + n_samples)
         ]
 
-    def _create_random_sample_set(self, n_sample_set=10, n_samples=2):
+    def _create_random_sample_set(self, n_sample_set=10, n_samples=2, seed=10):
         # Just generate random samples
-        np.random.seed(10)
+        np.random.seed(seed)
         sample_set = [
             SampleSet(
                 samples=[],
@@ -89,21 +89,30 @@
         return sample_set
 
     def background_model_samples(self):
-        samples = [sset.samples for sset in self._create_random_sample_set()]
+        samples = [sset.samples for sset in self._create_random_sample_set(seed=10)]
         return list(itertools.chain(*samples))
 
     def references(self):
-        return self._create_random_sample_set(self.n_references, self.dim)
+        return self._create_random_sample_set(self.n_references, self.dim, seed=11)
 
     def probes(self):
        probes = []
-        probes = self._create_random_sample_set(n_sample_set=10, n_samples=1)
+        probes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=12)
         for p in probes:
-            p.references = list(range(self.n_references))
+            p.references = [str(r) for r in list(range(self.n_references))]
 
         return probes
 
+    def zprobes(self):
+        zprobes = []
+
+        zprobes = self._create_random_sample_set(n_sample_set=10, n_samples=1, seed=13)
+        for p in zprobes:
+            p.references = [str(r) for r in list(range(self.n_references))]
+
+        return zprobes
+
     @property
     def allow_scoring_with_all_biometric_references(self):
         return True
@@ -140,7 +149,7 @@
 
     with tempfile.TemporaryDirectory() as dir_name:
 
-        def run_pipeline(with_dask):
+        def run_pipeline(with_dask, allow_scoring_with_all_biometric_references):
             database = DummyDatabase()
 
             transformer = _make_transformer(dir_name)
@@ -160,23 +169,24 @@
                 database.background_model_samples(),
                 database.references(),
                 database.probes(),
-                allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references,
+                allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
             )
 
             if with_dask:
                 scores = scores.compute(scheduler="single-threaded")
 
             assert len(scores) == 10
-            for probe_ssets in scores:
-                for probe in probe_ssets:
-                    assert len(probe) == 10
+            for sample_scores in scores:
+                assert len(sample_scores) == 10
+                for score in sample_scores:
+                    assert isinstance(score.data, float)
 
-        run_pipeline(False)
-        run_pipeline(False)  # Testing checkpoint
+        run_pipeline(False, True)
+        run_pipeline(False, False)  # Testing checkpoint
         shutil.rmtree(dir_name)  # Deleting the cache so it runs again from scratch
         os.makedirs(dir_name, exist_ok=True)
-        run_pipeline(True)
-        run_pipeline(True)  # Testing checkpoint
+        run_pipeline(True, True)
+        run_pipeline(True, True)  # Testing checkpoint
 
 
 def test_checkpoint_bioalg_as_transformer():
@@ -207,6 +217,7 @@
             database.probes(),
             allow_scoring_with_all_biometric_references=database.allow_scoring_with_all_biometric_references,
         )
+
         if with_dask:
             scores = scores.compute(scheduler="single-threaded")
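
Usage note (not part of the patch): the sketch below illustrates how the `score_reduction_operation` hook added to `BioAlgorithm.__init__` could be exercised from user code. It assumes that concrete algorithms such as `Distance` forward their extra keyword arguments to `BioAlgorithm.__init__`; the variable names are illustrative only.

import numpy as np

from bob.bio.base.pipelines.vanilla_biometrics import Distance

# Default behaviour: the scores of all probe samples in a SampleSet are
# averaged into one score per biometric reference (average_scores).
algorithm = Distance()

# Hypothetical alternative: keep the best score per reference instead of the
# average, by reducing over the probe axis with numpy's max.
algorithm = Distance(score_reduction_operation=lambda scores: np.max(scores, axis=0))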
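
The Z-norm step that `ZNormVanillaBiometricsPipeline.compute_znorm_scores` leaves as a TODO could look roughly like the sketch below. This is an assumption-laden illustration, not the author's implementation: it presumes that `probe_scores` and `z_scores` are lists of SampleSets in which the i-th Sample of every SampleSet carries the raw score against the i-th biometric reference (the layout produced by `_score_sample_set` in this patch), and the helper name `znorm_samplesets` is made up for the example.

import numpy as np

from bob.pipelines.sample import Sample, SampleSet


def znorm_samplesets(probe_scores, z_scores):
    # Z-probe scores arranged as a (n_zprobes, n_references) matrix
    z_matrix = np.array([[s.data for s in sset] for sset in z_scores])

    # Per-reference statistics estimated from the Z-probes
    mu = np.mean(z_matrix, axis=0)
    std = np.std(z_matrix, axis=0)

    normalized = []
    for sset in probe_scores:
        z_normed = [
            # parent=s keeps the biometric-reference metadata attached
            # by `_score_sample_set`
            Sample((s.data - m) / sigma, parent=s)
            for s, m, sigma in zip(sset, mu, std)
        ]
        normalized.append(SampleSet(z_normed, parent=sset))
    return normalized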