From 9e3a1f032f170bedb7b1a50bb3149c21c15f59ae Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Fri, 1 May 2020 15:49:56 +0200 Subject: [PATCH] Created CSV Score writer --- .../vanilla_biometrics/abstract_classes.py | 74 ++++++------- .../vanilla_biometrics/score_writers.py | 101 +++++++++++++++++- 2 files changed, 135 insertions(+), 40 deletions(-) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py index 022725bc..7e99ec91 100644 --- a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py @@ -13,15 +13,9 @@ class BioAlgorithm(metaclass=ABCMeta): biometric model enrollement, via ``enroll()`` and scoring, with ``score()``. - Parameters - ---------- - allow_score_multiple_references: bool - If true, your scoring function can be executed by :any:`BioAlgorithm.score_multiple_biometric_references` - """ - def __init__(self, allow_score_multiple_references=False, **kwargs): - self.allow_score_multiple_references = allow_score_multiple_references + def __init__(self, **kwargs): self.stacked_biometric_references = None def enroll_samples(self, biometric_references): @@ -66,7 +60,12 @@ class BioAlgorithm(metaclass=ABCMeta): """ pass - def score_samples(self, probe_features, biometric_references, allow_scoring_with_all_biometric_references=False): + def score_samples( + self, + probe_features, + biometric_references, + allow_scoring_with_all_biometric_references=False, + ): """Scores a new sample against multiple (potential) references Parameters @@ -83,7 +82,7 @@ class BioAlgorithm(metaclass=ABCMeta): allow_scoring_with_all_biometric_references: bool If true will call `self.score_multiple_biometric_references`, at scoring time, to compute scores in one shot with multiple probes. 
- This optiization is useful when all probes needs to be compared with all biometric references AND + This optimization is useful when all probes need to be compared with all biometric references AND your scoring function allows this broadcast computation. @@ -92,18 +91,29 @@ class BioAlgorithm(metaclass=ABCMeta): scores : list For each sample in a probe, returns as many scores as there are - samples in the probe, together with the probe's and the + samples in the probe, together with the probes and the relevant reference's subject identifiers. """ retval = [] for p in probe_features: - retval.append(self._score_sample_set(p, biometric_references, allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references)) + retval.append( + self._score_sample_set( + p, + biometric_references, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + ) return retval - def _score_sample_set(self, sampleset, biometric_references, allow_scoring_with_all_biometric_references): - """Given a sampleset for probing, compute the scores and retures a sample set with the scores + def _score_sample_set( + self, + sampleset, + biometric_references, + allow_scoring_with_all_biometric_references, + ): + """Given a sampleset for probing, compute the scores and returns a sample set with the scores """ # Stacking the samples from a sampleset @@ -111,11 +121,10 @@ class BioAlgorithm(metaclass=ABCMeta): # Compute scores for each sample inside of the sample set # TODO: In some cases we want to compute 1 score per sampleset (IJB-C) - # We should add an agregator function here so we can properlly agregate samples from + # We should add an aggregator function here so we can properly aggregate samples from # a sampleset either after or before scoring. 
- # To be honest, this should be the default behaviour + # To be honest, this should be the default behavior retval = [] - for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)): # Creating one sample per comparison subprobe_scores = [] @@ -129,9 +138,9 @@ class BioAlgorithm(metaclass=ABCMeta): scores = self.score_multiple_biometric_references( self.stacked_biometric_references, s ) - - # Wrapping the scores in samples - for ref, score in zip(biometric_references, scores): + + # Wrapping the scores in samples + for ref, score in zip(biometric_references, scores): subprobe_scores.append(Sample(score, parent=ref)) else: @@ -141,9 +150,13 @@ class BioAlgorithm(metaclass=ABCMeta): score = self.score(ref.data, s) subprobe_scores.append(Sample(score, parent=ref)) - # Creating one sampleset per probe - subprobe = SampleSet(subprobe_scores, parent=parent) - subprobe.subject = sampleset.subject + # Fetching metadata from the probe + kwargs = dict( + (metadata, sampleset.__dict__[metadata]) + for metadata in sampleset.__dict__.keys() + if metadata not in ["samples", "key", "data", "load", "_data"] + ) + subprobe = SampleSet(subprobe_scores, parent=parent, **kwargs) retval.append(subprobe) return retval @@ -259,7 +272,6 @@ class ScoreWriter(metaclass=ABCMeta): def write(self, sampleset, path): pass - @abstractmethod def read(self, path): pass @@ -267,19 +279,3 @@ class ScoreWriter(metaclass=ABCMeta): @abstractmethod def concatenate_write_scores(self, sampleset, path): pass - - -def create_score_delayed_sample(path, probe): - """ - Write scores in the four columns format - """ - - with open(path, "w") as f: - for score_line in probe.samples: - f.write(score_line.data) - - def load(): - with open(path) as f: - return f.read() - - return DelayedSample(load, parent=probe) diff --git a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py index 59c9af58..dfcf7f0b 100644 --- 
a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py +++ b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py @@ -6,6 +6,8 @@ import os from bob.pipelines import SampleSet, DelayedSample from .abstract_classes import ScoreWriter import functools +import csv + class FourColumnsScoreWriter(ScoreWriter): """ @@ -61,4 +63,101 @@ class FourColumnsScoreWriter(ScoreWriter): for samplesets in samplesets_list: for sset in samplesets: for s in sset: - f.writelines(s.data) \ No newline at end of file + f.writelines(s.data) + + +class CSVScoreWriter(ScoreWriter): + """ + Read and write scores in CSV format, shipping all metadata with the scores + """ + + def write(self, probe_sampleset, path): + """ + Write scores and returns a :any:`bob.pipelines.DelayedSample` containing + the instruction to open the score file + """ + + exclude_list = ["samples", "key", "data", "load", "_data", "references"] + + def create_csv_header(probe_sampleset): + first_biometric_reference = probe_sampleset[0] + + probe_dict = dict( + (k, f"probe_{k}") + for k in probe_sampleset.__dict__.keys() + if k not in exclude_list + ) + + bioref_dict = dict( + (k, f"bio_ref_{k}") + for k in first_biometric_reference.__dict__.keys() + if k not in exclude_list + ) + + header = ( + ["probe_key"] + + [probe_dict[k] for k in probe_dict] + + [bioref_dict[k] for k in bioref_dict] + + ["score"] + ) + return header, probe_dict, bioref_dict + + os.makedirs(path, exist_ok=True) + checkpointed_scores = [] + + header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0]) + + for probe in probe_sampleset: + filename = os.path.join(path, probe.subject) + ".csv" + with open(filename, "w") as f: + + csv_write = csv.writer(f) + csv_write.writerow(header) + + rows = [] + probe_row = [str(probe.key)] + [ + str(probe.__dict__[k]) for k in probe_dict.keys() + ] + + for biometric_reference in probe: + bio_ref_row = [ + str(biometric_reference.__dict__[k]) + for k in list(bioref_dict.keys()) + ["data"] + ] + 
+ rows.append(probe_row + bio_ref_row) + + csv_write.writerows(rows) + checkpointed_scores.append( + SampleSet( + [ + DelayedSample( + functools.partial(self.read, filename), parent=probe + ) + ], + parent=probe, + ) + ) + return checkpointed_scores + + def read(self, path): + """ + Base Instruction to load a score file + """ + return open(path).readlines() + + def concatenate_write_scores(self, samplesets_list, filename): + """ + Given a list of samplesets, write them all in a single file + """ + os.makedirs(os.path.dirname(filename), exist_ok=True) + f = open(filename, "w") + first = True + for samplesets in samplesets_list: + for sset in samplesets: + for s in sset: + if first: + f.writelines(s.data) + first = False + else: + f.writelines(s.data[1:]) -- GitLab