Created CSV Score writer

9e3a1f03 · Tiago de Freitas Pereira · 40cb9aea · 9e3a1f03 · 9e3a1f03
Commit 9e3a1f03 authored 5 years ago by Tiago de Freitas Pereira
--- a/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/abstract_classes.py
@@ -13,15 +13,9 @@ class BioAlgorithm(metaclass=ABCMeta):
    biometric model enrollement, via ``enroll()`` and scoring, with
    ``score()``.

-    Parameters
-    ----------
-      allow_score_multiple_references: bool
-         If true, your scoring function can be executed by :any:`BioAlgorithm.score_multiple_biometric_references`
-
    """

-    def __init__(self, allow_score_multiple_references=False, **kwargs):
-        self.allow_score_multiple_references = allow_score_multiple_references
+    def __init__(self, **kwargs):
        self.stacked_biometric_references = None

    def enroll_samples(self, biometric_references):
@@ -66,7 +60,12 @@ class BioAlgorithm(metaclass=ABCMeta):
        """
        pass

-    def score_samples(self, probe_features, biometric_references, allow_scoring_with_all_biometric_references=False):
+    def score_samples(
+        self,
+        probe_features,
+        biometric_references,
+        allow_scoring_with_all_biometric_references=False,
+    ):
        """Scores a new sample against multiple (potential) references

        Parameters
@@ -83,7 +82,7 @@ class BioAlgorithm(metaclass=ABCMeta):

            allow_scoring_with_all_biometric_references: bool
                If true will call `self.score_multiple_biometric_references`, at scoring time, to compute scores in one shot with multiple probes.
-                This optiization is useful when all probes needs to be compared with all biometric references AND
+                This optimization is useful when all probes need to be compared with all biometric references AND
                your scoring function allows this broadcast computation.


@@ -92,18 +91,29 @@ class BioAlgorithm(metaclass=ABCMeta):

            scores : list
                For each sample in a probe, returns as many scores as there are
-                samples in the probe, together with the probe's and the
+                samples in the probe, together with the probes and the
                relevant reference's subject identifiers.

        """

        retval = []
        for p in probe_features:
-            retval.append(self._score_sample_set(p, biometric_references, allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references))
+            retval.append(
+                self._score_sample_set(
+                    p,
+                    biometric_references,
+                    allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references,
+                )
+            )
        return retval

-    def _score_sample_set(self, sampleset, biometric_references, allow_scoring_with_all_biometric_references):
-        """Given a sampleset for probing, compute the scores and retures a sample set with the scores
+    def _score_sample_set(
+        self,
+        sampleset,
+        biometric_references,
+        allow_scoring_with_all_biometric_references,
+    ):
+        """Given a sampleset for probing, computes the scores and returns a sample set with the scores
        """

        # Stacking the samples from a sampleset
@@ -111,11 +121,10 @@ class BioAlgorithm(metaclass=ABCMeta):

        # Compute scores for each sample inside of the sample set
        # TODO: In some cases we want to compute 1 score per sampleset (IJB-C)
-        # We should add an agregator function here so we can properlly agregate samples from
+        # We should add an aggregator function here so we can properly aggregate samples from
        # a sampleset either after or before scoring.
-        # To be honest, this should be the default behaviour
+        # To be honest, this should be the default behavior
        retval = []
-
        for subprobe_id, (s, parent) in enumerate(zip(data, sampleset.samples)):
            # Creating one sample per comparison
            subprobe_scores = []
@@ -129,9 +138,9 @@ class BioAlgorithm(metaclass=ABCMeta):
                scores = self.score_multiple_biometric_references(
                    self.stacked_biometric_references, s
                )
-                
-                # Wrapping the scores in samples                
-                for ref, score in zip(biometric_references, scores):                    
+
+                # Wrapping the scores in samples
+                for ref, score in zip(biometric_references, scores):
                    subprobe_scores.append(Sample(score, parent=ref))
            else:

@@ -141,9 +150,13 @@ class BioAlgorithm(metaclass=ABCMeta):
                    score = self.score(ref.data, s)
                    subprobe_scores.append(Sample(score, parent=ref))

-            # Creating one sampleset per probe
-            subprobe = SampleSet(subprobe_scores, parent=parent)
-            subprobe.subject = sampleset.subject            
+            # Fetching metadata from the probe
+            kwargs = dict(
+                (metadata, sampleset.__dict__[metadata])
+                for metadata in sampleset.__dict__.keys()
+                if metadata not in ["samples", "key", "data", "load", "_data"]
+            )
+            subprobe = SampleSet(subprobe_scores, parent=parent, **kwargs)
            retval.append(subprobe)

        return retval
@@ -259,7 +272,6 @@ class ScoreWriter(metaclass=ABCMeta):
    def write(self, sampleset, path):
        pass

-
    @abstractmethod
    def read(self, path):
        pass
@@ -267,19 +279,3 @@ class ScoreWriter(metaclass=ABCMeta):
    @abstractmethod
    def concatenate_write_scores(self, sampleset, path):
        pass
-
-
-def create_score_delayed_sample(path, probe):
-    """
-    Write scores in the four columns format
-    """
-
-    with open(path, "w") as f:
-        for score_line in probe.samples:
-            f.write(score_line.data)
-
-    def load():
-        with open(path) as f:
-            return f.read()
-
-    return DelayedSample(load, parent=probe)
--- a/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/score_writers.py
@@ -6,6 +6,8 @@ import os
 from bob.pipelines import SampleSet, DelayedSample
 from .abstract_classes import ScoreWriter
 import functools
+import csv
+

 class FourColumnsScoreWriter(ScoreWriter):
    """
@@ -61,4 +63,101 @@ class FourColumnsScoreWriter(ScoreWriter):
        for samplesets in samplesets_list:
            for sset in samplesets:
                for s in sset:
-                    f.writelines(s.data)
\ No newline at end of file
+                    f.writelines(s.data)
+
+
+class CSVScoreWriter(ScoreWriter):
+    """
+    Read and write scores in CSV format, shipping all metadata with the scores
+    """
+
+    def write(self, probe_sampleset, path):
+        """
+        Write scores and returns a list of :any:`bob.pipelines.SampleSet`, each
+        one containing a :any:`bob.pipelines.DelayedSample` with the instruction
+        to open the score file
+
+        One file named ``<probe.subject>.csv`` is written inside ``path`` for
+        each probe. Every file starts with a header row followed by one row per
+        scored biometric reference. The metadata columns are derived from the
+        first probe and from its first biometric reference, and are reused for
+        all the other probes.
+
+        Parameters
+        ----------
+
+            probe_sampleset: list
+                Scored probes. Each probe is iterated to get its scored
+                biometric references, whose ``data`` holds the score.
+
+            path: str
+                Directory where the CSV files are written. It is created if it
+                does not exist.
+
+        Returns
+        -------
+
+            checkpointed_scores : list
+                One :any:`bob.pipelines.SampleSet` per probe, wrapping a single
+                :any:`bob.pipelines.DelayedSample` that reads the CSV file of
+                this probe with ``self.read``. Empty if there are no probes.
+
+        """
+
+        exclude_list = ["samples", "key", "data", "load", "_data", "references"]
+
+        def create_csv_header(probe):
+            # Builds the header row and the `attribute -> column name` mappings
+            # from one probe and from its first scored biometric reference
+            first_biometric_reference = probe[0]
+
+            probe_dict = {
+                k: f"probe_{k}" for k in probe.__dict__ if k not in exclude_list
+            }
+
+            bioref_dict = {
+                k: f"bio_ref_{k}"
+                for k in first_biometric_reference.__dict__
+                if k not in exclude_list
+            }
+
+            header = (
+                ["probe_key"]
+                + list(probe_dict.values())
+                + list(bioref_dict.values())
+                + ["score"]
+            )
+            return header, probe_dict, bioref_dict
+
+        os.makedirs(path, exist_ok=True)
+        checkpointed_scores = []
+
+        # Nothing to write: avoids an IndexError when fetching the first probe
+        if len(probe_sampleset) == 0:
+            return checkpointed_scores
+
+        header, probe_dict, bioref_dict = create_csv_header(probe_sampleset[0])
+
+        # The last column ("score") is read from the `data` attribute
+        bioref_keys = list(bioref_dict) + ["data"]
+
+        for probe in probe_sampleset:
+            # NOTE(review): the file name depends only on `probe.subject`, so
+            # probes sharing the same subject overwrite each other's file
+            filename = os.path.join(path, probe.subject) + ".csv"
+
+            probe_row = [str(probe.key)] + [
+                str(probe.__dict__[k]) for k in probe_dict
+            ]
+            rows = [
+                probe_row
+                + [str(biometric_reference.__dict__[k]) for k in bioref_keys]
+                for biometric_reference in probe
+            ]
+
+            # `newline=""` is what the csv module asks for; without it, extra
+            # carriage returns are written on platforms that translate "\n"
+            with open(filename, "w", newline="") as f:
+                csv_write = csv.writer(f)
+                csv_write.writerow(header)
+                csv_write.writerows(rows)
+
+            # The file is already closed when the delayed loader is created
+            checkpointed_scores.append(
+                SampleSet(
+                    [
+                        DelayedSample(
+                            functools.partial(self.read, filename), parent=probe
+                        )
+                    ],
+                    parent=probe,
+                )
+            )
+        return checkpointed_scores
+
+    def read(self, path):
+        """
+        Base Instruction to load a score file
+
+        Returns the content of ``path`` as a list of lines (header included)
+        """
+        # Context manager so the file handle is not left open
+        with open(path) as f:
+            return f.readlines()
+
+    def concatenate_write_scores(self, samplesets_list, filename):
+        """
+        Given a list of samplesets, write them all in a single file
+
+        The ``data`` of every sample is expected to be the list of lines of one
+        score file (as returned by ``self.read``). The header (first line) is
+        kept only for the first sample, so it appears once in ``filename``.
+        """
+        # `os.path.dirname` is empty when `filename` has no directory part,
+        # and `os.makedirs("")` raises
+        dirname = os.path.dirname(filename)
+        if dirname:
+            os.makedirs(dirname, exist_ok=True)
+
+        # Context manager so the scores are flushed and the file is closed
+        with open(filename, "w") as f:
+            first = True
+            for samplesets in samplesets_list:
+                for sset in samplesets:
+                    for s in sset:
+                        lines = s.data
+                        if first:
+                            f.writelines(lines)
+                            first = False
+                        else:
+                            # Skipping the header of the subsequent files
+                            f.writelines(lines[1:])