Merge branch 'fix-ijbc' into 'master'

Resolve "IJBC database will fail on non-Idiap filesystems". Closes #49 and #53. See merge request !124.

Merge branch 'fix-ijbc' into 'master'
5c081127 · Tiago de Freitas Pereira · e8e95bf0 · 4892e072 · 5c081127 · 5c081127
Commit 5c081127 authored 3 years ago by Tiago de Freitas Pereira
--- a/bob/bio/face/config/database/ijbc.py
+++ b/bob/bio/face/config/database/ijbc.py
 #!/usr/bin/env python
 from bob.bio.face.database import IJBCDatabase
-from bob.extension import rc
 database = IJBCDatabase()
--- a/bob/bio/face/database/ijbc.py
+++ b/bob/bio/face/database/ijbc.py
-#!/usr/bin/env python
+from bob.bio.base.pipelines.vanilla_biometrics.abstract_classes import Database
-# vim: set fileencoding=utf-8 :
+import pandas as pd
-# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+from bob.pipelines.sample import DelayedSample, SampleSet
-# Sat 20 Aug 15:43:10 CEST 2020
+from bob.extension import rc
-from bob.pipelines.utils import hash_string
-from bob.extension.download import get_file, find_element_in_tarball
-import pickle
 import os
+import bob.io.image
+from functools import partial
+from bob.pipelines.utils import hash_string
+def _make_sample_from_template_row(row, image_directory):
+    # Append the template id to the key so that parallel writing is handled
+    # correctly, at the cost of having duplicate files
+    key = os.path.splitext(row["FILENAME"])[0] + "-" + str(row["TEMPLATE_ID"])
+    return DelayedSample(
+        load=partial(bob.io.image.load, os.path.join(image_directory, row["FILENAME"])),
+        reference_id=str(row["TEMPLATE_ID"]),
+        subject_id=str(row["SUBJECT_ID"]),
+        key=key,
+        gender=row["GENDER"],
+        indoor_outdoor=row["INDOOR_OUTDOOR"],
+        skintone=row["SKINTONE"],
+        yaw=row["YAW"],
+        rool=row["ROLL"],
+        occ1=row["OCC1"],
+        occ2=row["OCC2"],
+        occ3=row["OCC3"],
+        occ4=row["OCC4"],
+        occ5=row["OCC5"],
+        occ6=row["OCC6"],
+        occ7=row["OCC7"],
+        occ8=row["OCC8"],
+        occ9=row["OCC9"],
+        occ10=row["OCC10"],
+        occ11=row["OCC11"],
+        occ12=row["OCC12"],
+        occ13=row["OCC13"],
+        occ14=row["OCC14"],
+        occ15=row["OCC15"],
+        occ16=row["OCC16"],
+        occ17=row["OCC17"],
+        occ18=row["OCC18"],
+        annotations={
+            "topleft": (float(row["FACE_Y"]), float(row["FACE_X"])),
+            "bottomright": (
+                float(row["FACE_Y"]) + float(row["FACE_HEIGHT"]),
+                float(row["FACE_X"]) + float(row["FACE_WIDTH"]),
+            ),
+            "size": (float(row["FACE_HEIGHT"]), float(row["FACE_WIDTH"])),
+        },
+    )
+def _make_sample_set_from_template_group(template_group, image_directory):
+    samples = list(
+        template_group.apply(
+            _make_sample_from_template_row, axis=1, image_directory=image_directory
+        )
+    )
+    return SampleSet(
+        samples,
+        reference_id=samples[0].reference_id,
+        subject_id=samples[0].subject_id,
+        key=samples[0].reference_id,
+    )
+class IJBCDatabase(Database):
+    """
+    This package contains the access API and descriptions for the IARPA Janus Benchmark C -- IJB-C database.
+    The actual raw data can be downloaded from the original web page: http://www.nist.gov/programs-projects/face-challenges (note that not everyone might be eligible for downloading the data).
-def load_ijbc_sample(original_path, extension=[".jpg", ".png"]):
+    Included in the database, there are list files defining verification as well as closed- and open-set identification protocols.
-    for e in extension:
+    For verification, two different protocols are provided.
-        path = original_path + e
+    For the ``1:1`` protocol, gallery and probe templates are combined using several images and video frames for each subject.
-        if os.path.exists(path):
+    Compared gallery and probe templates share the same gender and skin tone -- these have been matched to make the comparisons more realistic and difficult.
-            return path
-    else:
-        return ""
+    For closed-set identification, the gallery of the ``1:1`` protocol is used, while probes stem from either only images, mixed images and video frames, or plain videos.
+    For open-set identification, the same probes are evaluated, but the gallery is split into two parts, either of which is left out to provide unknown probe templates, i.e., probe templates with no matching subject in the gallery.
+    In any case, scores are computed between all (active) gallery templates and all probes.
+    The IJB-C dataset provides additional evaluation protocols for face detection and clustering, but these are not (yet) part of this interface.
+    .. warning::
+      To use this dataset protocol, you need to have the original files of the IJBC dataset.
+      Once you have downloaded them, please run the following command to set the path for Bob
+        .. code-block:: sh
+            bob config set bob.bio.face.ijbc.directory [IJBC PATH]
+    The code below allows you to fetch the gallery and probes of the "1:1" protocol.
+    .. code-block:: python
+        >>> from bob.bio.face.database import IJBCDatabase
+        >>> ijbc = IJBCDatabase()
+        >>>
+        >>> # Fetching the gallery 
+        >>> references = ijbc.references()
+        >>> # Fetching the probes 
+        >>> probes = ijbc.probes()
+    """
+    def __init__(
+        self,
+        protocol="1:1",
+        original_directory=rc.get("bob.bio.face.ijbc.directory"),
+        **kwargs,
+    ):
+        if original_directory is None or not os.path.exists(original_directory):
+            raise ValueError(
+                "Invalid or non existant `original_directory`: f{original_directory}"
+            )
+        self._check_protocol(protocol)
+        super().__init__(
+            name="ijbc",
+            protocol=protocol,
+            allow_scoring_with_all_biometric_references=False,
+            annotation_type="bounding-box",
+            fixed_positions=None,
+            memory_demanding=True,
+        )
-class IJBCDatabase:
+        self.image_directory = os.path.join(original_directory, "images")
-    def __init__(self, pkl_directory=None):
+        self.protocol_directory = os.path.join(original_directory, "protocols")
-        self.annotation_type = "bounding-box"
+        self._cached_probes = None
-        self.fixed_positions = None
+        self._cached_references = None
-        self.allow_scoring_with_all_biometric_references = False
        self.hash_fn = hash_string
-        self.memory_demanding = True
-        if pkl_directory is None:
+        self._load_metadata(protocol)
-            urls = IJBCDatabase.urls()
-            pkl_directory = get_file(
+    def _load_metadata(self, protocol):
-                "ijbc.tar.gz", urls, file_hash="4b25d7f10595eb9f97f328a2d448d957"
+        # Load CSV files
+        if protocol == "1:1":
+            self.reference_templates = pd.concat(
+                [
+                    pd.read_csv(
+                        os.path.join(self.protocol_directory, "ijbc_1N_gallery_G1.csv")
+                    ),
+                    pd.read_csv(
+                        os.path.join(self.protocol_directory, "ijbc_1N_gallery_G2.csv")
+                    ),
+                ]
            )
-        self.pkl_directory = pkl_directory
+            self.probe_templates = pd.read_csv(
+                os.path.join(self.protocol_directory, "ijbc_1N_probe_mixed.csv")
+            )
-    def _assert_group(self, group):
+            self.matches = pd.read_csv(
-        assert (
+                os.path.join(self.protocol_directory, "ijbc_11_G1_G2_matches.csv"),
-            group == "dev"
+                names=["REFERENCE_TEMPLATE_ID", "PROBE_TEMPLATE_ID"],
-        ), "The IJBC database only has a `dev` group. Received : {}".format(group)
+            ).astype("str")
-    def references(self, group="dev"):
+            self.metadata = pd.read_csv(
-        self._assert_group(group)
+                os.path.join(self.protocol_directory, "ijbc_metadata.csv"),
-        return pickle.loads(
+                usecols=[
-            find_element_in_tarball(self.pkl_directory, "db_references.pickle", True)
+                    "SUBJECT_ID",
-        )
+                    "FILENAME",
+                    "SIGHTING_ID",
+                    "FACIAL_HAIR",
+                    "AGE",
+                    "INDOOR_OUTDOOR",
+                    "SKINTONE",
+                    "GENDER",
+                    "YAW",
+                    "ROLL",
+                ]
+                + [f"OCC{i}" for i in range(1, 19)],
+            )
-    def probes(self, group="dev"):
+            # LEFT JOIN WITH METADATA
-        self._assert_group(group)
+            self.probe_templates = pd.merge(
-        return pickle.loads(
+                self.probe_templates,
-            find_element_in_tarball(self.pkl_directory, "db_probes.pickle", True)
+                self.metadata,
-        )
+                on=["SUBJECT_ID", "FILENAME", "SIGHTING_ID"],
+                how="left",
+            )
+            # LEFT JOIN WITH METADATA
+            self.reference_templates = pd.merge(
+                self.reference_templates,
+                self.metadata,
+                on=["SUBJECT_ID", "FILENAME", "SIGHTING_ID"],
+                how="left",
+            )
+        else:
+            raise ValueError(
+                f"Protocol `{protocol}` not supported. We do accept merge requests :-)"
+            )
    def background_model_samples(self):
-        import cloudpickle
+        return None
-        return cloudpickle.loads(
+    def probes(self, group="dev"):
-            find_element_in_tarball(
+        self._check_group(group)
-                self.pkl_directory, "db_background_model_samples.pickle", True
+        if self._cached_probes is None:
+            self._cached_probes = list(
+                self.probe_templates.groupby("TEMPLATE_ID").apply(
+                    _make_sample_set_from_template_group,
+                    image_directory=self.image_directory,
+                )
            )
+            # Link probes to the references they have to be compared with
+            # We might make that faster if we manage to write it as a pandas operation
+            grouped_matches = self.matches.groupby("PROBE_TEMPLATE_ID")
+            for probe_sampleset in self._cached_probes:
+                probe_sampleset.references = list(
+                    grouped_matches.get_group(probe_sampleset.reference_id)[
+                        "REFERENCE_TEMPLATE_ID"
+                    ]
+                )
+        return self._cached_probes
+    def references(self, group="dev"):
+        self._check_group(group)
+        if self._cached_references is None:
+            self._cached_references = list(
+                self.reference_templates.groupby("TEMPLATE_ID").apply(
+                    _make_sample_set_from_template_group,
+                    image_directory=self.image_directory,
+                )
+            )
+        return self._cached_references
+    def all_samples(self, group="dev"):
+        self._check_group(group)
+        return self.references() + self.probes()
+    def groups(self):
+        return ["dev"]
+    def protocols(self):
+        return ["1:1"]
+    def _check_protocol(self, protocol):
+        assert protocol in self.protocols(), "Unvalid protocol `{}` not in {}".format(
+            protocol, self.protocols()
        )
-    @staticmethod
+    def _check_group(self, group):
-    def urls():
+        assert group in self.groups(), "Unvalid group `{}` not in {}".format(
-        return [
+            group, self.groups()
-            "https://www.idiap.ch/software/bob/databases/latest/ijbc.tar.gz",
+        )
-            "http://www.idiap.ch/software/bob/databases/latest/ijbc.tar.gz",
-        ]
--- a/bob/bio/face/database/mobio.py
+++ b/bob/bio/face/database/mobio.py
@@ -29,6 +29,16 @@ class MobioDatabase(CSVDatasetZTNorm):
    One image was extracted from each video by choosing the video frame after 10 seconds.
    The eye positions were manually labelled and distributed with the database.
+    .. warning::
+      To use this dataset protocol, you need to have the original files of the Mobio dataset.
+      Once you have it downloaded, please run the following command to set the path for Bob
+        .. code-block:: sh
+            bob config set bob.db.mobio.directory [MOBIO PATH]
    For more information check:
    .. code-block:: latex

--- a/bob/bio/face/database/multipie.py
+++ b/bob/bio/face/database/multipie.py
@@ -27,6 +27,17 @@ class MultipieDatabase(CSVDataset):
    different expressions. For each of those expressions, a complete set of 30 pictures is captured that includes
    15 different view points times 20 different illumination conditions (18 with various flashes, plus 2 pictures with no flash at all). 
+    .. warning::
+      To use this dataset protocol, you need to have the original files of the Multipie dataset.
+      Once you have it downloaded, please run the following command to set the path for Bob
+        .. code-block:: sh
+            bob config set bob.db.multipie.directory [MULTIPIE PATH]
    Available expressions:
     - Session 1 : *neutral*, *smile*

--- a/bob/bio/face/test/test_databases.py
+++ b/bob/bio/face/test/test_databases.py
@@ -27,6 +27,7 @@ from bob.bio.base.test.test_database_implementations import check_database
 import bob.core
 from bob.extension.download import get_file
 from nose.plugins.skip import SkipTest
+from bob.extension import rc
 logger = bob.core.log.setup("bob.bio.face")
@@ -304,24 +305,20 @@ def test_replaymobile():
        raise SkipTest(e)
+@pytest.mark.skipif(
+    rc.get("bob.bio.face.ijbc.directory") is None,
+    reason="IJBC original protocols not available. Please do `bob config set bob.bio.face.ijbc.directory [IJBC PATH]` to set the IJBC data path.",
+)
 def test_ijbc():
    from bob.bio.face.database import IJBCDatabase
-    # Getting the absolute path
-    urls = IJBCDatabase.urls()
-    filename = get_file("ijbc.tar.gz", urls)
-    # Removing the file before the test
-    try:
-        os.remove(filename)
-    except Exception:
-        pass
    database = IJBCDatabase()
-    assert len(database.background_model_samples()) == 140732
+    # assert len(database.background_model_samples()) == 140732
    assert len(database.references()) == 3531
    assert len(database.probes()) == 19593
+    num_comparisons = sum([len(item.references) for item in database.probes()])
+    assert num_comparisons == 19557 + 15638932 # Genuine + Impostor
 @db_available("fargo")

--- a/doc/implemented.rst
+++ b/doc/implemented.rst
@@ -14,6 +14,7 @@ Databases
   bob.bio.face.database.AtntBioDatabase
   bob.bio.face.database.CasiaAfricaDatabase
   bob.bio.face.database.MobioDatabase
+   bob.bio.face.database.IJBCDatabase
   bob.bio.face.database.ReplayBioDatabase
   bob.bio.face.database.ReplayMobileBioDatabase
   bob.bio.face.database.GBUBioDatabase