From 24356f0784e11a4e34fec9547d0e691799ca13c7 Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Thu, 17 Jun 2021 11:26:06 +0200 Subject: [PATCH] Small patches on IJBC --- bob/bio/face/config/database/ijbc.py | 1 - bob/bio/face/database/ijbc.py | 116 ++++++++++++++++++++------- bob/bio/face/test/test_databases.py | 17 ++-- doc/implemented.rst | 1 + 4 files changed, 94 insertions(+), 41 deletions(-) diff --git a/bob/bio/face/config/database/ijbc.py b/bob/bio/face/config/database/ijbc.py index ea25374d..175ec852 100644 --- a/bob/bio/face/config/database/ijbc.py +++ b/bob/bio/face/config/database/ijbc.py @@ -1,6 +1,5 @@ #!/usr/bin/env python from bob.bio.face.database import IJBCDatabase -from bob.extension import rc database = IJBCDatabase() diff --git a/bob/bio/face/database/ijbc.py b/bob/bio/face/database/ijbc.py index e979960d..30dc6d15 100644 --- a/bob/bio/face/database/ijbc.py +++ b/bob/bio/face/database/ijbc.py @@ -5,16 +5,21 @@ from bob.extension import rc import os import bob.io.image from functools import partial +import uuid +from bob.pipelines.utils import hash_string def _make_sample_from_template_row(row, image_directory): + + # Appending this hash, so we can handle parallel writting done correctly + # paying the penalty of having duplicate files + hashstr = str(uuid.uuid4()) + return DelayedSample( - load=partial( - bob.io.image.load, path=os.path.join(image_directory, row["FILENAME"]) - ), + load=partial(bob.io.image.load, os.path.join(image_directory, row["FILENAME"])), reference_id=str(row["TEMPLATE_ID"]), subject_id=str(row["SUBJECT_ID"]), - key=os.path.splitext(row["FILENAME"])[0], + key=os.path.splitext(row["FILENAME"])[0] + "-" + hashstr, annotations={ "topleft": (float(row["FACE_Y"]), float(row["FACE_X"])), "bottomright": ( @@ -27,25 +32,72 @@ def _make_sample_from_template_row(row, image_directory): def _make_sample_set_from_template_group(template_group, image_directory): + samples = list( template_group.apply( _make_sample_from_template_row, axis=1, image_directory=image_directory ) ) return SampleSet( - samples, reference_id=samples[0].template_id, subject_id=samples[0].subject_id + samples, reference_id=samples[0].reference_id, subject_id=samples[0].subject_id ) class IJBCDatabase(Database): + """ + + This package contains the access API and descriptions for the IARPA Janus Benchmark C -- IJB-C database. + The actual raw data can be downloaded from the original web page: http://www.nist.gov/programs-projects/face-challenges (note that not everyone might be eligible for downloading the data). + + Included in the database, there are list files defining verification as well as closed- and open-set identification protocols. + For verification, two different protocols are provided. + For the ``1:1`` protocol, gallery and probe templates are combined using several images and video frames for each subject. + Compared gallery and probe templates share the same gender and skin tone -- these have been matched to make the comparisions more realistic and difficult. + + For closed-set identification, the gallery of the ``1:1`` protocol is used, while probes stem from either only images, mixed images and video frames, or plain videos. + For open-set identification, the same probes are evaluated, but the gallery is split into two parts, either of which is left out to provide unknown probe templates, i.e., probe templates with no matching subject in the gallery. + In any case, scores are computed between all (active) gallery templates and all probes. + + The IJB-C dataset provides additional evaluation protocols for face detection and clustering, but these are (not yet) part of this interface. + + + .. warning:: + + To use this dataset protocol, you need to have the original files of the IJBC datasets. + Once you have it downloaded, please run the following command to set the path for Bob + + .. code-block:: sh + + bob config set bob.bio.face.ijbc.directory [IJBC PATH] + + + The code below allows you to fetch the galery and probes of the "1:1" protocol. + + .. code-block:: python + + >>> from bob.bio.face.database import IJBCDatabase + >>> ijbc = IJBCDatabase() + >>> + >>> # Fetching the gallery + >>> references = ijbc.references() + >>> # Fetching the probes + >>> probes = ijbc.probes() + + """ + def __init__( self, protocol="1:1", - original_directory=rc["bob.bio.face.ijbc.directory"], - **kwargs + original_directory=rc.get("bob.bio.face.ijbc.directory"), + **kwargs, ): - self._check_protocol(protocol) + if original_directory is None or not os.path.exists(original_directory): + raise ValueError( + "Invalid or non existant `original_directory`: f{original_directory}" + ) + + self._check_protocol(protocol) super().__init__( name="ijbc", protocol=protocol, @@ -59,30 +111,36 @@ class IJBCDatabase(Database): self.protocol_directory = os.path.join(original_directory, "protocols") self._cached_probes = None self._cached_references = None + self.hash_fn = hash_string - self._load_metadata() + self._load_metadata(protocol) - def _load_metadata(self): + def _load_metadata(self, protocol): # Load CSV files - self.reference_templates = pd.concat( - [ - pd.read_csv( - os.path.join(self.protocol_directory, "ijbc_1N_gallery_G1.csv") - ), - pd.read_csv( - os.path.join(self.protocol_directory, "ijbc_1N_gallery_G2.csv") - ), - ] - ) + if protocol == "1:1": + self.reference_templates = pd.concat( + [ + pd.read_csv( + os.path.join(self.protocol_directory, "ijbc_1N_gallery_G1.csv") + ), + pd.read_csv( + os.path.join(self.protocol_directory, "ijbc_1N_gallery_G2.csv") + ), + ] + ) - self.probe_templates = pd.read_csv( - os.path.join(self.protocol_directory, "ijbc_1N_probe_mixed.csv") - ) + self.probe_templates = pd.read_csv( + os.path.join(self.protocol_directory, "ijbc_1N_probe_mixed.csv") + ) - self.matches = pd.read_csv( - os.path.join(self.protocol_directory, "ijbc_11_G1_G2_matches.csv"), - names=["REFERENCE_TEMPLATE_ID", "PROBE_TEMPLATE_ID"], - ) + self.matches = pd.read_csv( + os.path.join(self.protocol_directory, "ijbc_11_G1_G2_matches.csv"), + names=["REFERENCE_TEMPLATE_ID", "PROBE_TEMPLATE_ID"], + ) + else: + raise ValueError( + f"Protocol `{protocol}` not supported. We do accept merge requests :-)" + ) def background_model_samples(self): return None @@ -102,7 +160,7 @@ class IJBCDatabase(Database): grouped_matches = self.matches.groupby("PROBE_TEMPLATE_ID") for probe_sampleset in self._cached_probes: probe_sampleset.references = list( - grouped_matches.get_group(int(probe_sampleset.template_id))[ + grouped_matches.get_group(int(probe_sampleset.reference_id))[ "REFERENCE_TEMPLATE_ID" ] ) @@ -140,4 +198,4 @@ class IJBCDatabase(Database): def _check_group(self, group): assert group in self.groups(), "Unvalid group `{}` not in {}".format( group, self.groups() - ) \ No newline at end of file + ) diff --git a/bob/bio/face/test/test_databases.py b/bob/bio/face/test/test_databases.py index ff6213f4..269d5964 100644 --- a/bob/bio/face/test/test_databases.py +++ b/bob/bio/face/test/test_databases.py @@ -27,6 +27,7 @@ from bob.bio.base.test.test_database_implementations import check_database import bob.core from bob.extension.download import get_file from nose.plugins.skip import SkipTest +from bob.extension import rc logger = bob.core.log.setup("bob.bio.face") @@ -304,22 +305,16 @@ def test_replaymobile(): raise SkipTest(e) +@pytest.mark.skipif( + rc.get("bob.bio.face.ijbc.directory") is None, + reason="IJBC original protocols not available. Please do `bob config set bob.bio.face.ijbc.directory [IJBC PATH]` to set the IJBC data path.", +) def test_ijbc(): from bob.bio.face.database import IJBCDatabase - # Getting the absolute path - urls = IJBCDatabase.urls() - filename = get_file("ijbc.tar.gz", urls) - - # Removing the file before the test - try: - os.remove(filename) - except Exception: - pass - database = IJBCDatabase() - assert len(database.background_model_samples()) == 140732 + # assert len(database.background_model_samples()) == 140732 assert len(database.references()) == 3531 assert len(database.probes()) == 19593 diff --git a/doc/implemented.rst b/doc/implemented.rst index 4e3d967d..f0695bc3 100644 --- a/doc/implemented.rst +++ b/doc/implemented.rst @@ -14,6 +14,7 @@ Databases bob.bio.face.database.AtntBioDatabase bob.bio.face.database.CasiaAfricaDatabase bob.bio.face.database.MobioDatabase + bob.bio.face.database.IJBCDatabase bob.bio.face.database.ReplayBioDatabase bob.bio.face.database.ReplayMobileBioDatabase bob.bio.face.database.GBUBioDatabase -- GitLab