Commit 5c081127 authored by Tiago de Freitas Pereira
Browse files

Merge branch 'fix-ijbc' into 'master'

Resolve "IJBC database will fail on non-Idiap filesystems"

Closes #49 and #53

See merge request !124
parents e8e95bf0 4892e072
Pipeline #51996 passed with stages
in 173 minutes and 40 seconds
#!/usr/bin/env python
# Builds an `IJBCDatabase` with its default arguments and binds it to the
# module-level name `database`.
from bob.bio.face.database import IJBCDatabase
# NOTE(review): `rc` is not used in the lines shown here.
from bob.extension import rc
database = IJBCDatabase()
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
# Sat 20 Aug 15:43:10 CEST 2020
from bob.pipelines.utils import hash_string
from bob.extension.download import get_file, find_element_in_tarball
import pickle
from bob.bio.base.pipelines.vanilla_biometrics.abstract_classes import Database
import pandas as pd
from bob.pipelines.sample import DelayedSample, SampleSet
from bob.extension import rc
import os
import bob.io.image
from functools import partial
from bob.pipelines.utils import hash_string
def _make_sample_from_template_row(row, image_directory):
    """Build a lazily loaded sample from one row of a template table.

    Parameters
    ----------
    row
        Record indexable by column name. The columns read are ``FILENAME``,
        ``TEMPLATE_ID``, ``SUBJECT_ID``, ``GENDER``, ``INDOOR_OUTDOOR``,
        ``SKINTONE``, ``YAW``, ``ROLL``, ``OCC1`` .. ``OCC18``, ``FACE_X``,
        ``FACE_Y``, ``FACE_WIDTH`` and ``FACE_HEIGHT``.
    image_directory
        Directory that ``row["FILENAME"]`` is joined onto to locate the image.

    Returns
    -------
    DelayedSample
        Sample whose ``load`` callable reads the image with
        ``bob.io.image.load`` and which carries the row metadata plus a
        bounding-box ``annotations`` dictionary (``topleft``, ``bottomright``,
        ``size``), each given as ``(y, x)`` / ``(height, width)`` floats.
    """
    # Appending the template id to the key, so that parallel writing is handled
    # correctly, paying the penalty of having duplicate files
    key = os.path.splitext(row["FILENAME"])[0] + "-" + str(row["TEMPLATE_ID"])

    top, left = float(row["FACE_Y"]), float(row["FACE_X"])
    height, width = float(row["FACE_HEIGHT"]), float(row["FACE_WIDTH"])

    # occ1 .. occ18 mirror the OCC1 .. OCC18 columns one to one.
    occlusions = {f"occ{i}": row[f"OCC{i}"] for i in range(1, 19)}

    return DelayedSample(
        load=partial(bob.io.image.load, os.path.join(image_directory, row["FILENAME"])),
        reference_id=str(row["TEMPLATE_ID"]),
        subject_id=str(row["SUBJECT_ID"]),
        key=key,
        gender=row["GENDER"],
        indoor_outdoor=row["INDOOR_OUTDOOR"],
        skintone=row["SKINTONE"],
        yaw=row["YAW"],
        roll=row["ROLL"],
        # Misspelled alias of `roll`, kept so existing readers of `.rool` still work.
        rool=row["ROLL"],
        **occlusions,
        annotations={
            "topleft": (top, left),
            "bottomright": (top + height, left + width),
            "size": (height, width),
        },
    )
def _make_sample_set_from_template_group(template_group, image_directory):
    """Turn the rows of one template into a single ``SampleSet``.

    Every row of ``template_group`` is converted with
    ``_make_sample_from_template_row`` (applied row-wise, ``axis=1``). The
    resulting set takes its ``reference_id``, ``subject_id`` and ``key`` from
    the first sample; ``key`` is that sample's ``reference_id``.
    """
    per_row = template_group.apply(
        _make_sample_from_template_row, axis=1, image_directory=image_directory
    )
    samples = list(per_row)
    first = samples[0]

    return SampleSet(
        samples,
        reference_id=first.reference_id,
        subject_id=first.subject_id,
        key=first.reference_id,
    )
class IJBCDatabase(Database):
"""
This package contains the access API and descriptions for the IARPA Janus Benchmark C -- IJB-C database.
The actual raw data can be downloaded from the original web page: http://www.nist.gov/programs-projects/face-challenges (note that not everyone might be eligible for downloading the data).
def load_ijbc_sample(original_path, extension=(".jpg", ".png")):
    """Return the first existing ``original_path + ext``, or ``""`` if none exists.

    Parameters
    ----------
    original_path
        Path without its file extension.
    extension
        Iterable of extensions to try, in order. Defaults to
        ``(".jpg", ".png")``; a list is accepted as well.

    Returns
    -------
    str
        The first candidate path found on disk, or an empty string when no
        candidate exists.
    """
    for ext in extension:
        candidate = original_path + ext
        if os.path.exists(candidate):
            return candidate
    # Every extension was tried without finding a file.
    return ""
Included in the database, there are list files defining verification as well as closed- and open-set identification protocols.
For verification, two different protocols are provided.
For the ``1:1`` protocol, gallery and probe templates are combined using several images and video frames for each subject.
Compared gallery and probe templates share the same gender and skin tone -- these have been matched to make the comparisons more realistic and difficult.
For closed-set identification, the gallery of the ``1:1`` protocol is used, while probes stem from either only images, mixed images and video frames, or plain videos.
For open-set identification, the same probes are evaluated, but the gallery is split into two parts, either of which is left out to provide unknown probe templates, i.e., probe templates with no matching subject in the gallery.
In any case, scores are computed between all (active) gallery templates and all probes.
The IJB-C dataset provides additional evaluation protocols for face detection and clustering, but these are (not yet) part of this interface.
.. warning::
To use this dataset protocol, you need to have the original files of the IJBC datasets.
Once you have it downloaded, please run the following command to set the path for Bob
.. code-block:: sh
bob config set bob.bio.face.ijbc.directory [IJBC PATH]
The code below allows you to fetch the gallery and probes of the "1:1" protocol.
.. code-block:: python
>>> from bob.bio.face.database import IJBCDatabase
>>> ijbc = IJBCDatabase()
>>>
>>> # Fetching the gallery
>>> references = ijbc.references()
>>> # Fetching the probes
>>> probes = ijbc.probes()
"""
def __init__(
    self,
    protocol="1:1",
    original_directory=rc.get("bob.bio.face.ijbc.directory"),
    **kwargs,
):
    """Set up the database from the original IJB-C files on disk.

    Parameters
    ----------
    protocol
        Protocol name; validated with ``self._check_protocol``.
    original_directory
        Root of the original dataset. The ``images`` and ``protocols``
        sub-directories are derived from it. The default is read from the
        ``bob.bio.face.ijbc.directory`` configuration key when this function
        is defined.
    **kwargs
        NOTE(review): accepted but not forwarded to the base class -- confirm
        whether that is intended.

    Raises
    ------
    ValueError
        If ``original_directory`` is ``None`` or does not exist.
    """
    if original_directory is None or not os.path.exists(original_directory):
        raise ValueError(
            f"Invalid or non existent `original_directory`: {original_directory}"
        )

    self._check_protocol(protocol)
    super().__init__(
        name="ijbc",
        protocol=protocol,
        allow_scoring_with_all_biometric_references=False,
        annotation_type="bounding-box",
        fixed_positions=None,
        memory_demanding=True,
    )

    self.image_directory = os.path.join(original_directory, "images")
    self.protocol_directory = os.path.join(original_directory, "protocols")

    # Filled lazily by `probes()` and `references()`.
    self._cached_probes = None
    self._cached_references = None
    self.hash_fn = hash_string

    self._load_metadata(protocol)
def _load_metadata(self, protocol):
    """Load the protocol CSV files from ``self.protocol_directory``.

    For the ``"1:1"`` protocol this sets:

    * ``self.reference_templates`` -- the two gallery files (G1 and G2)
      concatenated, then left-joined with the metadata;
    * ``self.probe_templates`` -- the mixed probe file, left-joined with the
      metadata;
    * ``self.matches`` -- reference/probe template id pairs, cast to ``str``;
    * ``self.metadata`` -- the selected columns of ``ijbc_metadata.csv``.

    Raises
    ------
    ValueError
        If ``protocol`` is anything other than ``"1:1"``.
    """
    if protocol != "1:1":
        raise ValueError(
            f"Protocol `{protocol}` not supported. We do accept merge requests :-)"
        )

    def read_protocol_csv(filename, **read_kwargs):
        # Every protocol file lives directly under `self.protocol_directory`.
        return pd.read_csv(
            os.path.join(self.protocol_directory, filename), **read_kwargs
        )

    # Load CSV files
    self.reference_templates = pd.concat(
        [
            read_protocol_csv("ijbc_1N_gallery_G1.csv"),
            read_protocol_csv("ijbc_1N_gallery_G2.csv"),
        ]
    )
    self.probe_templates = read_protocol_csv("ijbc_1N_probe_mixed.csv")

    # Column names are supplied here, so the first line of the file is read as data.
    # Ids are cast to str so they compare equal to the string `reference_id`s.
    self.matches = read_protocol_csv(
        "ijbc_11_G1_G2_matches.csv",
        names=["REFERENCE_TEMPLATE_ID", "PROBE_TEMPLATE_ID"],
    ).astype("str")

    self.metadata = read_protocol_csv(
        "ijbc_metadata.csv",
        usecols=[
            "SUBJECT_ID",
            "FILENAME",
            "SIGHTING_ID",
            "FACIAL_HAIR",
            "AGE",
            "INDOOR_OUTDOOR",
            "SKINTONE",
            "GENDER",
            "YAW",
            "ROLL",
        ]
        + [f"OCC{i}" for i in range(1, 19)],
    )

    join_keys = ["SUBJECT_ID", "FILENAME", "SIGHTING_ID"]

    # LEFT JOIN WITH METADATA
    self.probe_templates = pd.merge(
        self.probe_templates, self.metadata, on=join_keys, how="left"
    )

    # LEFT JOIN WITH METADATA
    self.reference_templates = pd.merge(
        self.reference_templates, self.metadata, on=join_keys, how="left"
    )
def background_model_samples(self):
    """Return ``None``: no background-model samples are provided."""
    return None
def probes(self, group="dev"):
    """Return the probe sample sets, building and caching them on first use.

    One sample set is created per ``TEMPLATE_ID`` of ``self.probe_templates``.
    Each set then gets a ``references`` attribute listing the
    ``REFERENCE_TEMPLATE_ID`` values that ``self.matches`` pairs with it.
    ``group`` is validated with ``self._check_group``.
    """
    self._check_group(group)

    # Already built: hand back the cached list.
    if self._cached_probes is not None:
        return self._cached_probes

    per_template = self.probe_templates.groupby("TEMPLATE_ID").apply(
        _make_sample_set_from_template_group,
        image_directory=self.image_directory,
    )
    self._cached_probes = list(per_template)

    # Link probes to the references they have to be compared with
    # We might make that faster if we manage to write it as a Pandas instruction
    matches_by_probe = self.matches.groupby("PROBE_TEMPLATE_ID")
    for sample_set in self._cached_probes:
        paired_rows = matches_by_probe.get_group(sample_set.reference_id)
        sample_set.references = list(paired_rows["REFERENCE_TEMPLATE_ID"])

    return self._cached_probes
def references(self, group="dev"):
    """Return the reference sample sets, building and caching them on first use.

    One sample set is created per ``TEMPLATE_ID`` of
    ``self.reference_templates``. ``group`` is validated with
    ``self._check_group``.
    """
    self._check_group(group)

    # Already built: hand back the cached list.
    if self._cached_references is not None:
        return self._cached_references

    per_template = self.reference_templates.groupby("TEMPLATE_ID").apply(
        _make_sample_set_from_template_group,
        image_directory=self.image_directory,
    )
    self._cached_references = list(per_template)
    return self._cached_references
def all_samples(self, group="dev"):
    """Return the references followed by the probes, after validating ``group``."""
    self._check_group(group)
    reference_sets = self.references()
    probe_sets = self.probes()
    return reference_sets + probe_sets
def groups(self):
    """Return the list of available groups: only ``"dev"``."""
    available = ["dev"]
    return available
def protocols(self):
    """Return the list of supported protocols: only ``"1:1"``."""
    supported = ["1:1"]
    return supported
def _check_protocol(self, protocol):
    """Raise ``AssertionError`` unless ``protocol`` is listed by ``self.protocols()``."""
    available = self.protocols()
    if protocol not in available:
        # Raised explicitly instead of through `assert`, so the check is not
        # stripped under `python -O`. AssertionError is kept so callers see the
        # same exception type as before.
        raise AssertionError(f"Invalid protocol `{protocol}` not in {available}")
@staticmethod
def urls():
    """Return the download locations of ``ijbc.tar.gz``: HTTPS first, then HTTP."""
    archive = "www.idiap.ch/software/bob/databases/latest/ijbc.tar.gz"
    return [f"{scheme}://{archive}" for scheme in ("https", "http")]
def _check_group(self, group):
    """Raise ``AssertionError`` unless ``group`` is listed by ``self.groups()``."""
    available = self.groups()
    if group not in available:
        # Raised explicitly instead of through `assert`, so the check is not
        # stripped under `python -O`. AssertionError is kept so callers see the
        # same exception type as before.
        raise AssertionError(f"Invalid group `{group}` not in {available}")
......@@ -29,6 +29,16 @@ class MobioDatabase(CSVDatasetZTNorm):
One image was extracted from each video by choosing the video frame after 10 seconds.
The eye positions were manually labelled and distributed with the database.
.. warning::
To use this dataset protocol, you need to have the original files of the Mobio dataset.
Once you have it downloaded, please run the following command to set the path for Bob
.. code-block:: sh
bob config set bob.db.mobio.directory [MOBIO PATH]
For more information check:
.. code-block:: latex
......
......@@ -27,6 +27,17 @@ class MultipieDatabase(CSVDataset):
different expressions. For each of those expressions, a complete set of 30 pictures is captured that includes
15 different view points times 20 different illumination conditions (18 with various flashes, plus 2 pictures with no flash at all).
.. warning::
To use this dataset protocol, you need to have the original files of the Multipie dataset.
Once you have it downloaded, please run the following command to set the path for Bob
.. code-block:: sh
bob config set bob.db.multipie.directory [MULTIPIE PATH]
Available expressions:
- Session 1 : *neutral*, *smile*
......
......@@ -27,6 +27,7 @@ from bob.bio.base.test.test_database_implementations import check_database
import bob.core
from bob.extension.download import get_file
from nose.plugins.skip import SkipTest
from bob.extension import rc
logger = bob.core.log.setup("bob.bio.face")
......@@ -304,24 +305,20 @@ def test_replaymobile():
raise SkipTest(e)
@pytest.mark.skipif(
    rc.get("bob.bio.face.ijbc.directory") is None,
    reason="IJBC original protocols not available. Please do `bob config set bob.bio.face.ijbc.directory [IJBC PATH]` to set the IJBC data path.",
)
def test_ijbc():
    """Check the number of references, probes and comparisons of IJB-C.

    Skipped when the ``bob.bio.face.ijbc.directory`` configuration key is not
    set, because the database is read from the original files on disk.
    """
    from bob.bio.face.database import IJBCDatabase

    database = IJBCDatabase()

    # NOTE(review): `background_model_samples()` is deliberately not checked
    # here; the former length check (140732) was disabled.
    assert len(database.references()) == 3531

    probes = database.probes()
    assert len(probes) == 19593

    num_comparisons = sum(len(item.references) for item in probes)
    assert num_comparisons == 19557 + 15638932  # Genuine + Impostor
@db_available("fargo")
......
......@@ -14,6 +14,7 @@ Databases
bob.bio.face.database.AtntBioDatabase
bob.bio.face.database.CasiaAfricaDatabase
bob.bio.face.database.MobioDatabase
bob.bio.face.database.IJBCDatabase
bob.bio.face.database.ReplayBioDatabase
bob.bio.face.database.ReplayMobileBioDatabase
bob.bio.face.database.GBUBioDatabase
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment