Skip to content
Snippets Groups Projects
Commit 6b38ab4c authored by Laurent COLBOIS
Browse files

Rework IJBC database using Pandas, to have both performance and portability

parent ffbe30af
No related branches found
No related tags found
1 merge request: !124 Resolve "IJBC database will fail on non-Idiap filesystems"
Pipeline #51432 failed
#!/usr/bin/env python from bob.bio.base.pipelines.vanilla_biometrics.abstract_classes import Database
# vim: set fileencoding=utf-8 : import pandas as pd
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch> from bob.pipelines.sample import DelayedSample, SampleSet
# Sat 20 Aug 15:43:10 CEST 2020 from bob.extension import rc
from bob.pipelines.utils import hash_string
from bob.extension.download import get_file, find_element_in_tarball
import pickle
import os import os
import bob.io.image
from functools import partial
def load_ijbc_sample(original_path, extension=[".jpg", ".png"]): def load(path):
for e in extension: return bob.io.image.load(os.path.join(rc["bob.db.ijbc.directory"], path))
path = original_path + e
if os.path.exists(path):
return path
else:
return ""
class IJBCDatabase: def _make_sample_from_template_row(row, image_directory):
def __init__(self, pkl_directory=None): return DelayedSample(
self.annotation_type = "bounding-box" load=partial(
self.fixed_positions = None bob.io.image.load, path=os.path.join(image_directory, row["FILENAME"])
self.allow_scoring_with_all_biometric_references = False ),
self.hash_fn = hash_string template_id=str(row["TEMPLATE_ID"]),
self.memory_demanding = True subject_id=str(row["SUBJECT_ID"]),
key=os.path.splitext(row["FILENAME"])[0],
annotations={
"topleft": (float(row["FACE_Y"]), float(row["FACE_X"])),
"bottomright": (
float(row["FACE_Y"]) + float(row["FACE_HEIGHT"]),
float(row["FACE_X"]) + float(row["FACE_WIDTH"]),
),
"size": (float(row["FACE_HEIGHT"]), float(row["FACE_WIDTH"])),
},
)
if pkl_directory is None:
urls = IJBCDatabase.urls()
pkl_directory = get_file(
"ijbc.tar.gz", urls, file_hash="4b25d7f10595eb9f97f328a2d448d957"
)
self.pkl_directory = pkl_directory def _make_sample_set_from_template_group(template_group, image_directory):
samples = list(
template_group.apply(
_make_sample_from_template_row, axis=1, image_directory=image_directory
)
)
return SampleSet(
samples, template_id=samples[0].template_id, subject_id=samples[0].subject_id
)
def _assert_group(self, group): class IJBCDatabase(Database):
assert ( def __init__(
group == "dev" self,
), "The IJBC database only has a `dev` group. Received : {}".format(group) protocol="1:1",
original_directory=rc["bob.bio.face.ijbc.directory"],
**kwargs
):
self._check_protocol(protocol)
def references(self, group="dev"): super().__init__(
self._assert_group(group) name="ijbc",
return pickle.loads( protocol=protocol,
find_element_in_tarball(self.pkl_directory, "db_references.pickle", True) allow_scoring_with_all_biometric_references=False,
annotation_type="eyes-center",
fixed_positions=None,
memory_demanding=True,
) )
def probes(self, group="dev"): self.image_directory = os.path.join(original_directory, "images")
self._assert_group(group) self.protocol_directory = os.path.join(original_directory, "protocols")
return pickle.loads( self._cached_probes = None
find_element_in_tarball(self.pkl_directory, "db_probes.pickle", True) self._cached_references = None
self._load_metadata()
def _load_metadata(self):
# Load CSV files
self.reference_templates = pd.concat(
[
pd.read_csv(
os.path.join(self.protocol_directory, "ijbc_1N_gallery_G1.csv")
),
pd.read_csv(
os.path.join(self.protocol_directory, "ijbc_1N_gallery_G2.csv")
),
]
)
self.probe_templates = pd.read_csv(
os.path.join(self.protocol_directory, "ijbc_1N_probe_mixed.csv")
)
self.matches = pd.read_csv(
os.path.join(self.protocol_directory, "ijbc_11_G1_G2_matches.csv"),
names=["REFERENCE_TEMPLATE_ID", "PROBE_TEMPLATE_ID"],
) )
def background_model_samples(self): def background_model_samples(self):
import cloudpickle return None
return cloudpickle.loads( def probes(self, group="dev"):
find_element_in_tarball( self._check_group(group)
self.pkl_directory, "db_background_model_samples.pickle", True if self._cached_probes is None:
self._cached_probes = list(
self.probe_templates.groupby("TEMPLATE_ID").apply(
_make_sample_set_from_template_group,
image_directory=self.image_directory,
)
) )
# Link probes to the references they have to be compared with
# We might make that faster if we manage to write it as a Panda instruction
grouped_matches = self.matches.groupby("PROBE_TEMPLATE_ID")
for probe_sampleset in self._cached_probes:
probe_sampleset.references = list(
grouped_matches.get_group(int(probe_sampleset.template_id))[
"REFERENCE_TEMPLATE_ID"
]
)
return self._cached_probes
def references(self, group="dev"):
    """Return the reference sample sets, one per ``TEMPLATE_ID``.

    ``group`` is validated with ``self._check_group``. The list is built
    the first time it is requested, by grouping ``self.reference_templates``
    on ``TEMPLATE_ID`` and turning every group into a sample set, and is
    then kept in ``self._cached_references`` for all later calls.
    """
    self._check_group(group)

    # Fast path: the sample sets were already built by an earlier call.
    if self._cached_references is not None:
        return self._cached_references

    templates_by_id = self.reference_templates.groupby("TEMPLATE_ID")
    sample_sets = templates_by_id.apply(
        _make_sample_set_from_template_group,
        image_directory=self.image_directory,
    )
    self._cached_references = list(sample_sets)
    return self._cached_references
def all_samples(self, group="dev"):
    """Return every sample set of ``group``: references first, then probes.

    ``group`` is validated with ``self._check_group`` and then forwarded to
    ``references`` and ``probes``, so that the group that was checked is
    also the group that is returned (instead of silently falling back to
    the default of those methods).

    The result is a new list; the lists held by ``references`` and
    ``probes`` are not modified.
    """
    self._check_group(group)
    return self.references(group=group) + self.probes(group=group)
def groups(self):
    """Return the names of the groups this database provides.

    Only ``"dev"`` exists. A fresh list is built on every call.
    """
    available_groups = ["dev"]
    return available_groups
def protocols(self):
    """Return the names of the protocols this database provides.

    Only ``"1:1"`` exists. A fresh list is built on every call.
    """
    available_protocols = ["1:1"]
    return available_protocols
def _check_protocol(self, protocol):
assert protocol in self.protocols(), "Unvalid protocol `{}` not in {}".format(
protocol, self.protocols()
) )
@staticmethod def _check_group(self, group):
def urls(): assert group in self.groups(), "Unvalid group `{}` not in {}".format(
return [ group, self.groups()
"https://www.idiap.ch/software/bob/databases/latest/ijbc.tar.gz", )
"http://www.idiap.ch/software/bob/databases/latest/ijbc.tar.gz",
]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment