diff --git a/bob/bio/face/config/database/gbu_bad.py b/bob/bio/face/config/database/gbu_bad.py index 06354486d2e4336c7cdba99b257d0b02e514db1d..0b4454ef44fca458abcffb8b2adb8637f3845135 100644 --- a/bob/bio/face/config/database/gbu_bad.py +++ b/bob/bio/face/config/database/gbu_bad.py @@ -1,11 +1,7 @@ #!/usr/bin/env python -from bob.extension import rc from bob.bio.face.database import GBUDatabase - -mbgc_v1_directory = rc["bob.bio.face.gbu.directory"] - database = GBUDatabase(protocol="Bad") diff --git a/bob/bio/face/config/database/gbu_good.py b/bob/bio/face/config/database/gbu_good.py index 521bb9e9c453238fca3223cbe34cd63210b6a746..8d6c5d97a9ed739a2e81ec70844e52677a32079d 100644 --- a/bob/bio/face/config/database/gbu_good.py +++ b/bob/bio/face/config/database/gbu_good.py @@ -1,11 +1,7 @@ #!/usr/bin/env python -from bob.extension import rc from bob.bio.face.database import GBUDatabase - -mbgc_v1_directory = rc["bob.bio.face.gbu.directory"] - database = GBUDatabase(protocol="Good") diff --git a/bob/bio/face/config/database/gbu_ugly.py b/bob/bio/face/config/database/gbu_ugly.py index 8c271788e53b8098a1065d2019a42dc95ea084c3..5da93feb6a87ea5b5f788779a6e320c407854a99 100644 --- a/bob/bio/face/config/database/gbu_ugly.py +++ b/bob/bio/face/config/database/gbu_ugly.py @@ -1,11 +1,7 @@ #!/usr/bin/env python -from bob.extension import rc from bob.bio.face.database import GBUDatabase - -mbgc_v1_directory = rc["bob.bio.face.gbu.directory"] - database = GBUDatabase(protocol="Ugly") diff --git a/bob/bio/face/config/database/lfw_restricted.py b/bob/bio/face/config/database/lfw_restricted.py deleted file mode 100644 index 0684999626b9c46327fcd9edbc8b445a56623621..0000000000000000000000000000000000000000 --- a/bob/bio/face/config/database/lfw_restricted.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - -from bob.bio.face.database import LFWBioDatabase -from bob.bio.base.pipelines.vanilla_biometrics import DatabaseConnector -from bob.extension import rc - - -lfw_directory 
= rc["bob.db.lfw.directory"] - -database = DatabaseConnector( - LFWBioDatabase( - original_directory=lfw_directory, - annotation_type="funneled", - protocol="view1", - training_depends_on_protocol=True, - models_depend_on_protocol=True, - all_files_options={"world_type": "restricted"}, - extractor_training_options={ - "world_type": "restricted" - }, # 'subworld' : 'twofolds' - projector_training_options={ - "world_type": "restricted" - }, # 'subworld' : 'twofolds' - enroller_training_options={ - "world_type": "restricted" - }, # 'subworld' : 'twofolds' - ), - allow_scoring_with_all_biometric_references=False, -) diff --git a/bob/bio/face/config/database/lfw_unrestricted.py b/bob/bio/face/config/database/lfw_unrestricted.py deleted file mode 100644 index 39ebafb4ee14cbfb47f226ba4f0e07a9c95fc09f..0000000000000000000000000000000000000000 --- a/bob/bio/face/config/database/lfw_unrestricted.py +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env python - -from bob.bio.face.database import LFWBioDatabase -from bob.bio.base.pipelines.vanilla_biometrics import DatabaseConnector -from bob.extension import rc - - -lfw_directory = rc["bob.db.lfw.directory"] - -database = DatabaseConnector( - LFWBioDatabase( - original_directory=lfw_directory, - annotation_type="funneled", - protocol="view1", - training_depends_on_protocol=True, - models_depend_on_protocol=True, - all_files_options={"world_type": "unrestricted"}, - extractor_training_options={ - "world_type": "unrestricted" - }, # 'subworld' : 'twofolds' - projector_training_options={ - "world_type": "unrestricted" - }, # 'subworld' : 'twofolds' - enroller_training_options={ - "world_type": "unrestricted" - }, # 'subworld' : 'twofolds' - ), - allow_scoring_with_all_biometric_references=False, -) diff --git a/bob/bio/face/config/database/lfw_view2.py b/bob/bio/face/config/database/lfw_view2.py new file mode 100644 index 0000000000000000000000000000000000000000..719d763e1f6538250b711e22714472c481ee3aa0 --- /dev/null +++ 
b/bob/bio/face/config/database/lfw_view2.py @@ -0,0 +1,3 @@ +from bob.bio.face.database import LFWDatabase + +database = LFWDatabase(protocol="view2") diff --git a/bob/bio/face/database/__init__.py b/bob/bio/face/database/__init__.py index 84568b99302305b2ac8a76413a5709e641220220..90c77f948269ca42352489ebba0d5e94eafdfc7d 100644 --- a/bob/bio/face/database/__init__.py +++ b/bob/bio/face/database/__init__.py @@ -6,7 +6,8 @@ from .mobio import MobioDatabase from .replay import ReplayBioDatabase from .atnt import AtntBioDatabase from .gbu import GBUDatabase -from .lfw import LFWBioDatabase +from .arface import ARFaceDatabase +from .lfw import LFWDatabase from .multipie import MultipieDatabase from .ijbc import IJBCDatabase from .replaymobile import ReplayMobileBioDatabase @@ -46,7 +47,8 @@ __appropriate__( ReplayBioDatabase, AtntBioDatabase, GBUDatabase, - LFWBioDatabase, + ARFaceDatabase, + LFWDatabase, MultipieDatabase, ReplayMobileBioDatabase, FargoBioDatabase, diff --git a/bob/bio/face/database/lfw.py b/bob/bio/face/database/lfw.py index 1b8927307fe814dc4c743845da953e7496df7847..37985b03abc4f19c7781c4c33a51e2df38ac5fd4 100644 --- a/bob/bio/face/database/lfw.py +++ b/bob/bio/face/database/lfw.py @@ -3,76 +3,359 @@ # Tiago de Freitas Pereira <tiago.pereira@idiap.ch> # Sat 20 Aug 15:43:10 CEST 2016 -""" - LFW database implementation of bob.bio.base.database.Database interface. - It is an extension of an SQL-based database interface, which directly talks to LFW database, for - verification experiments (good to use in bob.bio.base framework). 
import copy
import logging
import os
from functools import partial

import bob.io.image
import numpy as np
from bob.bio.base.pipelines.vanilla_biometrics.abstract_classes import Database
from bob.extension import rc
from bob.pipelines import DelayedSample, SampleSet

logger = logging.getLogger(__name__)


class LFWDatabase(Database):
    """
    This package contains the access API and descriptions for the `Labeled Faces in the Wild <http://vis-www.cs.umass.edu/lfw>`_ (LFW) database.
    It only contains the Bob_ accessor methods to use the DB directly from python, with our certified protocols.
    The actual raw data for the database should be downloaded from the original URL (though we were not able to contact the corresponding Professor).

    The LFW database provides two different sets (called "views").
    The first one, called ``view1`` is used for optimizing meta-parameters of your algorithm.
    The second one, called ``view2`` is used for benchmarking.
    This interface supports only the ``view2`` protocol.
    Please note that in ``view2`` there is only a ``'dev'`` group, but no ``'eval'``.


    .. warning::

      To use this dataset protocol, you need to have the original files of the LFW datasets.
      Once you have it downloaded, please run the following command to set the path for Bob

        .. code-block:: sh

            bob config set bob.bio.face.lfw.directory [LFW PATH]
            bob config set bob.bio.face.lfw.annotation_directory [LFW ANNOTATION_PATH] # for the annotations



    .. code-block:: python

        >>> from bob.bio.face.database import LFWDatabase
        >>> lfw = LFWDatabase(protocol="view2")
        >>>
        >>> # Fetching the gallery
        >>> references = lfw.references()
        >>> # Fetching the probes
        >>> probes = lfw.probes()



    Parameters
    ----------

      protocol: str
         One of the database protocols. Options are `view2`

      annotation_type: str
         Type of the annotations used for face crop. Default to `eyes-center`

      image_relative_path: str
         LFW provides several types of image crops. Some with the full image, some with a specific
         face crop. Use this variable to set which image crop you want. Default to `all_images`, which means
         no crop.

      fixed_positions: dict
         Optional fixed annotation positions, forwarded unchanged to the base ``Database`` class.
         Default to `None`

      original_directory: str
         LFW physical path. Default to what is set in the variable `bob.bio.face.lfw.directory`

      extension: str
         Extension of the image files. Default to `.jpg`

      annotation_directory: str
         LFW annotations path. Default to what is set in the variable `bob.bio.face.lfw.annotation_directory`.
         If it is unset or does not exist, a warning is logged and the samples carry no annotations.

      annotation_issuer: str
         Type of the annotations. Default to `funneled`. Possible types `funneled` or `idiap`

    Raises
    ------

      ValueError
         If `original_directory` is unset or does not exist, or if `annotation_issuer` is not
         one of `funneled` or `idiap`.

    """

    def __init__(
        self,
        protocol,
        annotation_type="eyes-center",
        image_relative_path="all_images",
        fixed_positions=None,
        original_directory=rc.get("bob.bio.face.lfw.directory"),
        extension=".jpg",
        annotation_directory=rc.get("bob.bio.face.lfw.annotation_directory"),
        annotation_issuer="funneled",
    ):

        if original_directory is None or not os.path.exists(original_directory):
            raise ValueError(
                f"Invalid or non-existent `original_directory`: {original_directory}. "
                "Please, do `bob config set bob.bio.face.lfw.directory PATH` "
                "to set the LFW data directory."
            )

        if annotation_directory is None or not os.path.exists(annotation_directory):
            logger.warning(
                "Invalid or non-existent `annotation_directory`: %s. "
                "As a result, `SampleSet` will not contain annotations. "
                "Please, do `bob config set bob.bio.face.lfw.annotation_directory PATH` "
                "to set the LFW annotation directory.",
                annotation_directory,
            )
            # Honour the warning above: without a usable directory no
            # annotation file is ever opened (see `_load_annotations`).
            annotation_directory = None

        if annotation_issuer not in ("funneled", "idiap"):
            raise ValueError(
                f"Invalid annotation issuer: {annotation_issuer}. Possible values are `idiap` or `funneled`"
            )
        self.annotation_issuer = annotation_issuer

        # Hard-coding the extension of the annotations
        # I don't think we need this exposed
        # Please, open an issue if otherwise
        self.annotation_extension = (
            ".pos" if annotation_issuer == "idiap" else ".jpg.pts"
        )

        self._check_protocol(protocol)

        # Per-protocol caches filled lazily by `references()` / `probes()`
        self.references_dict = {}
        self.probes_dict = {}
        self.pairs = {}
        self.probe_reference_keys = {}  # Inverted pairs

        self.annotations = None
        self.original_directory = original_directory
        self.annotation_directory = annotation_directory
        self.extension = extension
        self.image_relative_path = image_relative_path

        self.load_pairs()

        super().__init__(
            name="lfw",
            protocol=protocol,
            allow_scoring_with_all_biometric_references=False,
            annotation_type=annotation_type,
            fixed_positions=fixed_positions,
            memory_demanding=False,
        )

    # The two path helpers below used to be lambdas stored on the instance.
    # They are static methods now so that instances remain picklable; the
    # call syntax `self.subject_id_from_filename(x)` is unchanged.

    @staticmethod
    def subject_id_from_filename(filename):
        """Return ``filename`` without its last ``_``-separated token.

        For ``"Some_Name_0001"`` this yields ``"Some_Name"``.
        """
        # NOTE(review): `probes()`/`references()` pass keys of the form
        # "<subject>/<subject>_<index>", so the value returned there keeps
        # the "<subject>/" prefix. Confirm this is the intended subject id.
        return "_".join(filename.split("_")[0:-1])

    @staticmethod
    def make_path_from_filename(filename):
        """Return ``<subject>/<filename>``, the path of a file relative to the image root."""
        return os.path.join(LFWDatabase.subject_id_from_filename(filename), filename)

    def _extract_funneled(self, annotation_path):
        """Interprets the annotation string as if it came from the funneled images.
        Inspired by: https://gitlab.idiap.ch/bob/bob.db.lfw/-/blob/5ac22c5b77aae971de6b73cbe23f26d6a5632072/bob/db/lfw/models.py#L69

        Parameters
        ----------
        annotation_path: str
            Path of a file whose first line holds 18 whitespace separated numbers.

        Returns
        -------
        dict
            Nine named landmarks plus the derived ``leye`` and ``reye`` eye
            centers; every value is a pair of floats.

        Raises
        ------
        ValueError
            If the first line does not contain exactly 18 numbers.
        """
        with open(annotation_path) as f:
            values = [float(v) for v in f.readline().split()]

        if len(values) != 18:
            raise ValueError(
                f"Expected 18 values in `{annotation_path}`, found {len(values)}"
            )

        locations = [
            "reyeo",
            "reyei",
            "leyei",
            "leyeo",
            "noser",
            "noset",
            "nosel",
            "mouthr",
            "mouthl",
        ]
        # The file stores each landmark as two consecutive numbers; they are
        # swapped here, so each tuple is (second number, first number).
        annotations = {
            name: (values[2 * i + 1], values[2 * i])
            for i, name in enumerate(locations)
        }

        # add eye center annotations as the center between the eye corners
        annotations["leye"] = (
            (annotations["leyei"][0] + annotations["leyeo"][0]) / 2.0,
            (annotations["leyei"][1] + annotations["leyeo"][1]) / 2.0,
        )
        annotations["reye"] = (
            (annotations["reyei"][0] + annotations["reyeo"][0]) / 2.0,
            (annotations["reyei"][1] + annotations["reyeo"][1]) / 2.0,
        )

        return annotations

    def _extract_idiap(self, annotation_file):
        """Interprets the annotation string as if it came from the Idiap annotations.
        Inspired by: https://gitlab.idiap.ch/bob/bob.db.lfw/-/blob/5ac22c5b77aae971de6b73cbe23f26d6a5632072/bob/db/lfw/models.py#L81

        Parameters
        ----------
        annotation_file: str
            Path of a file with lines of the form ``<index> <number> <number>``.
            Lines with a different number of space separated fields are ignored.

        Returns
        -------
        dict
            ``reye`` (point 3) and ``leye`` (point 8), each only if present in the file.
        """
        points = {}
        with open(annotation_file) as f:
            for line in f:
                fields = line.split(" ")
                if len(fields) == 3:
                    # NOTE(review): unlike `_extract_funneled`, the two numbers
                    # are kept in file order here. Confirm the coordinate order
                    # expected downstream.
                    points[int(fields[0])] = (float(fields[1]), float(fields[2]))

        annotations = {}
        if 3 in points:
            annotations["reye"] = points[3]

        if 8 in points:
            annotations["leye"] = points[8]

        return annotations

    def load_pairs(self):
        """Parse ``<original_directory>/view2/pairs.txt`` into ``self.pairs``.

        ``self.pairs`` maps the relative path of the first image of every pair
        to the list of relative paths it is paired with. The inverted mapping
        ``self.probe_reference_keys`` is rebuilt afterwards.

        Raises
        ------
        ValueError
            If a non-blank line has fewer than three tab separated fields.
        """
        pairs_path = os.path.join(self.original_directory, "view2", "pairs.txt")
        self.pairs = {}

        def make_filename(name, index):
            return f"{name}_{index.zfill(4)}"

        with open(pairs_path) as f:
            # Skip the first line
            next(f, None)

            for line in f:
                if not line.strip():
                    continue

                fields = line.rstrip("\r\n").split("\t")

                # Three fields: genuine pair (name, index, index);
                # otherwise impostor pair (name, index, name, index)
                if len(fields) == 3:
                    key_filename = make_filename(fields[0], fields[1])
                    value_filename = make_filename(fields[0], fields[2])
                elif len(fields) >= 4:
                    key_filename = make_filename(fields[0], fields[1])
                    value_filename = make_filename(fields[2], fields[3])
                else:
                    raise ValueError(
                        f"Malformed line in `{pairs_path}`: {line!r}"
                    )

                key = self.make_path_from_filename(key_filename)
                value = self.make_path_from_filename(value_filename)

                self.pairs.setdefault(key, []).append(value)

        self._create_probe_reference_dict()

    @staticmethod
    def protocols():
        """Return the list of supported protocol names."""
        return ["view2"]

    def background_model_samples(self):
        """Return an empty list: this interface provides no training samples."""
        return []

    def _create_probe_reference_dict(self):
        """
        Builds ``self.probe_reference_keys``, a dictionary whose each key (probe key)
        holds the list of biometric references that probe should be compared with.
        """

        self.probe_reference_keys = {}
        for key, values in self.pairs.items():
            for value in values:
                self.probe_reference_keys.setdefault(value, []).append(key)

    def _load_annotations(self, key):
        """Return the annotations of the image ``key``, or ``None`` when no annotation directory is set."""
        if self.annotation_directory is None:
            return None

        annotation_path = os.path.join(
            self.annotation_directory, key + self.annotation_extension
        )
        if self.annotation_issuer == "funneled":
            return self._extract_funneled(annotation_path)
        return self._extract_idiap(annotation_path)

    def _make_sample(self, key):
        """Return the ``DelayedSample`` (lazy image load plus annotations) for ``key``."""
        image_path = os.path.join(
            self.original_directory,
            self.image_relative_path,
            key + self.extension,
        )
        return DelayedSample(
            key=key,
            load=partial(bob.io.image.load, image_path),
            annotations=self._load_annotations(key),
        )

    def probes(self, group="dev"):
        """Return one single-sample ``SampleSet`` per probe image.

        Each set lists in ``references`` the keys it has to be compared with.
        The result is cached per protocol; ``group`` is accepted for interface
        compatibility and is not used.
        """
        if self.protocol not in self.probes_dict:
            # Build the full list before caching it, so that a failure while
            # reading an annotation file cannot leave a partial list behind.
            self.probes_dict[self.protocol] = [
                SampleSet(
                    key=key,
                    reference_id=key,
                    subject_id=self.subject_id_from_filename(key),
                    references=copy.deepcopy(
                        references
                    ),  # deep copying to avoid bizarre issues with dask
                    samples=[self._make_sample(key)],
                )
                for key, references in self.probe_reference_keys.items()
            ]

        return self.probes_dict[self.protocol]

    def references(self, group="dev"):
        """Return one single-sample ``SampleSet`` per enrolled image.

        The result is cached per protocol; ``group`` is accepted for interface
        compatibility and is not used.
        """
        if self.protocol not in self.references_dict:
            # Same reasoning as in `probes`: cache only a complete list.
            self.references_dict[self.protocol] = [
                SampleSet(
                    key=key,
                    reference_id=key,
                    subject_id=self.subject_id_from_filename(key),
                    samples=[self._make_sample(key)],
                )
                for key in self.pairs
            ]

        return self.references_dict[self.protocol]

    def groups(self):
        """Return the list of available groups (only ``dev``)."""
        return ["dev"]

    def all_samples(self, group="dev"):
        """Return the references followed by the probes, after validating ``group``."""
        self._check_group(group)

        return self.references() + self.probes()

    def _check_protocol(self, protocol):
        """Assert that ``protocol`` is one of :py:meth:`protocols`."""
        assert protocol in self.protocols(), "Unvalid protocol `{}` not in {}".format(
            protocol, self.protocols()
        )

    def _check_group(self, group):
        """Assert that ``group`` is one of :py:meth:`groups`."""
        assert group in self.groups(), "Unvalid group `{}` not in {}".format(
            group, self.groups()
        )
Please do `bob config set bob.bio.face.lfw.directory [LFW PATH]` to set the LFW data path.", +) +def test_lfw(): + + from bob.bio.face.database import LFWDatabase + + database = LFWDatabase(protocol="view2") + references = database.references() + probes = database.probes() + assert len(references) == 4564 + assert len(probes) == 4576 + # We need to have 6000 comparisons + assert sum([len(p.references) for p in probes]) == 6000 diff --git a/doc/implemented.rst b/doc/implemented.rst index 583564ca7646d3ae19b310fa59c6278787f6f115..49a12f402db63cab6a37c848426547814e2f84d9 100644 --- a/doc/implemented.rst +++ b/doc/implemented.rst @@ -18,7 +18,7 @@ Databases bob.bio.face.database.ReplayBioDatabase bob.bio.face.database.ReplayMobileBioDatabase bob.bio.face.database.GBUDatabase - bob.bio.face.database.LFWBioDatabase + bob.bio.face.database.LFWDatabase bob.bio.face.database.MultipieDatabase bob.bio.face.database.FargoBioDatabase bob.bio.face.database.MEDSDatabase diff --git a/setup.py b/setup.py index 6a1204a6891eea0160d42fba884c6148bb71d56f..0f15ee8b2d08fc8a26d533f5bcebf2b93ae6b072 100644 --- a/setup.py +++ b/setup.py @@ -119,8 +119,7 @@ setup( "ijbc-test2 = bob.bio.face.config.database.ijbc_test2:database", "ijbc-test4-g1 = bob.bio.face.config.database.ijbc_test4_g1:database", "ijbc-test4-g2 = bob.bio.face.config.database.ijbc_test4_g2:database", - "lfw-restricted = bob.bio.face.config.database.lfw_restricted:database", - "lfw-unrestricted = bob.bio.face.config.database.lfw_unrestricted:database", + "lfw-view2 = bob.bio.face.config.database.lfw_view2:database", "meds = bob.bio.face.config.database.meds:database", "mobio-all = bob.bio.face.config.database.mobio_all:database", "mobio-male = bob.bio.face.config.database.mobio_male:database", @@ -203,8 +202,7 @@ setup( "ijbc-test2 = bob.bio.face.config.database.ijbc_test2", "ijbc-test4-g1 = bob.bio.face.config.database.ijbc_test4_g1", "ijbc-test4-g2 = bob.bio.face.config.database.ijbc_test4_g2", - "lfw-restricted = 
bob.bio.face.config.database.lfw_restricted", - "lfw-unrestricted = bob.bio.face.config.database.lfw_unrestricted", + "lfw-view2 = bob.bio.face.config.database.lfw_view2", "meds = bob.bio.face.config.database.meds", "mobio-all = bob.bio.face.config.database.mobio_all", "mobio-male = bob.bio.face.config.database.mobio_male",