diff --git a/.gitignore b/.gitignore index b8afedd4fabe76662e1ef277c0a7b285fc0ff256..718a4685959d9031df4e0ec777732b5b0bc30abf 100644 --- a/.gitignore +++ b/.gitignore @@ -11,3 +11,4 @@ src develop-eggs sphinx dist +build diff --git a/bob/bio/base/database/__init__.py b/bob/bio/base/database/__init__.py index e0c15ef0577778483ad3ceb73d704fb25eb670e8..72f5e78c1ccab5cf5ad3a67cb8b2a689401782dc 100644 --- a/bob/bio/base/database/__init__.py +++ b/bob/bio/base/database/__init__.py @@ -2,6 +2,9 @@ from .file import BioFile from .file import BioFileSet from .database import BioDatabase from .database import ZTBioDatabase +from .filelist.query import FileListBioDatabase +from .filelist.models import Client + # gets sphinx autodoc done right - don't remove it def __appropriate__(*args): @@ -15,12 +18,16 @@ def __appropriate__(*args): <https://github.com/sphinx-doc/sphinx/issues/3048>` """ - for obj in args: obj.__module__ = __name__ + for obj in args: + obj.__module__ = __name__ + __appropriate__( BioFile, BioFileSet, BioDatabase, ZTBioDatabase, - ) + FileListBioDatabase, + Client, +) __all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/database/database.py b/bob/bio/base/database/database.py index fe89c8a192173775339a3269043d3ff941894346..7d0445eaa6c35c2ed52f03fc85d8736b38802621 100644 --- a/bob/bio/base/database/database.py +++ b/bob/bio/base/database/database.py @@ -8,8 +8,6 @@ import six from numpy.testing.decorators import setastest import bob.db.base -import bob.bio.base.database - class BioDatabase(six.with_metaclass(abc.ABCMeta, bob.db.base.Database)): """This class represents the basic API for database access. @@ -54,8 +52,6 @@ class BioDatabase(six.with_metaclass(abc.ABCMeta, bob.db.base.Database)): protocol : str or ``None`` The name of the protocol that defines the default experimental setup for this database. - .. todo:: Check if the ``None`` protocol is supported. 
- training_depends_on_protocol : bool Specifies, if the training set used for training the extractor and the projector depend on the protocol. This flag is used to avoid re-computation of data when running on the different protocols of the same database. @@ -637,7 +633,7 @@ class ZTBioDatabase(BioDatabase): All keyword parameters will be passed unaltered to the :py:class:`bob.bio.base.database.BioDatabase` constructor. """ # call base class constructor - BioDatabase.__init__(self, name, **kwargs) + super(ZTBioDatabase, self).__init__(name, **kwargs) self.z_probe_options = z_probe_options diff --git a/bob/bio/base/database/filelist/__init__.py b/bob/bio/base/database/filelist/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cfb205f2615f5a8890b4c50f34d322b35be37f19 --- /dev/null +++ b/bob/bio/base/database/filelist/__init__.py @@ -0,0 +1,26 @@ +from .models import ListReader, Client +from .query import FileListBioDatabase + + +# gets sphinx autodoc done right - don't remove it +def __appropriate__(*args): + """Says object was actually declared here, and not in the import module. + Fixing sphinx warnings of not being able to find classes, when path is shortened. 
+ Parameters: + + *args: An iterable of objects to modify + + Resolves `Sphinx referencing issues + <https://github.com/sphinx-doc/sphinx/issues/3048>` + """ + + for obj in args: + obj.__module__ = __name__ + + +__appropriate__( + ListReader, + Client, + FileListBioDatabase, +) +__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/database/filelist/driver.py b/bob/bio/base/database/filelist/driver.py new file mode 100644 index 0000000000000000000000000000000000000000..71fa165176127f6497de425acc4e7db848a982b3 --- /dev/null +++ b/bob/bio/base/database/filelist/driver.py @@ -0,0 +1,140 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Laurent El Shafey <laurent.el-shafey@idiap.ch> +# +# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + + +"""Commands the Verification Filelists database can respond to. 
+""" + +import os +import sys +from bob.db.base.driver import Interface as BaseInterface + + +def dumplist(args): + """Dumps lists of files based on your criteria""" + + from .query import FileListBioDatabase + db = FileListBioDatabase(args.list_directory, 'bio_filelist', use_dense_probe_file_list=False) + + r = db.objects( + purposes=args.purpose, + groups=args.group, + classes=args.sclass, + protocol=args.protocol + ) + + output = sys.stdout + if args.selftest: + from bob.db.base.utils import null + output = null() + + for f in r: + output.write('%s\n' % f.make_path(directory=args.directory, extension=args.extension)) + + return 0 + + +def checkfiles(args): + """Checks existence of files based on your criteria""" + + from .query import FileListBioDatabase + db = FileListBioDatabase(args.list_directory, 'bio_filelist', use_dense_probe_file_list=False) + + r = db.objects(protocol=args.protocol) + + # go through all files, check if they are available on the filesystem + good = [] + bad = [] + for f in r: + if os.path.exists(f.make_path(args.directory, args.extension)): + good.append(f) + else: + bad.append(f) + + # report + output = sys.stdout + if args.selftest: + from bob.db.base.utils import null + output = null() + + if bad: + for f in bad: + output.write('Cannot find file "%s"\n' % f.make_path(args.directory, args.extension)) + output.write('%d files (out of %d) were not found at "%s"\n' % + (len(bad), len(r), args.directory)) + + return 0 + + +class Interface(BaseInterface): + def name(self): + return 'bio_filelist' + + def version(self): + import pkg_resources # part of setuptools + return pkg_resources.require('bob.bio.base')[0].version + + def files(self): + return () + + def type(self): + return 'text' + + def add_commands(self, parser): + from . 
import __doc__ as docs + + subparsers = self.setup_parser(parser, + "Face Verification File Lists database", docs) + + import argparse + + # the "dumplist" action + parser = subparsers.add_parser('dumplist', help=dumplist.__doc__) + parser.add_argument('-l', '--list-directory', required=True, + help="The directory which contains the file lists.") + parser.add_argument('-d', '--directory', default='', + help="if given, this path will be prepended to every entry returned.") + parser.add_argument('-e', '--extension', default='', + help="if given, this extension will be appended to every entry returned.") + parser.add_argument('-u', '--purpose', + help="if given, this value will limit the output files to those designed for the given purposes.", + choices=('enroll', 'probe', '')) + parser.add_argument('-g', '--group', + help="if given, this value will limit the output files to those belonging to a particular protocolar group.", + choices=('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2', '')) + parser.add_argument('-c', '--class', dest="sclass", + help="if given, this value will limit the output files to those belonging to the given classes.", + choices=('client', 'impostor', '')) + parser.add_argument('-p', '--protocol', default=None, + help="If set, the protocol is appended to the directory that contains the file lists.") + parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS) + parser.set_defaults(func=dumplist) # action + + # the "checkfiles" action + parser = subparsers.add_parser('checkfiles', help=checkfiles.__doc__) + parser.add_argument('-l', '--list-directory', required=True, + help="The directory which contains the file lists.") + parser.add_argument('-d', '--directory', dest="directory", default='', + help="if given, this path will be prepended to every entry returned.") + parser.add_argument('-e', '--extension', dest="extension", default='', + help="if given, this extension will be appended to every 
entry returned.") + parser.add_argument('-p', '--protocol', default=None, + help="If set, the protocol is appended to the directory that contains the file lists.") + parser.add_argument('--self-test', dest="selftest", action='store_true', help=argparse.SUPPRESS) + + parser.set_defaults(func=checkfiles) # action diff --git a/bob/bio/base/database/filelist/models.py b/bob/bio/base/database/filelist/models.py new file mode 100644 index 0000000000000000000000000000000000000000..bb1cc21a28d3d2c67909c2594afd5af2ac1bf26d --- /dev/null +++ b/bob/bio/base/database/filelist/models.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# @author: Manuel Guenther <Manuel.Guenther@idiap.ch> +# @date: Wed Oct 24 10:47:43 CEST 2012 +# +# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +""" +This file defines simple Client and File interfaces that are comparable with other bob.db databases. +""" + +import os +import fileinput +import re + + +class Client(object): + """ + The clients of this database contain ONLY client ids. Nothing special. + """ + + def __init__(self, client_id): + self.id = client_id + """The ID of the client, which is stored as a :py:class:`str` object.""" + + +class FileListFile(object): + """ + Initialize the File object with the minimum required data. 
+ + If the ``model_id`` is not specified, ``model_id`` and ``client_id`` are identical. + If the ``claimed_id`` is not specified, it is expected to be the ``client_id``. + + **Parameters** + + client_id : various type + The id of the client, this file belongs to. + The type of it is dependent on your implementation. + If you use an SQL database, this should be an SQL type like Integer or String. + path : str + The path of this file, relative to the basic directory. + If you use an SQL database, this should be the SQL type String. + Please do not specify any file extensions. + file_id : various type + The id of the file. + The type of it is dependent on your implementation. + If you use an SQL database, this should be an SQL type like Integer or String. + If you are using an automatically determined file id, you can skip selecting the file id. + """ + + def __init__(self, file_name, client_id, model_id=None, claimed_id=None): + + # super(FileListFile, self).__init__(client_id=client_id, path=file_name, file_id=file_name) + super(FileListFile, self).__init__() + self.client_id = client_id + self.path = file_name + self.id = file_name + + # Note: in case of probe files, model ids are considered to be the ids of the model for the given probe file. + # Hence, there might be several probe files with the same file id, but different model ids. + # Therefore, please DO NOT USE the model_id outside of this class (or the according database queries). + # when the model id is not specified, we use the client id instead + self._model_id = client_id if model_id is None else model_id + # when the claimed id is not specified, we use the client id instead + self.claimed_id = client_id if claimed_id is None else claimed_id + + +############################################################################# +# internal access functions for the file lists; do not export! 
#############################################################################


class ListReader(object):
    """Parses whitespace-separated file lists and optionally caches the results.

    **Parameters**

    store_lists : bool
      If true, every list (and every model dictionary) that was read is kept in
      memory and returned again on later requests for the same group and type.
    """

    # A token is a run of word characters, '/', or any character of the ASCII
    # range '(' .. '.' (i.e. "()*+,-."). Kept as a raw string so that ``\w`` is
    # not treated as a (deprecated) string escape.
    _TOKEN = re.compile(r'[\w/(-.)]+')

    def __init__(self, store_lists):
        # group -> list (world groups) or group -> {type -> list} (other groups)
        self.m_read_lists = {}
        # group -> {type -> {model_id -> client_id}}
        self.m_model_dicts = {}
        self.m_store_lists = store_lists

    def _read_multi_column_list(self, list_file):
        """Returns the non-empty lines of ``list_file`` as lists of tokens.

        Every non-empty line must contain 2, 3 or 4 tokens, and all lines must
        have the same number of tokens as the first one.

        Raises a ``RuntimeError`` if the file does not exist, cannot be read,
        or contains a line that violates the rules above.
        """
        if not os.path.isfile(list_file):
            raise RuntimeError('File %s does not exist.' % (list_file,))

        rows = []
        try:
            # A plain ``with open`` closes the file on every exit path. The module-global
            # ``fileinput.input`` stays active when the loop raises, and then every later
            # call fails with "input() already active".
            with open(list_file) as handle:
                for line in handle:
                    parsed_line = self._TOKEN.findall(line)
                    if not parsed_line:
                        # empty line (or no parsable token): skip it
                        continue
                    # perform some sanity checks
                    if len(parsed_line) not in (2, 3, 4):
                        raise IOError("The read line '%s' from file '%s' could not be parsed successfully!" % (
                            line.rstrip(), list_file))
                    if rows and len(rows[0]) != len(parsed_line):
                        raise IOError(
                            "The parsed line '%s' from file '%s' has a different number of elements than the first parsed line '%s'!" % (
                                parsed_line, list_file, rows[0]))
                    rows.append(parsed_line)
        except IOError as e:
            raise RuntimeError("Error reading the file '%s' : '%s'." % (list_file, e))

        return rows

    def _read_column_list(self, list_file, column_count):
        """Reads ``list_file`` and turns each row into a ``FileListFile``.

        ``column_count`` selects how the columns are interpreted:

        * 2: filename client_id
        * 3: filename model_id client_id (a row with two entries uses the
          second entry as both model id and client id)
        * 4: filename model_id claimed_id client_id (a row with three entries
          uses the model id as client id)

        Raises a ``ValueError`` for any other ``column_count``.
        """
        rows = self._read_multi_column_list(list_file)
        file_list = []
        for row in rows:
            if column_count == 2:
                assert len(row) == 2
                # we expect: filename client_id
                file_list.append(FileListFile(file_name=row[0], client_id=row[1]))
            elif column_count == 3:
                assert len(row) in (2, 3)
                # we expect: filename, model_id, client_id
                file_list.append(FileListFile(file_name=row[0], client_id=row[2] if len(row) > 2 else row[1],
                                              model_id=row[1]))
            elif column_count == 4:
                assert len(row) in (3, 4)
                # we expect: filename, model_id, claimed_id, client_id
                file_list.append(FileListFile(file_name=row[0], client_id=row[3] if len(row) > 3 else row[1],
                                              model_id=row[1], claimed_id=row[2]))
            else:
                raise ValueError(
                    "The given column count %d cannot be interpreted. This is a BUG, please report to the author." % column_count)

        return file_list

    def _create_model_dictionary(self, files):
        """Maps the model id of each file to its client id.

        Raises a ``ValueError`` if one model id appears with two different
        client ids.
        """
        retval = {}
        for file in files:
            if file._model_id not in retval:
                retval[file._model_id] = file.client_id
            elif retval[file._model_id] != file.client_id:
                raise ValueError(
                    "The read model id '%s' is associated to two different client ids '%s' and '%s'!" % (
                        file._model_id, file.client_id, retval[file._model_id]))
        return retval

    def read_list(self, list_file, group, type=None):
        """Reads the list of Files from the given list file (if not done yet) and returns it.

        For the groups 'world', 'optional_world_1' and 'optional_world_2' the
        file is read with two columns and ``type`` is ignored. For any other
        group, ``type`` must be one of 'for_models', 'for_tnorm' (three
        columns), 'for_scores' (four columns), 'for_probes' or 'for_znorm'
        (two columns); otherwise a ``ValueError`` is raised.

        NOTE(review): the cache is keyed by group (and type) only, not by
        ``list_file``. When lists are stored, a later call with a different
        ``list_file`` for the same group/type returns the first list that was
        read. Confirm that callers never mix list files on one reader.
        """
        if group in ('world', 'optional_world_1', 'optional_world_2'):
            if group not in self.m_read_lists:
                # read the world list into memory
                files = self._read_column_list(list_file, 2)
                if self.m_store_lists:
                    self.m_read_lists[group] = files
                return files
            # just return the previously read list
            return self.m_read_lists[group]

        if group not in self.m_read_lists:
            self.m_read_lists[group] = {}
        if type not in self.m_read_lists[group]:
            if type in ('for_models', 'for_tnorm'):
                files = self._read_column_list(list_file, 3)
            elif type == 'for_scores':
                files = self._read_column_list(list_file, 4)
            elif type in ('for_probes', 'for_znorm'):
                files = self._read_column_list(list_file, 2)
            else:
                raise ValueError("The given type must be one of %s, but not '%s'" % (
                    ('for_models', 'for_scores', 'for_probes', 'for_tnorm', 'for_znorm'), type))
            if self.m_store_lists:
                self.m_read_lists[group][type] = files
            return files
        return self.m_read_lists[group][type]

    def read_models(self, list_file, group, type=None):
        """Generates a dictionary from model_ids to client_ids for the given list file, if not done yet, and returns it.

        ``type`` must be 'for_models' or 'for_tnorm'; both ``group`` and
        ``type`` are checked with assertions.
        """
        assert group in ('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2')
        assert type in ('for_models', 'for_tnorm')
        if group not in self.m_model_dicts:
            self.m_model_dicts[group] = {}
        if type not in self.m_model_dicts[group]:
            model_dict = self._create_model_dictionary(self.read_list(list_file, group, type))
            if self.m_store_lists:
                self.m_model_dicts[group][type] = model_dict
            return model_dict
        return self.m_model_dicts[group][type]

# diff --git a/bob/bio/base/database/filelist/query.py b/bob/bio/base/database/filelist/query.py new file mode 100644 index
0000000000000000000000000000000000000000..ba067861d63fb19ac08a427cbddfb22457fb33ce --- /dev/null +++ b/bob/bio/base/database/filelist/query.py @@ -0,0 +1,785 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +import os +import six + +import bob.db.base + +from .. import ZTBioDatabase +from .. import BioFile + +from . import ListReader, Client + + +class FileListBioDatabase(ZTBioDatabase): + """This class provides a user-friendly interface to databases that are given as file lists. + + Keyword parameters: + + base_dir : str + The directory that contains the filelists defining the protocol(s). If you use the protocol + attribute when querying the database, it will be appended to the base directory, such that + several protocols are supported by the same class instance of `bob.bio.base`. + + original_directory : str or ``None`` + The directory, where the original data can be found + + original_extension : str or [str] or ``None`` + The filename extension of the original data, or multiple extensions + + annotation_directory : str or ``None`` + The directory, where additional annotation files can be found + + annotation_extension : str or ``None`` + The filename extension of the annotation files + + annotation_type : str or ``None`` + The type of annotation that can be read. + Currently, options are 'eyecenter', 'named', 'idiap'. + See :py:func:`bob.db.base.read_annotation_file` for details. 
+ + dev_subdir : str or ``None`` + Specify a custom subdirectory for the filelists of the development set (default is 'dev') + + eval_subdir : str or ``None`` + Specify a custom subdirectory for the filelists of the evaluation set (default is 'eval') + + world_filename : str or ``None`` + Specify a custom filename for the training filelist (default is 'norm/train_world.lst') + + optional_world_1_filename : str or ``None`` + Specify a custom filename for the (first optional) training filelist + (default is 'norm/train_optional_world_1.lst') + + optional_world_2_filename : str or ``None`` + Specify a custom filename for the (second optional) training filelist + (default is 'norm/train_optional_world_2.lst') + + models_filename : str or ``None`` + Specify a custom filename for the model filelists (default is 'for_models.lst') + + probes_filename : str or ``None`` + Specify a custom filename for the probes filelists (default is 'for_probes.lst') + + scores_filename : str or ``None`` + Specify a custom filename for the scores filelists (default is 'for_scores.lst') + + tnorm_filename : str or ``None`` + Specify a custom filename for the T-norm scores filelists (default is 'for_tnorm.lst') + + znorm_filename : str or ``None`` + Specify a custom filename for the Z-norm scores filelists (default is 'for_znorm.lst') + + use_dense_probe_file_list : bool or None + Specify which list to use among 'probes_filename' (dense) or 'scores_filename'. + If ``None``, it is estimated based on the given parameters. 
+ + keep_read_lists_in_memory : bool + If set to true, the lists are read only once and stored in memory + """ + + def __init__( + self, + base_dir, + name, + protocol=None, + biofilecls=BioFile, + + original_directory=None, + original_extension=None, + annotation_directory=None, + annotation_extension='.pos', + annotation_type='eyecenter', + + dev_subdir=None, + eval_subdir=None, + + world_filename=None, + optional_world_1_filename=None, + optional_world_2_filename=None, + models_filename=None, + + # For probing, use ONE of the two score file lists: + probes_filename=None, # File containing the probe files -> dense model/probe score matrix + scores_filename=None, # File containing list of model and probe files -> sparse model/probe score matrix + # For ZT-Norm: + tnorm_filename=None, + znorm_filename=None, + use_dense_probe_file_list=None, + # if both probe_filename and scores_filename is given, what kind of list should be used? + keep_read_lists_in_memory=True, + # if set to True (the RECOMMENDED default) lists are read only once and stored in memory. 
+ **kwargs + ): + """Initializes the database with the file lists from the given base directory, + and the given sub-directories and file names (which default to useful values if not given).""" + + super(FileListBioDatabase, self).__init__( + name=name, + protocol=protocol, + original_directory=original_directory, + original_extension=original_extension, + annotation_directory=annotation_directory, + annotation_extension=annotation_extension, + annotation_type=annotation_type, + # extra args for pretty printing + dev_subdir=dev_subdir, + eval_subdir=eval_subdir, + world_filename=world_filename, + optional_world_1_filename=optional_world_1_filename, + optional_world_2_filename=optional_world_2_filename, + models_filename=models_filename, + probes_filename=probes_filename, + scores_filename=scores_filename, + tnorm_filename=tnorm_filename, + znorm_filename=znorm_filename, + use_dense_probe_file_list=use_dense_probe_file_list, + # if both probe_filename and scores_filename are given, what kind of list should be used? + keep_read_lists_in_memory=keep_read_lists_in_memory, + **kwargs) + # self.original_directory = original_directory + # self.original_extension = original_extension + self.biofilecls = biofilecls + + self.m_annotation_directory = annotation_directory + self.m_annotation_extension = annotation_extension + self.m_annotation_type = annotation_type + + self.m_base_dir = os.path.abspath(base_dir) + if not os.path.isdir(self.m_base_dir): + raise RuntimeError('Invalid directory specified %s.' 
% (self.m_base_dir)) + + # sub-directories for dev and eval set: + self.m_dev_subdir = dev_subdir if dev_subdir is not None else 'dev' + self.m_eval_subdir = eval_subdir if eval_subdir is not None else 'eval' + + # training list: format: filename client_id + self.m_world_filename = world_filename if world_filename is not None else os.path.join('norm', + 'train_world.lst') + # optional training list 1: format: filename client_id + self.m_optional_world_1_filename = optional_world_1_filename if optional_world_1_filename is not None else os.path.join( + 'norm', 'train_optional_world_1.lst') + # optional training list 2: format: filename client_id + self.m_optional_world_2_filename = optional_world_2_filename if optional_world_2_filename is not None else os.path.join( + 'norm', 'train_optional_world_2.lst') + # model list: format: filename model_id client_id + self.m_models_filename = models_filename if models_filename is not None else 'for_models.lst' + # scores list: format: filename model_id claimed_client_id client_id + self.m_scores_filename = scores_filename if scores_filename is not None else 'for_scores.lst' + # probe list: format: filename client_id + self.m_probes_filename = probes_filename if probes_filename is not None else 'for_probes.lst' + # T-Norm models format: filename model_id client_id + self.m_tnorm_filename = tnorm_filename if tnorm_filename is not None else 'for_tnorm.lst' + # Z-Norm files format: filename client_id + self.m_znorm_filename = znorm_filename if znorm_filename is not None else 'for_znorm.lst' + + # decide, which scoring type we have: + if probes_filename is not None and scores_filename is None: + self.m_use_dense_probes = True + elif probes_filename is None and scores_filename is not None: + self.m_use_dense_probes = False + elif use_dense_probe_file_list is not None: + self.m_use_dense_probes = use_dense_probe_file_list + # Then direct path to a given protocol + elif os.path.isdir(os.path.join(self.get_base_directory(), 
self.m_dev_subdir)) or os.path.isfile( + os.path.join(self.get_base_directory(), self.m_world_filename)): + if os.path.exists(self.get_list_file('dev', 'for_probes')) and not os.path.exists( + self.get_list_file('dev', 'for_scores')): + self.m_use_dense_probes = True + elif not os.path.exists(self.get_list_file('dev', 'for_probes')) and os.path.exists( + self.get_list_file('dev', 'for_scores')): + self.m_use_dense_probes = False + else: + raise ValueError("Unable to determine, which way of probing should be used. Please specify.") + # Then path to a directory that contains several subdirectories (one for each protocol) + else: + # Look at subdirectories for each protocol + protocols = [p for p in os.listdir(self.get_base_directory()) if + os.path.isdir(os.path.join(self.get_base_directory(), p))] + if len(protocols) == 0: + raise ValueError( + "Unable to determine, which way of probing should be used (no protocol directories found). Please specify.") + list_use_dense_probes = [] + for p in protocols: + if os.path.exists(self.get_list_file('dev', 'for_probes', p)) and not os.path.exists( + self.get_list_file('dev', 'for_scores', p)): + use_dense_probes = True + elif not os.path.exists(self.get_list_file('dev', 'for_probes', p)) and os.path.exists( + self.get_list_file('dev', 'for_scores', p)): + use_dense_probes = False + else: + raise ValueError( + "Unable to determine, which way of probing should be used, looking at the protocol (directory) '%s'. Please specify." % p) + list_use_dense_probes.append(use_dense_probes) + if len(set(list_use_dense_probes)) == 1: + self.m_use_dense_probes = list_use_dense_probes[0] + else: + raise ValueError( + "Unable to determine, which way of probing should be used, since this is not consistent accross protocols. 
Please specify.") + + self.m_list_reader = ListReader(keep_read_lists_in_memory) + + def _make_bio(self, files): + return [self.biofilecls(client_id=f.client_id, path=f.path, file_id=f.id) for f in files] + + def groups(self, protocol=None): + """This function returns the list of groups for this database. + + protocol : str or ``None`` + The protocol for which the groups should be retrieved. + + Returns: a list of groups + """ + + groups = [] + protocol = protocol or self.protocol + if protocol is not None: + if os.path.isdir(os.path.join(self.get_base_directory(), protocol, self.m_dev_subdir)): + groups.append('dev') + if os.path.isdir(os.path.join(self.get_base_directory(), protocol, self.m_eval_subdir)): + groups.append('eval') + if os.path.isfile(os.path.join(self.get_base_directory(), protocol, self.m_world_filename)): + groups.append('world') + if os.path.isfile(os.path.join(self.get_base_directory(), protocol, self.m_optional_world_1_filename)): + groups.append('optional_world_1') + if os.path.isfile(os.path.join(self.get_base_directory(), protocol, self.m_optional_world_2_filename)): + groups.append('optional_world_2') + else: + if os.path.isdir(os.path.join(self.get_base_directory(), self.m_dev_subdir)): + groups.append('dev') + if os.path.isdir(os.path.join(self.get_base_directory(), self.m_eval_subdir)): + groups.append('eval') + if os.path.isfile(os.path.join(self.get_base_directory(), self.m_world_filename)): + groups.append('world') + if os.path.isfile(os.path.join(self.get_base_directory(), self.m_optional_world_1_filename)): + groups.append('optional_world_1') + if os.path.isfile(os.path.join(self.get_base_directory(), self.m_optional_world_2_filename)): + groups.append('optional_world_2') + return groups + + def implements_zt(self, protocol=None, groups=None): + """Checks if the file lists for the ZT score normalization are available. + + Keyword Parameters: + + protocol : str or ``None`` + The protocol for which the groups should be retrieved. 
+ + groups : str or [str] or ``None`` + The groups for which the ZT score normalization file lists should be checked ("dev", "eval"). + + Returns: + ``True`` if the all file lists for ZT score normalization exist, otherwise ``False``. + """ + groups = self.check_parameters_for_validity(groups, "group", ('dev', 'eval')) + + protocol = protocol or self.protocol + for group in groups: + for t in ['for_tnorm', 'for_znorm']: + if not os.path.exists(self.get_list_file(group, t, protocol)): + return False + # all files exist + return True + + def get_base_directory(self): + """Returns the base directory where the filelists defining the database + are located.""" + return self.m_base_dir + + def set_base_directory(self, base_dir): + """Resets the base directory where the filelists defining the database + are located; raises a RuntimeError if ``base_dir`` is not an existing directory.""" + self.m_base_dir = base_dir + if not os.path.isdir(self.m_base_dir): + raise RuntimeError('Invalid directory specified %s.' % (self.m_base_dir)) + + def get_list_file(self, group, type=None, protocol=None): + if protocol: + base_directory = os.path.join(self.get_base_directory(), protocol) + else: + base_directory = self.get_base_directory() + if group == 'world': + return os.path.join(base_directory, self.m_world_filename) + elif group == 'optional_world_1': + return os.path.join(base_directory, self.m_optional_world_1_filename) + elif group == 'optional_world_2': + return os.path.join(base_directory, self.m_optional_world_2_filename) + else: + group_dir = self.m_dev_subdir if group == 'dev' else self.m_eval_subdir + list_name = {'for_models': self.m_models_filename, + 'for_probes': self.m_probes_filename, + 'for_scores': self.m_scores_filename, + 'for_tnorm': self.m_tnorm_filename, + 'for_znorm': self.m_znorm_filename + }[type] + return os.path.join(base_directory, group_dir, list_name) + + def client_id_from_model_id(self, model_id, group='dev'): + """Returns the client id that is connected to the given model id. 
+ + Keyword parameters: + + model_id : str or ``None`` + The model id for which the client id should be returned. + + groups : str or [str] or ``None`` + (optional) the groups, the client belongs to. + Might be one or more of ('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2'). + If groups are given, only these groups are considered. + + protocol : str or ``None`` + The protocol to consider + + Returns: The client id for the given model id, if found. + """ + # compatibility reasons + groups = group + groups = self.check_parameters_for_validity(groups, "group", + ('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2'), + default_parameters=('dev', 'eval', 'world')) + + protocol = self.protocol + for group in groups: + model_dict = self.m_list_reader.read_models(self.get_list_file(group, 'for_models', protocol), group, + 'for_models') + if model_id in model_dict: + return model_dict[model_id] + + raise ValueError("The given model id '%s' cannot be found in one of the groups '%s'" % (model_id, groups)) + + def client_id_from_t_model_id(self, t_model_id, group='dev'): + """Returns the client id that is connected to the given T-Norm model id. + + Keyword parameters: + + model_id : str or ``None`` + The model id for which the client id should be returned. + + groups : str or [str] or ``None`` + (optional) the groups, the client belongs to. + Might be one or more of ('dev', 'eval'). + If groups are given, only these groups are considered. + + protocol : str or ``None`` + The protocol to consider + + Returns: The client id for the given model id of a T-Norm model, if found. 
+ """ + groups = group + groups = self.check_parameters_for_validity(groups, "group", ('dev', 'eval')) + + protocol = self.protocol + for group in groups: + model_dict = self.m_list_reader.read_models(self.get_list_file(group, 'for_tnorm', protocol), group, + 'for_tnorm') + if t_model_id in model_dict: + return model_dict[t_model_id] + + raise ValueError( + "The given T-norm model id '%s' cannot be found in one of the groups '%s'" % (t_model_id, groups)) + + def clients(self, protocol=None, groups=None): + """Returns a list of :py:class:`Client` objects for the specific query by the user. + + Keyword Parameters: + + protocol : str or ``None`` + The protocol to consider + + groups : str or [str] or ``None`` + The groups to which the clients belong ("dev", "eval", "world", "optional_world_1", "optional_world_2"). + + Returns: A list containing all the :py:class:`Client` objects which have the given properties. + """ + + protocol = protocol or self.protocol + client_ids = self.client_ids(protocol, groups) + return [Client(id) for id in client_ids] + + def tclients(self, protocol=None, groups=None): + """Returns a list of T-Norm :py:class:`Client` objects for the specific query by the user. + + Keyword Parameters: + + protocol : str or ``None`` + The protocol to consider + + groups : str or [str] or ``None`` + The groups to which the clients belong ("dev", "eval"). + + Returns: A list containing all the T-Norm :py:class:`Client` objects which have the given properties. + """ + protocol = protocol or self.protocol + tclient_ids = self.tclient_ids(protocol, groups) + return [Client(id) for id in tclient_ids] + + def zclients(self, protocol=None, groups=None): + """Returns a list of Z-Norm Client objects for the specific query by the user. + + Keyword Parameters: + + protocol : str or ``None`` + The protocol to consider + + groups : str or [str] or ``None`` + The groups to which the models belong ("dev", "eval"). 
def client_ids(self, protocol=None, groups=None):
    """Returns the client ids for the specific query by the user.

    Keyword Parameters:

    protocol : str or ``None``
      The protocol to consider; ``self.protocol`` is used when this is ``None``
      (or any other false value).

    groups : str or [str] or ``None``
      The groups to which the clients belong ("dev", "eval", "world", "optional_world_1", "optional_world_2").
      The value is validated by ``check_parameters_for_validity``, which is given
      ('dev', 'eval', 'world') as its default.

    Returns: The value produced by ``__client_id_list__`` for the 'for_models'
    lists of the selected groups.  Note that this helper builds a ``set`` of
    client ids, not a list, so the result is unordered and free of duplicates.
    """
    protocol = protocol or self.protocol
    groups = self.check_parameters_for_validity(groups, "group",
                                                ('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2'),
                                                default_parameters=('dev', 'eval', 'world'))

    return self.__client_id_list__(groups, 'for_models', protocol)
def zclient_ids(self, protocol=None, groups=None):
    """Returns the Z-Norm client ids for the specific query by the user.

    Keyword Parameters:

    protocol : str or ``None``
      The protocol to consider; ``self.protocol`` is used when this is ``None``
      (or any other false value).

    groups : str or [str] or ``None``
      The groups to which the clients belong ("dev", "eval").
      The value is validated by ``check_parameters_for_validity``.

    Returns: The value produced by ``__client_id_list__`` for the 'for_znorm'
    lists of the selected groups.  Note that this helper builds a ``set`` of
    client ids, not a list, so the result is unordered and free of duplicates.
    """
    protocol = protocol or self.protocol
    groups = self.check_parameters_for_validity(groups, "group", ('dev', 'eval'))

    return self.__client_id_list__(groups, 'for_znorm', protocol)
def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, classes=None, **kwargs):
    """Returns the :py:class:`BioFile` objects for the specific query by the user.

    Files are collected list by list in a fixed order: "world", "optional_world_1",
    "optional_world_2", then the enrollment lists of "dev" and "eval", and finally
    the probe lists of "dev" and "eval".  A file whose ``id`` has already been
    collected is skipped, so every file appears at most once.

    Keyword Parameters:

    groups : str or [str] or ``None``
      One of the groups ("dev", "eval", "world", "optional_world_1", "optional_world_2") or a tuple with several of them.
      The value is validated by ``check_parameters_for_validity``, which is given
      ('dev', 'eval', 'world') as its default.

    protocol : str or ``None``
      The protocol to consider; ``self.protocol`` is used when this is ``None``
      (or any other false value).

    purposes : str or [str] or ``None``
      The purposes required to be retrieved ("enroll", "probe") or a tuple
      with several of them.  This field is ignored for the data from the
      "world", "optional_world_1", "optional_world_2" groups.

    model_ids : str or [str] or ``None``
      Only retrieves the files for the provided list of model ids (claimed
      client id).  If 'None' is given (this is the default), no filter over
      the model_ids is performed.  The filter is not applied to probe files
      when dense probe lists are in use.

    classes : str or [str] or ``None``
      The classes (types of accesses) to be retrieved ('client', 'impostor')
      or a tuple with several of them.  Only used for sparse probe lists
      ('for_scores'); must be ``None`` when dense probe lists are in use.

    Additional keyword arguments are accepted and ignored.

    Returns: The result of ``self._make_bio`` applied to the list of selected files.

    Raises: ``ValueError`` if ``classes`` is given while dense probe lists are in use.
    """
    protocol = protocol or self.protocol
    dense_probes = self.m_use_dense_probes
    if dense_probes and classes is not None:
        raise ValueError("To be able to use the 'classes' keyword, please use the 'for_scores.lst' list file.")

    purposes = self.check_parameters_for_validity(purposes, "purpose", ('enroll', 'probe'))
    groups = self.check_parameters_for_validity(groups, "group",
                                                ('dev', 'eval', 'world', 'optional_world_1', 'optional_world_2'),
                                                default_parameters=('dev', 'eval', 'world'))
    classes = self.check_parameters_for_validity(classes, "class", ('client', 'impostor'))

    if isinstance(model_ids, six.string_types):
        model_ids = (model_ids,)

    read_list = self.m_list_reader.read_list

    # first, collect all the lists that we want to process
    plain_lists = []
    probe_lists = []
    for world_group in ('world', 'optional_world_1', 'optional_world_2'):
        if world_group in groups:
            plain_lists.append(read_list(self.get_list_file(world_group, protocol=protocol), world_group))

    # dense probing reads 'for_probes', sparse probing reads 'for_scores'
    probe_type = 'for_probes' if dense_probes else 'for_scores'
    for group in ('dev', 'eval'):
        if group not in groups:
            continue
        if 'enroll' in purposes:
            plain_lists.append(
                read_list(self.get_list_file(group, 'for_models', protocol=protocol), group, 'for_models'))
        if 'probe' in purposes:
            probe_lists.append(
                read_list(self.get_list_file(group, probe_type, protocol=protocol), group, probe_type))

    # now, go through the lists and filter the elements;
    # remember the file ids that are already selected
    seen_ids = set()
    selected = []

    # non-probe files; just filter by model id
    for file_list in plain_lists:
        for bio_file in file_list:
            if bio_file.id in seen_ids:
                continue
            if model_ids is None or bio_file._model_id in model_ids:
                seen_ids.add(bio_file.id)
                selected.append(bio_file)

    # probe files; sparse lists are filtered by model id and by class,
    # dense lists are taken as they are
    want_clients = 'client' in classes
    want_impostors = 'impostor' in classes
    for file_list in probe_lists:
        for bio_file in file_list:
            if not dense_probes:
                if model_ids is not None and bio_file._model_id not in model_ids:
                    continue
                class_matches = (want_clients and bio_file.client_id == bio_file.claimed_id) or \
                                (want_impostors and bio_file.client_id != bio_file.claimed_id)
                if not class_matches:
                    continue
            if bio_file.id not in seen_ids:
                seen_ids.add(bio_file.id)
                selected.append(bio_file)

    return self._make_bio(selected)
def zobjects(self, groups=None, protocol=None, **kwargs):
    """Returns the :py:class:`BioFile` objects to perform Z-norm score normalization.

    Keyword Parameters:

    groups : str or [str] or ``None``
      The groups to which the clients belong ("dev", "eval").
      The value is validated by ``check_parameters_for_validity``.

    protocol : str or ``None``
      The protocol to consider; ``self.protocol`` is used when this is ``None``
      (or any other false value).

    Additional keyword arguments are accepted and ignored.

    Returns: The result of ``self._make_bio`` applied to the concatenated
    'for_znorm' lists of the selected groups.
    """
    protocol = protocol or self.protocol
    groups = self.check_parameters_for_validity(groups, "group", ('dev', 'eval'))

    # concatenate the lists of all groups;
    # we assume that there is no duplicate file here, so nothing is filtered
    z_files = []
    for group in groups:
        list_file = self.get_list_file(group, 'for_znorm', protocol)
        z_files.extend(self.m_list_reader.read_list(list_file, group, 'for_znorm'))

    return self._make_bio(z_files)
def original_file_name(self, file, check_existence=True):
    """Returns the original file name of the given file.

    This interface supports several original extensions, so that file lists can contain images of different data types.

    When multiple original extensions are specified, this function will check the existence of any of these file names, and return the first one that actually exists.
    In this case, the ``check_existence`` flag is ignored.

    **Keyword parameters**

    file : :py:class:`bob.bio.base.database.BioFile`
      The :py:class:`BioFile` object for which the file name should be returned.

    check_existence : bool
      Should the existence of the original file be checked?
      (Ignored when multiple original extensions were specified in the constructor.)

    **Returns**
    str : The full path of the original data file.

    **Raises**
    IOError : if the existence check is performed and no matching file exists.
    """
    if isinstance(self.original_extension, str):
        # single extension: honor the ``check_existence`` flag
        file_name = file.make_path(self.original_directory, self.original_extension)
        if not check_existence or os.path.exists(file_name):
            return file_name
    else:
        # several extensions: only an existence check can tell which one applies,
        # so ``check_existence`` is deliberately ignored here
        for extension in self.original_extension:
            file_name = file.make_path(self.original_directory, extension)
            if os.path.exists(file_name):
                return file_name

    # None of the extensions matched
    raise IOError("File '%s' does not exist with any of the extensions '%s'" % (
        file.make_path(self.original_directory, None), self.original_extension))
+data/model3_session3_sample1 3 +data/model3_session3_sample2 3 +data/model3_session3_sample3 3 +data/model3_session4_sample1 3 +data/model4_session3_sample1 4 +data/model4_session3_sample2 4 +data/model4_session3_sample1 4 +data/model4_session3_sample2 4 +data/model4_session3_sample3 4 +data/model4_session4_sample1 4 diff --git a/bob/bio/base/test/data/example_fielist/dev/for_scores.lst b/bob/bio/base/test/data/example_fielist/dev/for_scores.lst new file mode 100644 index 0000000000000000000000000000000000000000..7b75816dc7e176cf8c66a9a0448fb13bf08b6c58 --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/dev/for_scores.lst @@ -0,0 +1,12 @@ +data/model3_session3_sample1 3 3 3 +data/model3_session3_sample2 3 3 3 +data/model3_session3_sample3 3 3 3 +data/model3_session4_sample1 3 3 3 +data/model4_session3_sample1 3 3 4 +data/model4_session3_sample2 3 3 4 +data/model4_session3_sample1 4 4 4 +data/model4_session3_sample2 4 4 4 +data/model4_session3_sample3 4 4 4 +data/model4_session4_sample1 4 4 4 +data/model3_session3_sample1 4 4 3 +data/model3_session3_sample2 4 4 3 diff --git a/bob/bio/base/test/data/example_fielist/dev/for_tnorm.lst b/bob/bio/base/test/data/example_fielist/dev/for_tnorm.lst new file mode 100644 index 0000000000000000000000000000000000000000..a5e4573170a9d3fbb0ef395fd5159e4b9b8bfd36 --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/dev/for_tnorm.lst @@ -0,0 +1,8 @@ +data/model7_session1_sample1 7 7 +data/model7_session1_sample2 7 7 +data/model7_session1_sample3 7 7 +data/model7_session2_sample1 7 7 +data/model8_session1_sample1 8 8 +data/model8_session1_sample2 8 8 +data/model8_session1_sample3 8 8 +data/model8_session2_sample1 8 8 diff --git a/bob/bio/base/test/data/example_fielist/dev/for_znorm.lst b/bob/bio/base/test/data/example_fielist/dev/for_znorm.lst new file mode 100644 index 0000000000000000000000000000000000000000..8b6a51192b7676e37a62b00e83955053d11b58a7 --- /dev/null +++ 
b/bob/bio/base/test/data/example_fielist/dev/for_znorm.lst @@ -0,0 +1,8 @@ +data/model9_session1_sample1 9 +data/model9_session1_sample2 9 +data/model9_session1_sample3 9 +data/model9_session2_sample1 9 +data/model10_session1_sample1 10 +data/model10_session1_sample2 10 +data/model10_session1_sample3 10 +data/model10_session2_sample1 10 diff --git a/bob/bio/base/test/data/example_fielist/eval/for_models.lst b/bob/bio/base/test/data/example_fielist/eval/for_models.lst new file mode 100644 index 0000000000000000000000000000000000000000..4f9d76ffb83b8754461ea40032918a1e47d2d870 --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/eval/for_models.lst @@ -0,0 +1,8 @@ +data/model5_session1_sample1 5 5 +data/model5_session1_sample2 5 5 +data/model5_session1_sample3 5 5 +data/model5_session2_sample1 5 5 +data/model6_session1_sample1 6 6 +data/model6_session1_sample2 6 6 +data/model6_session1_sample3 6 6 +data/model6_session2_sample1 6 6 diff --git a/bob/bio/base/test/data/example_fielist/eval/for_probes.lst b/bob/bio/base/test/data/example_fielist/eval/for_probes.lst new file mode 100644 index 0000000000000000000000000000000000000000..148e29d840f9a2dcc44ba14827fe0ffc5b861696 --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/eval/for_probes.lst @@ -0,0 +1,8 @@ +data/model5_session3_sample1 5 +data/model5_session3_sample2 5 +data/model5_session3_sample3 5 +data/model5_session4_sample1 5 +data/model6_session3_sample1 6 +data/model6_session3_sample2 6 +data/model6_session3_sample3 6 +data/model6_session4_sample1 6 diff --git a/bob/bio/base/test/data/example_fielist/eval/for_scores.lst b/bob/bio/base/test/data/example_fielist/eval/for_scores.lst new file mode 100644 index 0000000000000000000000000000000000000000..55278857428848521cbe9d6f59114cd069a2e29f --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/eval/for_scores.lst @@ -0,0 +1,8 @@ +data/model5_session3_sample1 5 5 5 +data/model5_session3_sample2 5 5 5 +data/model5_session3_sample3 5 5 5 
+data/model5_session4_sample1 5 5 5 +data/model6_session3_sample1 6 6 6 +data/model6_session3_sample2 6 6 6 +data/model6_session3_sample3 6 6 6 +data/model6_session4_sample1 6 6 6 diff --git a/bob/bio/base/test/data/example_fielist/eval/for_tnorm.lst b/bob/bio/base/test/data/example_fielist/eval/for_tnorm.lst new file mode 100644 index 0000000000000000000000000000000000000000..a5e4573170a9d3fbb0ef395fd5159e4b9b8bfd36 --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/eval/for_tnorm.lst @@ -0,0 +1,8 @@ +data/model7_session1_sample1 7 7 +data/model7_session1_sample2 7 7 +data/model7_session1_sample3 7 7 +data/model7_session2_sample1 7 7 +data/model8_session1_sample1 8 8 +data/model8_session1_sample2 8 8 +data/model8_session1_sample3 8 8 +data/model8_session2_sample1 8 8 diff --git a/bob/bio/base/test/data/example_fielist/eval/for_znorm.lst b/bob/bio/base/test/data/example_fielist/eval/for_znorm.lst new file mode 100644 index 0000000000000000000000000000000000000000..8b6a51192b7676e37a62b00e83955053d11b58a7 --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/eval/for_znorm.lst @@ -0,0 +1,8 @@ +data/model9_session1_sample1 9 +data/model9_session1_sample2 9 +data/model9_session1_sample3 9 +data/model9_session2_sample1 9 +data/model10_session1_sample1 10 +data/model10_session1_sample2 10 +data/model10_session1_sample3 10 +data/model10_session2_sample1 10 diff --git a/bob/bio/base/test/data/example_fielist/norm/train_optional_world_1.lst b/bob/bio/base/test/data/example_fielist/norm/train_optional_world_1.lst new file mode 100644 index 0000000000000000000000000000000000000000..f1e458aedfb1d83ecf702ce694b15842f7796007 --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/norm/train_optional_world_1.lst @@ -0,0 +1,8 @@ +data/model11_session1_sample1 1 +data/model11_session1_sample2 1 +data/model11_session1_sample3 1 +data/model11_session2_sample1 1 +data/model12_session1_sample1 2 +data/model12_session1_sample2 2 +data/model12_session1_sample3 2 
+data/model12_session2_sample1 2 diff --git a/bob/bio/base/test/data/example_fielist/norm/train_optional_world_2.lst b/bob/bio/base/test/data/example_fielist/norm/train_optional_world_2.lst new file mode 100644 index 0000000000000000000000000000000000000000..e4632b867b0039195319ab1ab645d9e3a26ac43b --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/norm/train_optional_world_2.lst @@ -0,0 +1,8 @@ +data/model13_session1_sample1 1 +data/model13_session1_sample2 1 +data/model13_session1_sample3 1 +data/model13_session2_sample1 1 +data/model14_session1_sample1 2 +data/model14_session1_sample2 2 +data/model14_session1_sample3 2 +data/model14_session2_sample1 2 diff --git a/bob/bio/base/test/data/example_fielist/norm/train_world.lst b/bob/bio/base/test/data/example_fielist/norm/train_world.lst new file mode 100644 index 0000000000000000000000000000000000000000..75287173539e4f701f03704c17f6c88273eb93b0 --- /dev/null +++ b/bob/bio/base/test/data/example_fielist/norm/train_world.lst @@ -0,0 +1,8 @@ +data/model1_session1_sample1 1 +data/model1_session1_sample2 1 +data/model1_session1_sample3 1 +data/model1_session2_sample1 1 +data/model2_session1_sample1 2 +data/model2_session1_sample2 2 +data/model2_session1_sample3 2 +data/model2_session2_sample1 2 diff --git a/bob/bio/base/test/dummy/filelist.py b/bob/bio/base/test/dummy/filelist.py new file mode 100644 index 0000000000000000000000000000000000000000..e2e12543316d6ec45b71f2cf2f01b25ac8927df4 --- /dev/null +++ b/bob/bio/base/test/dummy/filelist.py @@ -0,0 +1,22 @@ +from bob.bio.base.database import FileListBioDatabase +from bob.bio.base.test.utils import atnt_database_directory +import pkg_resources + +database = FileListBioDatabase( + base_dir=pkg_resources.resource_filename('bob.bio.base.test', 'data/atnt'), + original_directory=atnt_database_directory(), + original_extension=".pgm", + dev_subdir='.', + eval_subdir='.', + world_filename='world.lst', + models_filename='models.lst', + probes_filename='probes.lst', + 
tnorm_filename='models.lst', + znorm_filename='probes.lst', + keep_read_lists_in_memory=True, + name='test_filelist', + protocol=None, + check_original_files_for_existence=True, + training_depends_on_protocol=False, + models_depend_on_protocol=False +) diff --git a/bob/bio/base/test/dummy/fileset.py b/bob/bio/base/test/dummy/fileset.py index e4b3368f90626e23e61d4093f9ddac9775ff5635..4f2c6461b09b5bd15b20975394848cc4918fa288 100644 --- a/bob/bio/base/test/dummy/fileset.py +++ b/bob/bio/base/test/dummy/fileset.py @@ -1,6 +1,7 @@ from bob.bio.base.database import ZTBioDatabase, BioFileSet, BioFile from bob.bio.base.test.utils import atnt_database_directory + class DummyDatabase(ZTBioDatabase): def __init__(self): @@ -22,9 +23,9 @@ class DummyDatabase(ZTBioDatabase): def _make_bio(self, files): return [BioFile(client_id=f.client_id, path=f.path, file_id=f.id) for f in files] - def probe_file_sets(self, model_id=None, group='dev'): + def object_sets(self, groups='dev', protocol=None, purposes=None, model_ids=None): """Returns the list of probe File objects (for the given model id, if given).""" - files = self.arrange_by_client(self.sort(self.objects(protocol=None, groups=group, purposes='probe'))) + files = self.arrange_by_client(self.sort(self.objects(protocol=None, groups=groups, purposes=purposes))) # arrange files by clients file_sets = [BioFileSet(client_files[0].client_id, client_files) for client_files in files] return file_sets diff --git a/bob/bio/base/test/test_database_implementations.py b/bob/bio/base/test/test_database_implementations.py index 2cfcc5b1d22bfb208f09c4ed39f161f4ad3022dd..f122ed78ca55b4119f771048725c2718243a2bca 100644 --- a/bob/bio/base/test/test_database_implementations.py +++ b/bob/bio/base/test/test_database_implementations.py @@ -19,8 +19,10 @@ def check_database(database, groups=('dev',), protocol=None, training_depends=Fa if 'HOME' in os.environ: database.replace_directories(os.path.join(os.environ['HOME'], '.bob_bio_databases.txt')) - if 
protocol: database.protocol = protocol - if protocol is None: protocol = database.protocol + if protocol: + database.protocol = protocol + if protocol is None: + protocol = database.protocol assert len(database.all_files()) > 0 if not skip_train: @@ -30,7 +32,7 @@ def check_database(database, groups=('dev',), protocol=None, training_depends=Fa for group in groups: model_ids = database.model_ids_with_protocol(group, protocol=protocol) assert len(model_ids) > 0 - assert database.client_id_from_model_id(model_ids[0]) is not None + assert database.client_id_from_model_id(model_ids[0], group) is not None assert len(database.enroll_files(model_ids[0], group)) > 0 assert len(database.probe_files(model_ids[0], group)) > 0 @@ -44,8 +46,6 @@ def check_database_zt(database, groups=('dev', 'eval'), protocol=None, training_ for group in groups: t_model_ids = database.t_model_ids(group) assert len(t_model_ids) > 0 - assert database.client_id_from_model_id(t_model_ids[0]) is not None + assert database.client_id_from_model_id(t_model_ids[0], group) is not None assert len(database.t_enroll_files(t_model_ids[0], group)) > 0 assert len(database.z_probe_files(group)) > 0 - - diff --git a/bob/bio/base/test/test_filelist.py b/bob/bio/base/test/test_filelist.py new file mode 100644 index 0000000000000000000000000000000000000000..15a8d5aee0311acb3b90dbf9308046807c66571f --- /dev/null +++ b/bob/bio/base/test/test_filelist.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Laurent El Shafey <laurent.el-shafey@idiap.ch> +# +# Copyright (C) 2011-2013 Idiap Research Institute, Martigny, Switzerland +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3 of the License. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +"""A few checks at the Verification Filelist database. +""" + +import os +import bob.io.base.test_utils +from bob.bio.base.database import FileListBioDatabase + + +example_dir = os.path.realpath(bob.io.base.test_utils.datafile('.', __name__, 'data/example_fielist')) + + +def test_query(): + db = FileListBioDatabase(example_dir, 'test', use_dense_probe_file_list=False) + + assert len(db.groups()) == 5 # 5 groups (dev, eval, world, optional_world_1, optional_world_2) + + assert len(db.client_ids()) == 6 # 6 client ids for world, dev and eval + assert len(db.client_ids(groups='world')) == 2 # 2 client ids for world + assert len(db.client_ids(groups='optional_world_1')) == 2 # 2 client ids for optional world 1 + assert len(db.client_ids(groups='optional_world_2')) == 2 # 2 client ids for optional world 2 + assert len(db.client_ids(groups='dev')) == 2 # 2 client ids for dev + assert len(db.client_ids(groups='eval')) == 2 # 2 client ids for eval + + assert len(db.tclient_ids()) == 2 # 2 client ids for T-Norm score normalization + assert len(db.zclient_ids()) == 2 # 2 client ids for Z-Norm score normalization + + assert len(db.model_ids_with_protocol()) == 6 # 6 model ids for world, dev and eval + assert len(db.model_ids_with_protocol(groups='world')) == 2 # 2 model ids for world + assert len(db.model_ids_with_protocol(groups='optional_world_1')) == 2 # 2 model ids for optional world 1 + assert len(db.model_ids_with_protocol(groups='optional_world_2')) == 2 # 2 model ids for optional world 2 + assert len(db.model_ids_with_protocol(groups='dev')) == 2 # 2 model ids for dev + assert 
len(db.model_ids_with_protocol(groups='eval')) == 2 # 2 model ids for eval + + assert len(db.tmodel_ids_with_protocol()) == 2 # 2 model ids for T-Norm score normalization + + assert len(db.objects(groups='world')) == 8 # 8 samples in the world set + + assert len(db.objects(groups='dev', purposes='enroll')) == 8 # 8 samples for enrollment in the dev set + assert len(db.objects(groups='dev', purposes='enroll', + model_ids='3')) == 4 # 4 samples for to enroll model '3' in the dev set + assert len(db.objects(groups='dev', purposes='enroll', + model_ids='7')) == 0 # 0 samples for enrolling model '7' (it is a T-Norm model) + assert len(db.objects(groups='dev', purposes='probe')) == 8 # 8 samples as probes in the dev set + assert len( + db.objects(groups='dev', purposes='probe', classes='client')) == 8 # 8 samples as client probes in the dev set + assert len(db.objects(groups='dev', purposes='probe', + classes='impostor')) == 4 # 4 samples as impostor probes in the dev set + + assert len(db.tobjects(groups='dev')) == 8 # 8 samples for enrolling T-norm models + assert len(db.tobjects(groups='dev', model_ids='7')) == 4 # 4 samples for enrolling T-norm model '7' + assert len( + db.tobjects(groups='dev', model_ids='3')) == 0 # 0 samples for enrolling T-norm model '3' (no T-Norm model) + assert len(db.zobjects(groups='dev')) == 8 # 8 samples for Z-norm impostor accesses + + assert db.client_id_from_model_id('1', group=None) == '1' + assert db.client_id_from_model_id('3', group=None) == '3' + assert db.client_id_from_model_id('6', group=None) == '6' + assert db.client_id_from_t_model_id('7', group=None) == '7' + + +def test_query_protocol(): + db = FileListBioDatabase(os.path.dirname(example_dir), 'test', protocol='example_fielist', use_dense_probe_file_list=False) + + assert len(db.groups()) == 5 # 5 groups (dev, eval, world, optional_world_1, optional_world_2) + + assert len(db.client_ids()) == 6 # 6 client ids for world, dev and eval + assert 
len(db.client_ids(groups='world', )) == 2 # 2 client ids for world + assert len(db.client_ids(groups='optional_world_1', )) == 2 # 2 client ids for optional world 1 + assert len(db.client_ids(groups='optional_world_2', )) == 2 # 2 client ids for optional world 2 + assert len(db.client_ids(groups='dev', )) == 2 # 2 client ids for dev + assert len(db.client_ids(groups='eval', )) == 2 # 2 client ids for eval + + assert len(db.tclient_ids()) == 2 # 2 client ids for T-Norm score normalization + assert len(db.zclient_ids()) == 2 # 2 client ids for Z-Norm score normalization + + assert len(db.model_ids_with_protocol()) == 6 # 6 model ids for world, dev and eval + assert len(db.model_ids_with_protocol(groups='world', )) == 2 # 2 model ids for world + assert len(db.model_ids_with_protocol(groups='optional_world_1', )) == 2 # 2 model ids for optional world 1 + assert len(db.model_ids_with_protocol(groups='optional_world_2', )) == 2 # 2 model ids for optional world 2 + assert len(db.model_ids_with_protocol(groups='dev', )) == 2 # 2 model ids for dev + assert len(db.model_ids_with_protocol(groups='eval', )) == 2 # 2 model ids for eval + + assert len(db.tmodel_ids_with_protocol()) == 2 # 2 model ids for T-Norm score normalization + + assert len(db.objects(groups='world', )) == 8 # 8 samples in the world set + + assert len(db.objects(groups='dev', purposes='enroll', )) == 8 # 8 samples for enrollment in the dev set + assert len(db.objects(groups='dev', purposes='enroll', model_ids='3', + )) == 4 # 4 samples for to enroll model '3' in the dev set + assert len(db.objects(groups='dev', purposes='enroll', model_ids='7', + )) == 0 # 0 samples for enrolling model '7' (it is a T-Norm model) + assert len(db.objects(groups='dev', purposes='probe', )) == 8 # 8 samples as probes in the dev set + assert len(db.objects(groups='dev', purposes='probe', classes='client', + )) == 8 # 8 samples as client probes in the dev set + assert len(db.objects(groups='dev', purposes='probe', 
classes='impostor', + )) == 4 # 4 samples as impostor probes in the dev set + + assert len(db.tobjects(groups='dev', )) == 8 # 8 samples for enrolling T-norm models + assert len(db.tobjects(groups='dev', model_ids='7', )) == 4 # 4 samples for enrolling T-norm model '7' + assert len(db.tobjects(groups='dev', model_ids='3', + )) == 0 # 0 samples for enrolling T-norm model '3' (no T-Norm model) + assert len(db.zobjects(groups='dev')) == 8 # 8 samples for Z-norm impostor accesses + + assert db.client_id_from_model_id('1', group=None) == '1' + assert db.client_id_from_model_id('3', group=None) == '3' + assert db.client_id_from_model_id('6', group=None) == '6' + assert db.client_id_from_t_model_id('7', group=None) == '7' + + +def test_query_dense(): + db = FileListBioDatabase(example_dir, 'test', probes_filename='for_probes.lst') + + assert len(db.objects(groups='world')) == 8 # 8 samples in the world set + + assert len(db.objects(groups='dev', purposes='enroll')) == 8 # 8 samples for enrollment in the dev set + assert len(db.objects(groups='dev', purposes='probe')) == 8 # 8 samples as probes in the dev set + + +def test_annotation(): + db = FileListBioDatabase(example_dir, 'test', use_dense_probe_file_list=False, + annotation_directory=example_dir, annotation_type='named') + f = [o for o in db.objects() if o.path == "data/model4_session1_sample2"][0] + annots = db.annotations(f) + + assert annots is not None + assert 'key1' in annots + assert 'key2' in annots + assert annots['key1'] == (20, 10) + assert annots['key2'] == (40, 30) + + +def test_multiple_extensions(): + # check that the old behavior still works + db = FileListBioDatabase(example_dir, 'test', use_dense_probe_file_list=False, + original_directory=example_dir, original_extension='.pos') + file = bob.bio.base.database.BioFile(4, "data/model4_session1_sample2", "data/model4_session1_sample2") + file_name = db.original_file_name(file, True) + assert file_name == os.path.join(example_dir, file.path + '.pos') + + 
# check that the new behavior works as well + db = FileListBioDatabase(example_dir, 'test', use_dense_probe_file_list=False, + original_directory=example_dir, original_extension=['.jpg', '.pos']) + file_name = db.original_file_name(file) + assert file_name == os.path.join(example_dir, file.path + '.pos') + + file = bob.bio.base.database.BioFile(4, "data/model4_session1_sample1", "data/model4_session1_sample1") + try: + file_name = db.original_file_name(file, False) + raised = False + except IOError as e: + raised = True + + assert raised + + +def test_driver_api(): + from bob.db.base.script.dbmanage import main + assert main(('bio_filelist dumplist --list-directory=%s --self-test' % example_dir).split()) == 0 + assert main(( + 'bio_filelist dumplist --list-directory=%s --purpose=enroll --group=dev --class=client --self-test' % example_dir).split()) == 0 + assert main(('bio_filelist checkfiles --list-directory=%s --self-test' % example_dir).split()) == 0 diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py index 18e23edcb0a6b67adf66658eaf75b51f5aa44a20..9958754a65f1ffce20264c0610a6bf0dc54e8049 100644 --- a/bob/bio/base/test/test_scripts.py +++ b/bob/bio/base/test/test_scripts.py @@ -214,6 +214,48 @@ def test_verify_fileset(): _verify(parameters, test_dir, 'test_fileset', ref_modifier="-fileset") +def test_verify_filelist(): + test_dir = tempfile.mkdtemp(prefix='bobtest_') + # define dummy parameters + parameters = [ + '-d', os.path.join(dummy_dir, 'filelist.py'), + '-p', 'dummy', + '-e', 'dummy', + '-a', 'dummy', + '--zt-norm', + '-vs', 'test_filelist', + '--temp-directory', test_dir, + '--result-directory', test_dir, + '--preferred-package', 'bob.bio.base' + ] + + try: + from bob.bio.base.script.verify import main + main(parameters) + + # assert that the score file exists + score_files = [os.path.join(test_dir, 'test_filelist', 'None', norm, 'scores-dev') for norm in ('nonorm', 'ztnorm')] + assert os.path.exists(score_files[0]), "Score 
file %s does not exist" % score_files[0] + assert os.path.exists(score_files[1]), "Score file %s does not exist" % score_files[1] + + # assert that the scores are are identical (might be in a different order, though + reference_files = [os.path.join(data_dir, 'scores-%s-dev' % norm) for norm in ('nonorm', 'ztnorm')] + + for i in (0,1): + # load scores + a1, b1 = bob.measure.load.split_four_column(score_files[i]) + a2, b2 = bob.measure.load.split_four_column(reference_files[i]) + # sort scores + a1 = sorted(a1); a2 = sorted(a2); b1 = sorted(b1); b2 = sorted(b2) + + # assert that scores are almost equal + assert all(abs(a1[j] - a2[j]) < 1e-6 for j in range(len(a1))) + assert all(abs(b1[j] - b2[j]) < 1e-6 for j in range(len(b1))) + + finally: + shutil.rmtree(test_dir) + + def test_verify_missing(): test_dir = tempfile.mkdtemp(prefix='bobtest_') # define dummy parameters diff --git a/doc/filelist-guide.rst b/doc/filelist-guide.rst new file mode 100644 index 0000000000000000000000000000000000000000..96287eb549859e5504e61562d286875607c31857 --- /dev/null +++ b/doc/filelist-guide.rst @@ -0,0 +1,161 @@ +.. vim: set fileencoding=utf-8 : +.. @author: Manuel Guenther <manuel.guenther@idiap.ch> +.. @date: Fri Aug 29 13:52:39 CEST 2014 + +========================================== + Verification File List Database Guide +========================================== + +The Database Interface +---------------------- + +The :py:class:`bob.bio.base.database.FileListBioDatabase` complies with the standard biometric verification database as described in :ref:`bob.bio.base`. +All functions defined in that interface are properly instantiated, as soon as the user provides the required file lists. + +Creating File Lists +------------------- + +The initial step for using this package is to provide file lists specifying the ``'world'`` (training), ``'dev'`` (development) and ``'eval'`` (evaluation) set to be used by the biometric verification algorithm. 
+The summarized complete structure of the list base directory (here denoted as ``basedir``) containing all the files should be like this:: + + basedir -- norm -- train_world.lst + | |-- train_optional_world_1.lst + | |-- train_optional_world_2.lst + | + |-- dev -- for_models.lst + | |-- for_probes.lst + | |-- for_scores.lst + | |-- for_tnorm.lst + | |-- for_znorm.lst + | + |-- eval -- for_models.lst + |-- for_probes.lst + |-- for_scores.lst + |-- for_tnorm.lst + |-- for_znorm.lst + + +The file lists contain several pieces of information that need to be available for the biometric recognition experiment to run properly. +A complete list of the possible pieces of information is: + +* ``filename``: The name of the data file, **relative** to the common root of all data files, and **without** file name extension. +* ``client_id``: The name or ID of the subject the biometric traces of which are contained in the data file. + These names are handled as :py:class:`str` objects, so ``001`` is different from ``1``. +* ``model_id``: + + - used for model enrollment: The name or ID of the *client model* that should be enrolled. In most cases, the ``model_id`` is identical to the ``client_id``. + - used for scoring: The name or ID of the *client model* that the probe file should be compared with. + +* ``claimed_client_id``: + + - used for scoring: The ``client_id`` of the client model that the probe file should be compared with. + + +The following list files need to be created: + +- **For training**: + + * *world file*, with default name ``train_world.lst``, in the default sub-directory ``norm``. + It is a 2-column file with format: + + .. code-block:: text + + filename client_id + + * two (optional) *world files*, with default names ``train_optional_world_1.lst`` and ``train_optional_world_2.lst``, in default sub-directory ``norm``. + The format is the same as for the world file. 
+ These files are not needed for most biometric recognition algorithms, hence, they need to be specified only if the algorithm uses them. + +- **For enrollment**: + + * two *model files* for the development and evaluation set, with default name ``for_models.lst`` in the default sub-directories ``dev`` and ``eval``, respectively. + They are 3-column files with format: + + .. code-block:: text + + filename model_id client_id + +- **For scoring**: + + There exist two different ways to implement file lists used for scoring. + + * The first (and simpler) variant is to define a file list of probe files, where all probe files will be tested against all models. + Hence, you need to specify two *probe files* for the development and evaluation set, with default name ``for_probes.lst`` in the default sub-directories ``dev`` and ``eval``, respectively. + They are 2-column files with format: + + .. code-block:: text + + filename client_id + + * The other option is to specify a detailed list of which probe file should be compared with which client model, i.e., two *score files* for the development and evaluation set, with default name ``for_scores.lst`` in the sub-directories ``dev`` and ``eval``, respectively. + These files need to be provided only if the scoring is to be done selectively, meaning by creating a sparse probe/model scoring matrix. + They are 4-column files with format: + + .. code-block:: text + + filename model_id claimed_client_id client_id + +- **For ZT score normalization**: + + Optionally, file lists for ZT score normalization can be added. + These are + + * two *files for t-score normalization* for the development and evaluation set, with default name ``for_tnorm.lst`` in both sub-directories ``dev`` and ``eval``, respectively. + They are 3-column files with format: + + .. 
code-block:: text + + filename model_id client_id + + * two *files for z-score normalization* for the development and evaluation set, with default name ``for_znorm.lst`` in both sub-directories ``dev`` and ``eval``, respectively. + They are 2-column files with format: + + .. code-block:: text + + filename client_id + +.. note:: The verification queries will use either only the probe or only the score files, so only one of them is mandatory. + In case both probe and score files are provided, the user should set the parameter ``use_dense_probe_file_list``, which specifies the files to consider, when creating the object of the ``Database`` class. + +.. note:: If the database does not provide an evaluation set, the scoring files can be omitted. + Similarly, if the user only defines **for scoring** files and omits the remaining ones, the only valid queries will be scoring-related ones. + + + +Protocols and File Lists +------------------------ + +When you instantiate a database, you have to specify the base directory that contains the file lists. +If you have only a single protocol, you could specify the full path to the file lists described above as follows: + +.. code-block:: python + + >>> db = bob.bio.base.database.FileListBioDatabase('basedir/protocol', 'mydb') + +Next, you should query the data, WITHOUT specifying any protocol: + +.. code-block:: python + + >>> db.objects() + +Alternatively, if you have more protocols, you could do the following: + +.. code-block:: python + + >>> db = bob.bio.base.database.FileListBioDatabase('basedir', 'mydb', protocol='protocol') + >>> db.objects() + +When a protocol is specified, it is appended to the base directory that contains the file lists. +If you need to use another protocol, the best option is to create another instance. +For instance, given two protocols 'P1' and 'P2' (with filelists contained in 'basedir/P1' and 'basedir/P2', respectively), the following would work: + +.. 
code-block:: python + + >>> db1 = bob.bio.base.database.FileListBioDatabase('basedir', 'mydb', protocol='P1') + >>> db2 = bob.bio.base.database.FileListBioDatabase('basedir', 'mydb', protocol='P2') + >>> db1.objects() # Get the objects for the protocol P1 + >>> db2.objects() # Get the objects for the protocol P2 + +Note that if you use several protocols as explained above, the scoring part should be defined in the same way for all the protocols, either by using ``for_probes.lst`` or ``for_scores.lst``. +This means that at the time of the database instantiation, it will be determined (or specified using the ``use_dense_probe_file_list`` optional argument), whether the protocols should use the content of ``for_probes.lst`` or ``for_scores.lst``. +In particular, it is not possible to use a mixture of those for different protocols, once the database object has been created. diff --git a/doc/implementation.rst b/doc/implementation.rst index 8e7c3547ae70f3ae59d3f836f09ce9547d5ae4ab..0230be9aab43fde2b8addf764a2f6495a5209578 100644 --- a/doc/implementation.rst +++ b/doc/implementation.rst @@ -219,20 +219,21 @@ Verification Database Interface For most of the data sets, we rely on the database interfaces from Bob_. Particularly, all databases that are derived from the :py:class:`bob.bio.base.database.BioDatabase` (click `here <https://gitlab.idiap.ch/bob/bob/wikis/Packages>`_ for a list of implemented databases) are supported by a special derivation of the databases from above. -For these databases, the special :py:class:`bob.bio.base.database.BioDatabase` interface is provided, which takes the Bob_ database as parameter. +For these databases, the special :py:class:`bob.bio.base.database.BioDatabase` interface is provided, which wraps the actual Bob_ databases with all their specificities. Several such databases are defined in the according packages, i.e., :ref:`bob.bio.spear <bob.bio.spear>`, :ref:`bob.bio.face <bob.bio.face>` and :ref:`bob.bio.video <bob.bio.video>`. 
For Bob_'s ZT-norm databases, we provide the :py:class:`bob.bio.base.database.ZTBioDatabase` interface. Defining your own Database ~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. +.. note:: If you have your own database that you want to execute the recognition experiments on, you should - first check if you could use the :ref:`Verifcation File List Database <bob.db.bio_filelist>` interface by - defining appropriate file lists for the training set, the model set, and the probes. - In most of the cases, the :py:class:`bob.db.bio_filelist.Database` should be sufficient to run experiments. - Please refer to the documentation :ref:`Documentation <bob.db.bio_filelist>` of this database for more instructions on how to configure this database. + first check if you could use the ``Verification File List Database`` interface by defining appropriate + file lists for the training set, the model set, and the probes. + Please refer to the documentation :doc:`filelist-guide` of this database for more instructions on how to setup this database. + + For an example, you might want to have a look into the implementation of the `BANCA FileList database <http://gitlab.idiap.ch/bob/bob.bio.spear/tree/master/bob/bio/spear/config/database/banca>`_, where the protocol with the name ``G`` is implemented, and its according `database configuration file <https://gitlab.idiap.ch/bob/bob.bio.spear/blob/master/bob/bio/spear/config/database/banca_audio_G.py>`_. -To "plug" your own database in this framework you have to write your own database class by deriving :py:class:`bob.bio.base.database.BioDatabase`. +To "plug" your own (non-file-list-based) database in this framework you have to write your own database class by deriving :py:class:`bob.bio.base.database.BioDatabase`. 
In this case, you have to derive your class from the :py:class:`bob.bio.base.database.BioDatabase`, and provide the following functions: diff --git a/doc/index.rst b/doc/index.rst index 3b1c0b3d3ba0d747b3fe1d36dac151e4ed9ef52c..5745a54a75999346ac7492c9cee7c4af9e951bf4 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -52,6 +52,7 @@ Users Guide installation experiments implementation + filelist-guide more ================ diff --git a/setup.py b/setup.py index ff8aa0bfe26433483a275445b0290b85420a6939..ecf073d7cab0768155e6410d833323be01d26a38 100644 --- a/setup.py +++ b/setup.py @@ -122,6 +122,10 @@ setup( 'demanding = bob.bio.base.config.grid.demanding:grid', 'gpu = bob.bio.base.config.grid.gpu:grid', ], + # declare database to bob + 'bob.db': [ + 'bio_filelist = bob.bio.base.database.filelist.driver:Interface', + ], }, # Classifiers are important if you plan to distribute this package through