diff --git a/bob/bio/base/__init__.py b/bob/bio/base/__init__.py index 48dbde118eeb3b217a39ed298c8f7c45e6065dfc..5b2461a9b9400164f5010f4413fbabf26d2c17b3 100644 --- a/bob/bio/base/__init__.py +++ b/bob/bio/base/__init__.py @@ -1,4 +1,5 @@ from .utils import * +from . import database from . import preprocessor from . import extractor from . import algorithm diff --git a/bob/bio/base/database/__init__.py b/bob/bio/base/database/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bb9010855d0e805269330657ab5c5f6f394dd07a --- /dev/null +++ b/bob/bio/base/database/__init__.py @@ -0,0 +1,7 @@ +from .file import BioFile +from .file import BioFileSet +from .database import BioDatabase +from .database import ZTBioDatabase + +# gets sphinx autodoc done right - don't remove it +__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/database/database.py b/bob/bio/base/database/database.py new file mode 100644 index 0000000000000000000000000000000000000000..7393ee4d1200479d138d3eefbbd775f8eb85380d --- /dev/null +++ b/bob/bio/base/database/database.py @@ -0,0 +1,834 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +import os +import abc +import six +# Nose is detecting a function as a test function, while it is not... +from numpy.testing.decorators import setastest +import bob.db.base + +import bob.bio.base.database + +class BioDatabase(six.with_metaclass(abc.ABCMeta, bob.db.base.Database)): + def __init__( + self, + name, + all_files_options={}, # additional options for the database query that can be used to extract all files + extractor_training_options={}, + # additional options for the database query that can be used to extract the training files for the extractor training + projector_training_options={}, + # additional options for the database query that can be used to extract the training files for the extractor training + enroller_training_options={}, + # additional options for the database query that can be used to extract the training files for the extractor training + check_original_files_for_existence=False, + original_directory=None, + original_extension=None, + annotation_directory=None, + annotation_extension='.pos', + annotation_type=None, + protocol='Default', + training_depends_on_protocol=False, + models_depend_on_protocol=False, + **kwargs + ): + """This class represents the basic API for database access. + Please use this class as a base class for your database access classes. + Do not forget to call the constructor of this base class in your derived class. + + **Parameters:** + + name : str + A unique name for the database. + + all_files_options : dict + Dictionary of options passed to the :py:meth:`bob.bio.base.database.BioDatabase.objects` database query when retrieving all data. + + extractor_training_options : dict + Dictionary of options passed to the :py:meth:`bob.bio.base.database.BioDatabase.objects` database query used to retrieve the files for the extractor training. + + projector_training_options : dict + Dictionary of options passed to the :py:meth:`bob.bio.base.database.BioDatabase.objects` database query used to retrieve the files for the projector training. + + enroller_training_options : dict + Dictionary of options passed to the :py:meth:`bob.bio.base.database.BioDatabase.objects` database query used to retrieve the files for the enroller training. + + check_original_files_for_existence : bool + Enables to test for the original data files when querying the database. 
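To make the constructor options and the required interface described here concrete, the following is a minimal sketch of a derived database. Only the import path, the constructor keywords and the abstract-method signatures are taken from this patch; the class name, file paths and client ids are invented for illustration.

.. code-block:: python

   from bob.bio.base.database import BioDatabase, BioFile

   class ToyDatabase(BioDatabase):
       """Hypothetical two-sample database, for illustration only."""

       def __init__(self):
           self._files = [BioFile(client_id=1, path='client1/sample1', file_id=1),
                          BioFile(client_id=1, path='client1/sample2', file_id=2)]
           super(ToyDatabase, self).__init__(name='toy', protocol='Default')

       # the three functions whose signatures are verified by the interface check in the constructor
       def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
           return [1]

       def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
           return self._files

       def annotations(self, file):
           return None   # this toy database ships no annotation files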
+ + original_directory : str + The directory where the original data of the database are stored. + + original_extension : str + The file name extension of the original data. + + annotation_directory : str + The directory where the image annotations of the database are stored, if any. + + annotation_extension : str + The file name extension of the annotation files. + + annotation_type : str + The type of the annotation file to read, see :py:func:`bob.bio.base.database.read_annotation_file` for accepted formats. + + protocol : str or ``None`` + The name of the protocol that defines the default experimental setup for this database. + + .. todo:: Check if the ``None`` protocol is supported. + + training_depends_on_protocol : bool + Specifies whether the training set used for training the extractor and the projector depends on the protocol. + This flag is used to avoid re-computation of data when running on the different protocols of the same database. + + models_depend_on_protocol : bool + Specifies whether the models depend on the protocol. + This flag is used to avoid re-computation of models when running on the different protocols of the same database. + + kwargs : ``key=value`` pairs + The arguments of the :py:class:`Database` base class constructor. + + """ + + assert isinstance(name, str) + + self.name = name + + self.all_files_options = all_files_options + self.extractor_training_options = extractor_training_options + self.projector_training_options = projector_training_options + self.enroller_training_options = enroller_training_options + self.check_existence = check_original_files_for_existence + + self._kwargs = kwargs + + self.original_directory = original_directory + self.original_extension = original_extension + self.annotation_directory = annotation_directory + self.annotation_extension = annotation_extension + self.annotation_type = annotation_type + self.protocol = protocol + self.training_depends_on_protocol = training_depends_on_protocol + self.models_depend_on_protocol = models_depend_on_protocol + + # check that the implemented model_ids_with_protocol(), objects() and annotations() functions have at least the required interface + try: + # create a value that is very unlikely to be a valid value for anything + test_value = '#6T7+§X' + # test if the parameters of the functions apply + self.model_ids_with_protocol(groups=test_value, protocol=test_value) + self.objects(groups=test_value, protocol=test_value, purposes=test_value, model_ids=(test_value,)) + self.annotations(file=bob.bio.base.database.BioFile(test_value, test_value, test_value)) + except TypeError as e: + # type error indicates that the given parameters are not valid. + raise NotImplementedError(str( + e) + "\nPlease implement:\n - the model_ids_with_protocol(...) function with at least the " + "arguments 'groups' and 'protocol'\n - the objects(...) function with at least the " + "arguments 'groups', 'protocol', 'purposes' and 'model_ids'\n - the annotations(...) " + "function with at least the argument 'file'.") + except: + # any other error is fine at this stage. + pass + + def __str__(self): + """__str__() -> info + + This function returns all parameters of this class. + + **Returns:** + + info : str + A string containing the full information of all parameters of this class.
+ """ + params = "name=%s, protocol=%s, original_directory=%s, original_extension=%s" % (self.name, self.protocol, self.original_directory, self.original_extension) + params += ", ".join(["%s=%s" % (key, value) for key, value in self._kwargs.items()]) + params += ", original_directory=%s, original_extension=%s" % (self.original_directory, self.original_extension) + if self.all_files_options: + params += ", all_files_options=%s" % self.all_files_options + if self.extractor_training_options: + params += ", extractor_training_options=%s" % self.extractor_training_options + if self.projector_training_options: + params += ", projector_training_options=%s" % self.projector_training_options + if self.enroller_training_options: + params += ", enroller_training_options=%s" % self.enroller_training_options + + return "%s(%s)" % (str(self.__class__), params) + + ########################################################################### + # Helper functions that you might want to use in derived classes + ########################################################################### + def replace_directories(self, replacements=None): + """This helper function replaces the ``original_directory`` and the ``annotation_directory`` of the database with the directories read from the given replacement file. + + This function is provided for convenience, so that the database configuration files do not need to be modified. + Instead, this function uses the given dictionary of replacements to change the original directory and the original extension (if given). + + The given ``replacements`` can be of type ``dict``, including all replacements, or a file name (as a ``str``), in which case the file is read. + The structure of the file should be: + + .. code-block:: text + + # Comments starting with # and empty lines are ignored + + [YOUR_..._DATA_DIRECTORY] = /path/to/your/data + [YOUR_..._ANNOTATION_DIRECTORY] = /path/to/your/annotations + + If no annotation files are available (e.g. when they are stored inside the ``database``), the annotation directory can be left out. + + **Parameters:** + + replacements : dict or str + A dictionary with replacements, or a name of a file to read the dictionary from. + If the file name does not exist, no directories are replaced. + """ + if replacements is None: + return + if isinstance(replacements, str): + if not os.path.exists(replacements): + return + # Open the database replacement file and reads its content + with open(replacements) as f: + replacements = {} + for line in f: + if line.strip() and not line.startswith("#"): + splits = line.split("=") + assert len(splits) == 2 + replacements[splits[0].strip()] = splits[1].strip() + + assert isinstance(replacements, dict) + + if self.original_directory in replacements: + self.original_directory = replacements[self.original_directory] + + try: + if self.annotation_directory in replacements: + self.annotation_directory = replacements[self.annotation_directory] + except AttributeError: + pass + + def sort(self, files): + """sort(files) -> sorted + + Returns a sorted version of the given list of File's (or other structures that define an 'id' data member). + The files will be sorted according to their id, and duplicate entries will be removed. + + **Parameters:** + + files : [:py:class:`File`] + The list of files to be uniquified and sorted. + + **Returns:** + + sorted : [:py:class:`File`] + The sorted list of files, with duplicate :py:attr:`File.id`\s being removed. 
+ """ + # sort files using their sort function + sorted_files = sorted(files) + # remove duplicates + return [f for i, f in enumerate(sorted_files) if not i or sorted_files[i - 1].id != f.id] + + def uses_probe_file_sets(self, protocol=None): + """Defines if, for the current protocol, the database uses several probe files to generate a score. + Returns True if the given protocol specifies file sets for probes, instead of a single probe file. + In this default implementation, False is returned, throughout. + If you need different behavior, please overload this function in your derived class.""" + return False + + def arrange_by_client(self, files): + """arrange_by_client(files) -> files_by_client + + Arranges the given list of files by client id. + This function returns a list of lists of File's. + + **Parameters:** + + files : :py:class:`File` + A list of files that should be split up by :py:attr:`File.client_id`. + + **Returns:** + + files_by_client : [[:py:class:`File`]] + The list of lists of files, where each sub-list groups the files with the same :py:attr:`File.client_id` + """ + client_files = {} + for file in files: + if file.client_id not in client_files: + client_files[file.client_id] = [] + client_files[file.client_id].append(file) + + files_by_clients = [] + for client in sorted(client_files.keys()): + files_by_clients.append(client_files[client]) + return files_by_clients + + def annotations(self, file): + """ + Returns the annotations for the given File object, if available. + It uses :py:func:`bob.bio.base.database.read_annotation_file` to load the annotations. + + **Parameters:** + + file : :py:class:`File` + The file for which annotations should be returned. + + **Returns:** + + annots : dict or None + The annotations for the file, if available. + """ + if self.annotation_directory: + try: + from bob.db.base.annotations import read_annotation_file + annotation_path = os.path.join(self.annotation_directory, file.path + self.annotation_extension) + return read_annotation_file(annotation_path, self.annotation_type) + except ImportError as e: + raise NotImplementedError(str(e) + " Annotations are not read." % e) + + return None + + def file_names(self, files, directory, extension): + """file_names(files, directory, extension) -> paths + + Returns the full path of the given File objects. + + **Parameters:** + + files : [:py:class:`File`] + The list of file object to retrieve the file names for. + + directory : str + The base directory, where the files can be found. + + extension : str + The file name extension to add to all files. + + **Returns:** + + paths : [str] or [[str]] + The paths extracted for the files, in the same order. + If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set. + """ + # return the paths of the files + if self.uses_probe_file_sets() and files and hasattr(files[0], 'files'): + # List of Filesets: do not remove duplicates + return [[f.make_path(directory, extension) for f in file_set.files] for file_set in files] + else: + # List of files, do not remove duplicates + return [f.make_path(directory, extension) for f in files] + + def original_file_names(self, files): + """original_file_names(files) -> paths + + Returns the full path of the original data of the given File objects. + + **Parameters:** + + files : [:py:class:`File`] + The list of file object to retrieve the original data file names for. + + **Returns:** + + paths : [str] or [[str]] + The paths extracted for the files, in the same order. 
+ If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set. + """ + assert self.original_directory is not None + assert self.original_extension is not None + return self.file_names(files, self.original_directory, self.original_extension) + + ################################################################# + ###### Methods to be overwritten by derived classes ############# + ################################################################# + @abc.abstractmethod + def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs): + """model_ids_with_protocol(groups = None, protocol = None, **kwargs) -> ids + + Returns a list of model ids for the given groups and given protocol. + + **Parameters:** + + groups : one or more of ``('world', 'dev', 'eval')`` + The groups to get the model ids for. + + protocol: a protocol name + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for the given groups. + """ + raise NotImplementedError("Please implement this function in derived classes") + + def model_ids(self, groups='dev'): + """model_ids(group = 'dev') -> ids + + Returns a list of model ids for the given group, respecting the current protocol. + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for models of the given group. + """ + return sorted(self.model_ids_with_protocol(groups=groups, protocol=self.protocol)) + + @abc.abstractmethod + def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): + """This function returns lists of File objects, which fulfill the given restrictions. + + Keyword parameters: + + groups : str or [str] + The groups of which the clients should be returned. + Usually, groups are one or more elements of ('world', 'dev', 'eval') + + protocol + The protocol for which the clients should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + + purposes : str or [str] + The purposes for which File objects should be retrieved. + Usually, purposes are one of ('enroll', 'probe'). + + model_ids : [various type] + The model ids for which the File objects should be retrieved. + What defines a 'model id' is dependent on the database. + In cases, where there is only one model per client, model ids and client ids are identical. + In cases, where there is one model per file, model ids and file ids are identical. + But, there might also be other cases. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + ################################################################# + ######### Methods to provide common functionality ############### + ################################################################# + + def original_file_name(self, file): + """This function returns the original file name for the given File object. 
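How these query functions relate to each other can be sketched with the hypothetical ``ToyDatabase`` from above; the directory and extension are, again, invented.

.. code-block:: python

   db = ToyDatabase()                             # the sketch from above
   db.original_directory = '/path/to/toy/data'    # hypothetical location of the raw files
   db.original_extension = '.pgm'

   # model_ids() simply forwards to model_ids_with_protocol() using the current protocol
   print(db.model_ids(groups='dev'))              # -> [1]

   # original_file_names() resolves File objects to full paths on disk
   print(db.original_file_names(db.all_files()))
   # -> ['/path/to/toy/data/client1/sample1.pgm', '/path/to/toy/data/client1/sample2.pgm']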
+ + Keyword parameters: + + file : :py:class:`File` or a derivative + The File objects for which the file name should be retrieved + + Return value : str + The original file name for the given File object + """ + # check if directory is set + if not self.original_directory or not self.original_extension: + raise ValueError( + "The original_directory and/or the original_extension were not specified in the constructor.") + # extract file name + file_name = file.make_path(self.original_directory, self.original_extension) + if not self.check_existence or os.path.exists(file_name): + return file_name + raise ValueError("The file '%s' was not found. Please check the original directory '%s' and extension '%s'?" % ( + file_name, self.original_directory, self.original_extension)) + + def all_files(self, groups=None): + """all_files(groups=None) -> files + + Returns all files of the database, respecting the current protocol. + The files can be limited using the ``all_files_options`` in the constructor. + + **Parameters:** + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The groups to get the data for. + If ``None``, data for all groups is returned. + + **Returns:** + + files : [:py:class:`File`] + The sorted and unique list of all files of the database. + """ + return self.sort(self.objects(protocol=self.protocol, groups=groups, **self.all_files_options)) + + def training_files(self, step=None, arrange_by_client=False): + """training_files(step = None, arrange_by_client = False) -> files + + Returns all training files for the given step, and arranges them by client, if desired, respecting the current protocol. + The files for the steps can be limited using the ``..._training_options`` defined in the constructor. + + **Parameters:** + + step : one of ``('train_extractor', 'train_projector', 'train_enroller')`` or ``None`` + The step for which the training data should be returned. + + arrange_by_client : bool + Should the training files be arranged by client? + If set to ``True``, training files will be returned in [[:py:class:`bob.bio.base.database.BioFile`]], where each sub-list contains the files of a single client. + Otherwise, all files will be stored in a simple [:py:class:`bob.bio.base.database.BioFile`]. + + **Returns:** + + files : [:py:class:`File`] or [[:py:class:`File`]] + The (arranged) list of files used for the training of the given step. + """ + if step is None: + training_options = self.all_files_options + elif step == 'train_extractor': + training_options = self.extractor_training_options + elif step == 'train_projector': + training_options = self.projector_training_options + elif step == 'train_enroller': + training_options = self.enroller_training_options + else: + raise ValueError( + "The given step '%s' must be one of ('train_extractor', 'train_projector', 'train_enroller')" % step) + + files = self.sort(self.objects(protocol=self.protocol, groups='world', **training_options)) + if arrange_by_client: + return self.arrange_by_client(files) + else: + return files + + @setastest(False) + def test_files(self, groups=['dev']): + """test_files(groups = ['dev']) -> files + + Returns all test files (i.e., files used for enrollment and probing) for the given groups, respecting the current protocol. + The files for the steps can be limited using the ``all_files_options`` defined in the constructor. + + **Parameters:** + + groups : some of ``('dev', 'eval')`` + The groups to get the data for. 
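A sketch of how a tool would typically consume this part of the interface, again using the hypothetical ``ToyDatabase``; the step names are the ones accepted above, and ``enroll_files()`` / ``probe_files()`` are documented further down.

.. code-block:: python

   # training data comes from the 'world' group; arranging it by client yields one
   # sub-list per identity, which client-based training algorithms expect
   for client_files in db.training_files(step='train_projector', arrange_by_client=True):
       print(client_files[0].client_id, len(client_files))

   # enrollment and probing use the development (or evaluation) group
   for model_id in db.model_ids(groups='dev'):
       enroll = db.enroll_files(model_id, group='dev')
       probes = db.probe_files(model_id, group='dev')
       print(model_id, len(enroll), len(probes))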
+ + **Returns:** + + files : [:py:class:`File`] + The sorted and unique list of test files of the database. + """ + return self.sort(self.objects(protocol=self.protocol, groups=groups, **self.all_files_options)) + + def enroll_files(self, model_id=None, group='dev'): + """enroll_files(model_id, group = 'dev') -> files + + Returns a list of File objects that should be used to enroll the model with the given model id from the given group, respecting the current protocol. + If the model_id is None (the default), enrollment files for all models are returned. + + **Parameters:** + + model_id : int or str + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`bob.bio.base.database.BioFile`] + The list of files used for to enroll the model with the given model id. + """ + if model_id: + return self.sort( + self.objects(protocol=self.protocol, groups=group, model_ids=(model_id,), purposes='enroll', + **self.all_files_options)) + else: + return self.sort( + self.objects(protocol=self.protocol, groups=group, purposes='enroll', **self.all_files_options)) + + def probe_files(self, model_id=None, group='dev'): + """probe_files(model_id = None, group = 'dev') -> files + + Returns a list of probe File objects, respecting the current protocol. + If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). + Otherwise, all probe files of the given group are returned. + + **Parameters:** + + model_id : int or str or ``None`` + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`File`] + The list of files used for to probe the model with the given model id. + """ + if model_id is not None: + files = self.objects(protocol=self.protocol, groups=group, model_ids=(model_id,), purposes='probe', + **self.all_files_options) + else: + files = self.objects(protocol=self.protocol, groups=group, purposes='probe', **self.all_files_options) + return self.sort(files) + + def object_sets(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): + """This function returns lists of FileSet objects, which fulfill the given restrictions. + + Keyword parameters: + + groups : str or [str] + The groups of which the clients should be returned. + Usually, groups are one or more elements of ('world', 'dev', 'eval') + + protocol + The protocol for which the clients should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + + purposes : str or [str] + The purposes for which File objects should be retrieved. + Usually, purposes are one of ('enroll', 'probe'). + + model_ids : [various type] + The model ids for which the File objects should be retrieved. + What defines a 'model id' is dependent on the database. + In cases, where there is only one model per client, model ids and client ids are identical. + In cases, where there is one model per file, model ids and file ids are identical. + But, there might also be other cases. 
+ """ + raise NotImplementedError("This function must be implemented in your derived class.") + + def probe_file_sets(self, model_id=None, group='dev'): + """probe_file_sets(model_id = None, group = 'dev') -> files + + Returns a list of probe FileSet objects, respecting the current protocol. + If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). + Otherwise, all probe files of the given group are returned. + + **Parameters:** + + model_id : int or str or ``None`` + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`FileSet`] or something similar + The list of file sets used to probe the model with the given model id.""" + if model_id is not None: + file_sets = self.object_sets(protocol=self.protocol, groups=group, model_ids=(model_id,), purposes='probe', + **self.all_files_options) + else: + file_sets = self.object_sets(protocol=self.protocol, groups=group, purposes='probe', + **self.all_files_options) + return self.sort(file_sets) + + def client_id_from_model_id(self, model_id, group='dev'): + """Return the client id associated with the given model id. + In this base class implementation, it is assumed that only one model is enrolled for each client and, thus, client id and model id are identical. + All key word arguments are ignored. + Please override this function in derived class implementations to change this behavior.""" + return model_id + + +class ZTBioDatabase(BioDatabase): + """This class defines another set of abstract functions that need to be implemented if your database provides the interface for computing scores used for ZT-normalization.""" + + def __init__(self, + name, + z_probe_options={}, # Limit the z-probes + **kwargs): + """**Construtctor Documentation** + + This constructor tests if all implemented functions take the correct arguments. + All keyword parameters will be passed unaltered to the :py:class:`bob.bio.base.database.BioDatabase` constructor. + """ + # call base class constructor + BioDatabase.__init__(self, name, **kwargs) + + self.z_probe_options = z_probe_options + + # try if the implemented tmodel_ids_with_protocol(), tobjects() and zobjects() function have at least the required interface + try: + # create a value that is very unlikely a valid value for anything + test_value = '#F9S%3*Y' + # test if the parameters of the functions apply + self.tmodel_ids_with_protocol(groups=test_value, protocol=test_value) + self.tobjects(groups=test_value, protocol=test_value, model_ids=test_value) + self.zobjects(groups=test_value, protocol=test_value) + except TypeError as e: + # type error indicates that the given parameters are not valid. + raise NotImplementedError(str( + e) + "\nPlease implement:\n - the tmodel_ids_with_protocol(...) function with at least the " + "arguments 'groups' and 'protocol'\n - the tobjects(...) function with at least the arguments " + "'groups', 'protocol' and 'model_ids'\n - the zobjects(...) function with at " + "least the arguments 'groups' and 'protocol'") + except: + # any other error is fine at this stage. + pass + + @abc.abstractmethod + def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs): + """This function returns the File objects of the T-Norm models of the given groups for the given protocol and the given model ids. 
+ + Keyword parameters: + + groups : str or [str] + The groups of which the model ids should be returned. + Usually, groups are one or more elements of ('dev', 'eval') + + protocol : str + The protocol for which the model ids should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + + model_ids : [various type] + The model ids for which the File objects should be retrieved. + What defines a 'model id' is dependent on the database. + In cases, where there is only one model per client, model ids and client ids are identical. + In cases, where there is one model per file, model ids and file ids are identical. + But, there might also be other cases. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + @abc.abstractmethod + def zobjects(self, groups=None, protocol=None, **kwargs): + """This function returns the File objects of the Z-Norm impostor files of the given groups for the given protocol. + + Keyword parameters: + + groups : str or [str] + The groups of which the model ids should be returned. + Usually, groups are one or more elements of ('dev', 'eval') + + protocol : str + The protocol for which the model ids should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + def all_files(self, groups=['dev']): + """all_files(groups=None) -> files + + Returns all files of the database, including those for ZT norm, respecting the current protocol. + The files can be limited using the ``all_files_options`` and the the ``z_probe_options`` in the constructor. + + **Parameters:** + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The groups to get the data for. + If ``None``, data for all groups is returned. + + **Returns:** + + files : [:py:class:`File`] + The sorted and unique list of all files of the database. + """ + files = self.objects(protocol=self.protocol, groups=groups, **self.all_files_options) + + # add all files that belong to the ZT-norm + for group in groups: + if group == 'world': + continue + files += self.tobjects(protocol=self.protocol, groups=group, model_ids=None) + files += self.zobjects(protocol=self.protocol, groups=group, **self.z_probe_options) + return self.sort(files) + + @abc.abstractmethod + def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs): + """This function returns the ids of the T-Norm models of the given groups for the given protocol. + + Keyword parameters: + + groups : str or [str] + The groups of which the model ids should be returned. + Usually, groups are one or more elements of ('dev', 'eval') + + protocol : str + The protocol for which the model ids should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + def t_model_ids(self, groups='dev'): + """t_model_ids(group = 'dev') -> ids + + Returns a list of model ids of T-Norm models for the given group, respecting the current protocol. + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for T-Norm models of the given group. 
+ """ + return sorted(self.tmodel_ids_with_protocol(protocol=self.protocol, groups=groups)) + + def t_enroll_files(self, t_model_id, group='dev'): + """t_enroll_files(t_model_id, group = 'dev') -> files + + Returns a list of File objects that should be used to enroll the T-Norm model with the given model id from the given group, respecting the current protocol. + + **Parameters:** + + t_model_id : int or str + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`File`] + The sorted list of files used for to enroll the model with the given model id. + """ + return self.sort(self.tobjects(protocol=self.protocol, groups=group, model_ids=(t_model_id,))) + + def z_probe_files(self, group='dev'): + """z_probe_files(group = 'dev') -> files + + Returns a list of probe files used to compute the Z-Norm, respecting the current protocol. + The Z-probe files can be limited using the ``z_probe_options`` in the query to :py:meth:`ZTBioDatabase.z_probe_files` + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the Z-norm probe files for. + + **Returns:** + + files : [:py:class:`File`] + The unique list of files used to compute the Z-norm. + """ + return self.sort(self.zobjects(protocol=self.protocol, groups=group, **self.z_probe_options)) + + def z_probe_file_sets(self, group='dev'): + """z_probe_file_sets(group = 'dev') -> files + + Returns a list of probe FileSet objects used to compute the Z-Norm. + This function needs to be implemented in derived class implementations. + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the Z-norm probe files for. + + **Returns:** + + files : [:py:class:`FileSet`] + The unique list of file sets used to compute the Z-norm. + """ + raise NotImplementedError("Please implement this function in derived classes") + + def client_id_from_t_model_id(self, t_model_id, group='dev'): + """client_id_from_t_model_id(t_model_id, group = 'dev') -> client_id + Returns the client id for the given T-Norm model id. + In this base class implementation, we just use the :py:meth:`client_id_from_model_id` function. + Overload this function if you need another behavior. + **Parameters:** + t_model_id : int or str + A unique ID that identifies the T-Norm model. + group : one of ``('dev', 'eval')`` + The group to get the client ids for. + **Returns:** + client_id : [int] or [str] + A unique ID that identifies the client, to which the T-Norm model belongs. + """ + return self.client_id_from_model_id(t_model_id, group) diff --git a/bob/bio/base/database/file.py b/bob/bio/base/database/file.py new file mode 100644 index 0000000000000000000000000000000000000000..3dca312725b7e1cb231d22d43445aea4acbd14b2 --- /dev/null +++ b/bob/bio/base/database/file.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +import bob.db.base + + +class BioFile(bob.db.base.File): + """A simple base class that defines basic properties of File object for the use in verification experiments""" + + def __init__(self, client_id, path, file_id=None): + """**Constructor Documentation** + + Initialize the File object with the minimum required data. + + Parameters: + + client_id : various type + The id of the client this file belongs to. + Its type depends on your implementation. + If you use an SQL database, this should be an SQL type like Integer or String. 
+ For path and file_id, please refer to :py:class:`bob.db.base.File` constructor + """ + bob.db.base.File.__init__(self, path, file_id) + + # just copy the information + self.client_id = client_id + """The id of the client, to which this file belongs to.""" + + +class BioFileSet(BioFile): + """This class defines the minimum interface of a set of database files that needs to be exported. + Use this class, whenever the database provides several files that belong to the same probe. + Each file set has an id, and a list of associated files, which are of type :py:class:`BioFile` of the same client. + The file set id can be anything hashable, but needs to be unique all over the database. + **Parameters:** + file_set_id : str or int + A unique ID that identifies the file set. + files : [:py:class:`BioFile`] + A non-empty list of BioFile objects that should be stored inside this file. + All files of that list need to have the same client ID. + """ + + def __init__(self, file_set_id, files, path=None): + + # don't accept empty file lists + assert len(files), "Cannot create an empty BioFileSet" + + # call base class constructor + BioFile.__init__(self, files[0].client_id, "+".join(f.path for f in files) if path is None else path, file_set_id) + + # check that all files come from the same client + assert all(f.client_id == self.client_id for f in files) + + # The list of files contained in this set + self.files = files + """The list of :py:class:`BioFile` objects stored in this file set""" + + def __lt__(self, other): + """Defines an order between file sets by using the order of the file set ids.""" + # compare two BioFile set objects by comparing their IDs + return self.id < other.id diff --git a/bob/bio/base/preprocessor/Filename.py b/bob/bio/base/preprocessor/Filename.py index 06db452fe7b5d051d3f949a86fad7eff88a07c56..4c0fddd5c423555eb2357206122ac48762f6359a 100644 --- a/bob/bio/base/preprocessor/Filename.py +++ b/bob/bio/base/preprocessor/Filename.py @@ -10,11 +10,12 @@ from .Preprocessor import Preprocessor class Filename (Preprocessor): """This preprocessor is simply passing over the file name, in order to be used in an extractor that loads the data from file. - The file name that will be returned by the :py:meth:`read_data` function will contain the path of the :py:class:`bob.bio.db.BioFile`, but it might contain more paths (such as the ``--preprocessed-directory`` passed on command line). + The file name that will be returned by the :py:meth:`read_data` function will contain the path of the :py:class:`bob.bio.base.database.BioFile`, but it might contain more paths (such as the ``--preprocessed-directory`` passed on command line). """ def __init__(self): - Preprocessor.__init__(self, writes_data=False) + # call base class constructor, using a custom ``read_original_data`` that does nothing and always returns None + Preprocessor.__init__(self, writes_data=False, read_original_data = lambda x,y,z: None) # The call function (i.e. the operator() in C++ terms) @@ -40,28 +41,6 @@ class Filename (Preprocessor): return 1 - ############################################################ - ### Special functions that might be overwritten on need - ############################################################ - - def read_original_data(self, original_file_name): - """read_original_data(original_file_name) -> data - - This function does **not** read the original image.. - - **Parameters:** - - ``original_file_name`` : any - ignored - - **Returns:** - - ``data`` : ``None`` - throughout. 
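A short, hypothetical construction example for the two classes defined in ``file.py`` above; the client id, paths and set id are made up.

.. code-block:: python

   from bob.bio.base.database import BioFile, BioFileSet

   # three probe samples of the same (made-up) client ...
   probes = [BioFile(client_id=7, path='client7/probe-%d' % i, file_id=70 + i) for i in range(3)]

   # ... grouped into one probe file set; the set id must be unique over the whole database
   probe_set = BioFileSet(file_set_id='client7-set1', files=probes)
   print(probe_set.id, probe_set.client_id, len(probe_set.files))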
- """ - pass - - def write_data(self, data, data_file): """Does **not** write any data. diff --git a/bob/bio/base/preprocessor/Preprocessor.py b/bob/bio/base/preprocessor/Preprocessor.py index 268a7b7a16f2fb67864a98f526eca7e64f78f4ca..ba9e4fcd5be34e13e3565afb67c4df58b97fce4c 100644 --- a/bob/bio/base/preprocessor/Preprocessor.py +++ b/bob/bio/base/preprocessor/Preprocessor.py @@ -3,10 +3,6 @@ # @author: Manuel Guenther <Manuel.Guenther@idiap.ch> # @date: Tue Oct 2 12:12:39 CEST 2012 -import bob.io.base - -import os - from .. import utils class Preprocessor: @@ -18,14 +14,19 @@ class Preprocessor: writes_data : bool Select, if the preprocessor actually writes preprocessed images, or if it is simply returning values. + read_original_data: callable + This function is used to read the original data from file. + It takes three inputs: A :py:class:`bob.bio.base.database.BioFile` (or one of its derivatives), the original directory (as ``str``) and the original extension (as ``str``). + kwargs : ``key=value`` pairs A list of keyword arguments to be written in the :py:meth:`__str__` function. """ - def __init__(self, writes_data = True, **kwargs): + def __init__(self, writes_data = True, read_original_data = lambda biofile,directory,extension : biofile.load(directory,extension), **kwargs): # Each class needs to have a constructor taking # all the parameters that are required for the preprocessing as arguments self.writes_data = writes_data + self.read_original_data = read_original_data self._kwargs = kwargs pass @@ -70,25 +71,6 @@ class Preprocessor: ### Special functions that might be overwritten on need ############################################################ - def read_original_data(self, original_file_name): - """read_original_data(original_file_name) -> data - - Reads the *original* data (usually something like an image) from file. - In this base class implementation, it uses :py:func:`bob.io.base.load` to do that. - If you have different format, please overwrite this function. - - **Parameters:** - - original_file_name : str - The file name to read the original data from. - - **Returns:** - - data : object (usually :py:class:`numpy.ndarray`) - The original data read from file. - """ - return bob.io.base.load(original_file_name) - def write_data(self, data, data_file): """Writes the given *preprocessed* data to a file with the given name. 
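The new ``read_original_data`` hook replaces the former ``read_original_data(original_file_name)`` method of the preprocessor. A minimal sketch of a preprocessor that plugs in its own reader follows; the reader and class names are invented, and only ``bob.io.base.load`` and ``BioFile.make_path`` are assumed from the existing API.

.. code-block:: python

   import bob.io.base
   from bob.bio.base.preprocessor import Preprocessor

   def read_raw(biofile, directory, extension):
       # custom reader with the new three-argument signature; it resolves the
       # path and loads the file, mimicking what biofile.load() would do
       return bob.io.base.load(biofile.make_path(directory, extension))

   class PassThrough(Preprocessor):
       """Toy preprocessor that returns the data unchanged, for illustration only."""

       def __init__(self):
           Preprocessor.__init__(self, read_original_data=read_raw)

       def __call__(self, data, annotations=None):
           return data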
diff --git a/bob/bio/base/script/preprocess.py b/bob/bio/base/script/preprocess.py index 1014196ac414a4cb7307fdbc1db5e1171cc95de9..ccc837046d97e089f5ce97dfc6fffb446e794a4f 100644 --- a/bob/bio/base/script/preprocess.py +++ b/bob/bio/base/script/preprocess.py @@ -6,6 +6,7 @@ import bob.core logger = bob.core.log.setup("bob.bio.base") import bob.bio.base +from bob.bio.base.database.file import BioFile import bob.db.base import numpy @@ -46,7 +47,7 @@ def main(command_line_parameters=None): preprocessor = bob.bio.base.load_resource(' '.join(args.preprocessor), "preprocessor") logger.debug("Loading input data from file '%s'%s", args.input_file, " and '%s'" % args.annotation_file if args.annotation_file is not None else "") - data = preprocessor.read_original_data(args.input_file) + data = preprocessor.read_original_data(BioFile(1, args.input_file, 2), "", "") annotations = bob.db.base.annotations.read_annotation_file(args.annotation_file, 'named') if args.annotation_file is not None else None logger.info("Preprocessing data") diff --git a/bob/bio/base/test/dummy/database.py b/bob/bio/base/test/dummy/database.py index a44e0ec2fc2f2184d91735e195153f739ac1876e..e8cfac92044055f84a343cae0a226b92a9fb94f8 100644 --- a/bob/bio/base/test/dummy/database.py +++ b/bob/bio/base/test/dummy/database.py @@ -1,4 +1,5 @@ -from bob.bio.db import ZTBioDatabase +from bob.bio.base.database import ZTBioDatabase +from bob.bio.base.database.file import BioFile from bob.bio.base.test.utils import atnt_database_directory @@ -17,11 +18,14 @@ class DummyDatabase(ZTBioDatabase): import bob.db.atnt self.__db = bob.db.atnt.Database() + def _make_bio(self, files): + return [BioFile(client_id=f.client_id, path=f.path, file_id=f.id) for f in files] + def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs): return self.__db.model_ids(groups, protocol) def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): - return self.__db.objects(model_ids, groups, purposes, protocol, **kwargs) + return self._make_bio(self.__db.objects(model_ids, groups, purposes, protocol, **kwargs)) def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs): return [] diff --git a/bob/bio/base/test/dummy/fileset.py b/bob/bio/base/test/dummy/fileset.py index 570f2eb085e972b6cbeb6c7a742a2f337e52e808..e4b3368f90626e23e61d4093f9ddac9775ff5635 100644 --- a/bob/bio/base/test/dummy/fileset.py +++ b/bob/bio/base/test/dummy/fileset.py @@ -1,7 +1,6 @@ -from bob.bio.db import ZTBioDatabase, BioFileSet, BioFile +from bob.bio.base.database import ZTBioDatabase, BioFileSet, BioFile from bob.bio.base.test.utils import atnt_database_directory - class DummyDatabase(ZTBioDatabase): def __init__(self): @@ -20,25 +19,21 @@ class DummyDatabase(ZTBioDatabase): def uses_probe_file_sets(self): return True + def _make_bio(self, files): + return [BioFile(client_id=f.client_id, path=f.path, file_id=f.id) for f in files] + def probe_file_sets(self, model_id=None, group='dev'): """Returns the list of probe File objects (for the given model id, if given).""" - # import ipdb; ipdb.set_trace() files = self.arrange_by_client(self.sort(self.objects(protocol=None, groups=group, purposes='probe'))) # arrange files by clients - file_sets = [] - for client_files in files: - # convert into our File objects (so that they are tested as well) - our_files = [BioFile(f.client_id, f.path, f.id) for f in client_files] - # generate file set for each client - file_set = BioFileSet(our_files[0].client_id, our_files) - file_sets.append(file_set) 
+ file_sets = [BioFileSet(client_files[0].client_id, client_files) for client_files in files] return file_sets def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs): return self.__db.model_ids(groups, protocol) def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): - return self.__db.objects(model_ids, groups, purposes, protocol, **kwargs) + return self._make_bio(self.__db.objects(model_ids, groups, purposes, protocol, **kwargs)) def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs): return [] diff --git a/bob/bio/base/test/test_preprocessor.py b/bob/bio/base/test/test_preprocessor.py index bd78b60d25e4cd716cb8e67e0d2445a22c98480c..e822c1e5207e3bc7d0cacd62f3cfbe61608ace41 100644 --- a/bob/bio/base/test/test_preprocessor.py +++ b/bob/bio/base/test/test_preprocessor.py @@ -1,7 +1,5 @@ import bob.bio.base -from . import utils - def test_filename(): # load extractor preprocessor = bob.bio.base.load_resource("filename", "preprocessor", preferred_package = 'bob.bio.base') @@ -9,7 +7,7 @@ def test_filename(): assert isinstance(preprocessor, bob.bio.base.preprocessor.Filename) # try to load the original image - assert preprocessor.read_original_data("/any/path") is None + assert preprocessor.read_original_data(bob.bio.base.database.file.BioFile(1,"2",3), "/any/path", ".any.extension") is None # try to process assert preprocessor(None, None) == 1 diff --git a/bob/bio/base/tools/command_line.py b/bob/bio/base/tools/command_line.py index 33f0ed662fe7cb153cb48f339afa58889f2a5953..c411e21e5d49a75c16709bd68dbc829906f09eda 100644 --- a/bob/bio/base/tools/command_line.py +++ b/bob/bio/base/tools/command_line.py @@ -8,7 +8,7 @@ logger = bob.core.log.setup("bob.bio.base") from .. import utils from . import FileSelector -from bob.bio.db import BioDatabase +from bob.bio.base.database import BioDatabase """Execute biometric recognition algorithms on a certain biometric database. """ diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py index 95a3772a758462f3221ecd31cf58c98344c528be..18cd9882a57924a472de60f3759e10f22335d4fb 100644 --- a/bob/bio/base/tools/preprocessor.py +++ b/bob/bio/base/tools/preprocessor.py @@ -61,18 +61,12 @@ def preprocess(preprocessor, groups = None, indices = None, allow_missing_files for i in index_range: preprocessed_data_file = preprocessed_data_files[i] file_object = data_files[i] - if isinstance(file_object, list): - file_name = [f.make_path(original_directory, original_extension) for f in file_object] - else: - file_name = file_object.make_path(original_directory, original_extension) + file_name = file_object.make_path(original_directory, original_extension) # check for existence if not utils.check_file(preprocessed_data_file, force, 1000): logger.debug("... Processing original data file '%s'", file_name) - if hasattr(file_object, 'load'): - data = file_object.load(original_directory, original_extension) - else: - data = preprocessor.read_original_data(file_name) + data = preprocessor.read_original_data(file_object, original_directory, original_extension) # create output directory before reading the data file (is sometimes required, when relative directories are specified, especially, including a .. 
somewhere) bob.io.base.create_directories_safe(os.path.dirname(preprocessed_data_file)) diff --git a/develop.cfg b/develop.cfg index ce6a1b19c5e6608bc8c09ff59722bd5c73007465..de0adab95f4cbf87733651db349a770607ed82e5 100644 --- a/develop.cfg +++ b/develop.cfg @@ -25,7 +25,6 @@ develop = src/bob.extension src/bob.learn.linear src/bob.learn.em src/bob.db.atnt - src/bob.bio.db . ; options for bob.buildout @@ -47,7 +46,6 @@ bob.learn.activation = git git@gitlab.idiap.ch:bob/bob.learn.activation bob.learn.linear = git git@gitlab.idiap.ch:bob/bob.learn.linear bob.learn.em = git git@gitlab.idiap.ch:bob/bob.learn.em bob.db.atnt = git git@gitlab.idiap.ch:bob/bob.db.atnt -bob.bio.db = git git@gitlab.idiap.ch:bob/bob.bio.db [scripts] recipe = bob.buildout:scripts diff --git a/doc/implementation.rst b/doc/implementation.rst index b7bdcf7d27b300e493fbc62c53b374205c4ebf45..0777226226cac24dea92cde04eb3663af261ec4f 100644 --- a/doc/implementation.rst +++ b/doc/implementation.rst @@ -218,13 +218,13 @@ Verification Database Interface ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ For most of the data sets, we rely on the database interfaces from Bob_. -Particularly, all databases that are derived from the :py:class:`bob.bio.db.BioDatabase` (click :ref:`here <verification_databases>` for a list of implemented databases) are supported by a special derivation of the databases from above. +Particularly, all databases that are derived from the :py:class:`bob.bio.base.database.BioDatabase` (click :ref:`here <verification_databases>` for a list of implemented databases) are supported by a special derivation of the databases from above. For these databases, the special :py:class:`bob.bio.base.database.DatabaseBob` interface is provided, which takes the Bob_ database as parameter. Several such databases are defined in the according packages, i.e., :ref:`bob.bio.spear <bob.bio.spear>`, :ref:`bob.bio.face <bob.bio.face>` and :ref:`bob.bio.video <bob.bio.video>`. For Bob_'s ZT-norm databases, we provide the :py:class:`bob.bio.base.database.DatabaseBobZT` interface. Additionally, a generic database interface, which is derived from :py:class:`bob.bio.base.database.DatabaseBobZT`, is the :py:class:`bob.bio.base.database.DatabaseFileList`. -This database interfaces with the :py:class:`bob.db.verification.filelist.Database`, which is a generic database based on file lists, implementing the :py:class:`bob.bio.db.BioDatabase` interface. +This database interfaces with the :py:class:`bob.db.verification.filelist.Database`, which is a generic database based on file lists, implementing the :py:class:`bob.bio.base.database.BioDatabase` interface. Defining your own Database ~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/requirements.txt b/requirements.txt index 97c8574647adf403007f648acb78b847d8d1fa1a..5f863c468a4d0b8e4e819b893f7a5baf3c46b72e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -bob.bio.db bob.blitz bob.core bob.db.base
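For completeness, a hypothetical ``ZTBioDatabase`` counterpart to the earlier ``ToyDatabase`` sketch, satisfying the additional interface check performed in the ``ZTBioDatabase`` constructor; all ids and paths are invented.

.. code-block:: python

   from bob.bio.base.database import ZTBioDatabase, BioFile

   class ToyZTDatabase(ZTBioDatabase):
       """Hypothetical ZT-norm-enabled database, for illustration only."""

       def __init__(self):
           super(ToyZTDatabase, self).__init__(name='toy-zt', protocol='Default')

       # regular verification interface (as for BioDatabase)
       def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
           return [1]

       def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
           return [BioFile(1, 'client1/sample1', 1), BioFile(1, 'client1/sample2', 2)]

       def annotations(self, file):
           return None

       # additional ZT-norm interface
       def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs):
           return [2]

       def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs):
           return [BioFile(2, 'client2/tnorm1', 3)]

       def zobjects(self, groups=None, protocol=None, **kwargs):
           return [BioFile(3, 'client3/znorm1', 4)]

   ztdb = ToyZTDatabase()
   print(ztdb.t_model_ids(groups='dev'))                                       # -> [2]
   print(len(ztdb.t_enroll_files(2, 'dev')), len(ztdb.z_probe_files('dev')))   # -> 1 1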
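The call pattern that the updated ``preprocess.py`` script and ``tools/preprocessor.py`` rely on can be sketched as follows: any input, even a standalone file, is wrapped into a ``BioFile`` before being handed to ``read_original_data``. The client and file ids are placeholders; the ``'filename'`` resource is the one registered by this package.

.. code-block:: python

   import bob.bio.base
   from bob.bio.base.database import BioFile

   preprocessor = bob.bio.base.load_resource('filename', 'preprocessor',
                                             preferred_package='bob.bio.base')

   # only the path matters here; client id and file id are dummy values
   biofile = BioFile(client_id=1, path='/path/to/input/sample', file_id=1)
   data = preprocessor.read_original_data(biofile, "", "")
   assert data is None    # the Filename preprocessor deliberately reads nothing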