diff --git a/bob/bio/base/__init__.py b/bob/bio/base/__init__.py index 48dbde118eeb3b217a39ed298c8f7c45e6065dfc..5b2461a9b9400164f5010f4413fbabf26d2c17b3 100644 --- a/bob/bio/base/__init__.py +++ b/bob/bio/base/__init__.py @@ -1,4 +1,5 @@ from .utils import * +from . import database from . import preprocessor from . import extractor from . import algorithm diff --git a/bob/bio/base/database/__init__.py b/bob/bio/base/database/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bb9010855d0e805269330657ab5c5f6f394dd07a --- /dev/null +++ b/bob/bio/base/database/__init__.py @@ -0,0 +1,7 @@ +from .file import BioFile +from .file import BioFileSet +from .database import BioDatabase +from .database import ZTBioDatabase + +# gets sphinx autodoc done right - don't remove it +__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/database/database.py b/bob/bio/base/database/database.py new file mode 100644 index 0000000000000000000000000000000000000000..2190b1373ba0df18463e1b086783897c65781e32 --- /dev/null +++ b/bob/bio/base/database/database.py @@ -0,0 +1,834 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +import os +import abc +import six +# Nose is detecting a function as a test function, while it is not... 
+from numpy.testing.decorators import setastest +import bob.db.base + +import bob.bio.base.database + +class BioDatabase(six.with_metaclass(abc.ABCMeta, bob.db.base.Database)): + def __init__( + self, + name, + all_files_options={}, # additional options for the database query that can be used to extract all files + extractor_training_options={}, + # additional options for the database query that can be used to extract the training files for the extractor training + projector_training_options={}, + # additional options for the database query that can be used to extract the training files for the extractor training + enroller_training_options={}, + # additional options for the database query that can be used to extract the training files for the extractor training + check_original_files_for_existence=False, + original_directory=None, + original_extension=None, + annotation_directory=None, + annotation_extension='.pos', + annotation_type=None, + protocol='Default', + training_depends_on_protocol=False, + models_depend_on_protocol=False, + **kwargs + ): + """This class represents the basic API for database access. + Please use this class as a base class for your database access classes. + Do not forget to call the constructor of this base class in your derived class. + + **Parameters:** + + name : str + A unique name for the database. + + all_files_options : dict + Dictionary of options passed to the :py:meth:`bob.bio.base.database.BioDatabase.objects` database query when retrieving all data. + + extractor_training_options : dict + Dictionary of options passed to the :py:meth:`bob.bio.base.database.BioDatabase.objects` database query used to retrieve the files for the extractor training. + + projector_training_options : dict + Dictionary of options passed to the :py:meth:`bob.bio.base.database.BioDatabase.objects` database query used to retrieve the files for the projector training. 
+ + enroller_training_options : dict + Dictionary of options passed to the :py:meth:`bob.bio.base.database.BioDatabase.objects` database query used to retrieve the files for the enroller training. + + check_original_files_for_existence : bool + Enables to test for the original data files when querying the database. + + original_directory : str + The directory where the original data of the database are stored. + + original_extension : str + The file name extension of the original data. + + annotation_directory : str + The directory where the image annotations of the database are stored, if any. + + annotation_extension : str + The file name extension of the annotation files. + + annotation_type : str + The type of the annotation file to read, see :py:func:`bob.bio.base.database.read_annotation_file` for accepted formats. + + protocol : str or ``None`` + The name of the protocol that defines the default experimental setup for this database. + + .. todo:: Check if the ``None`` protocol is supported. + + training_depends_on_protocol : bool + Specifies, if the training set used for training the extractor and the projector depend on the protocol. + This flag is used to avoid re-computation of data when running on the different protocols of the same database. + + models_depend_on_protocol : bool + Specifies, if the models depend on the protocol. + This flag is used to avoid re-computation of models when running on the different protocols of the same database. + + kwargs : ``key=value`` pairs + The arguments of the :py:class:`Database` base class constructor. 
+ + """ + + assert isinstance(name, str) + + self.name = name + + self.all_files_options = all_files_options + self.extractor_training_options = extractor_training_options + self.projector_training_options = projector_training_options + self.enroller_training_options = enroller_training_options + self.check_existence = check_original_files_for_existence + + self._kwargs = kwargs + + self.original_directory = original_directory + self.original_extension = original_extension + self.annotation_directory = annotation_directory + self.annotation_extension = annotation_extension + self.annotation_type = annotation_type + self.protocol = protocol + self.training_depends_on_protocol = training_depends_on_protocol + self.models_depend_on_protocol = models_depend_on_protocol + self.models_depend_on_protocol = models_depend_on_protocol + + # try if the implemented model_ids_with_protocol() and objects() function have at least the required interface + try: + # create a value that is very unlikely a valid value for anything + test_value = '#6T7+§X' + # test if the parameters of the functions apply + self.model_ids_with_protocol(groups=test_value, protocol=test_value) + self.objects(groups=test_value, protocol=test_value, purposes=test_value, model_ids=(test_value,)) + self.annotations(file=bob.bio.base.database.BioFile(test_value, test_value, test_value)) + except TypeError as e: + # type error indicates that the given parameters are not valid. + raise NotImplementedError(str( + e) + "\nPlease implement:\n - the model_ids_with_protocol(...) function with at least the " + "arguments 'groups' and 'protocol'\n - the objects(...) function with at least the " + "arguments 'groups', 'protocol', 'purposes' and 'model_ids'\n - the annotations() " + "function with at least the arguments 'file_id'.") + except: + # any other error is fine at this stage. + pass + + def __str__(self): + """__str__() -> info + + This function returns all parameters of this class. 
+ + **Returns:** + + info : str + A string containing the full information of all parameters of this class. + """ + params = "name=%s, protocol=%s, original_directory=%s, original_extension=%s" % (self.name, self.protocol, self.original_directory, self.original_extension) + params += ", ".join(["%s=%s" % (key, value) for key, value in self._kwargs.items()]) + params += ", original_directory=%s, original_extension=%s" % (self.original_directory, self.original_extension) + if self.all_files_options: + params += ", all_files_options=%s" % self.all_files_options + if self.extractor_training_options: + params += ", extractor_training_options=%s" % self.extractor_training_options + if self.projector_training_options: + params += ", projector_training_options=%s" % self.projector_training_options + if self.enroller_training_options: + params += ", enroller_training_options=%s" % self.enroller_training_options + + return "%s(%s)" % (str(self.__class__), params) + + ########################################################################### + # Helper functions that you might want to use in derived classes + ########################################################################### + def replace_directories(self, replacements=None): + """This helper function replaces the ``original_directory`` and the ``annotation_directory`` of the database with the directories read from the given replacement file. + + This function is provided for convenience, so that the database configuration files do not need to be modified. + Instead, this function uses the given dictionary of replacements to change the original directory and the original extension (if given). + + The given ``replacements`` can be of type ``dict``, including all replacements, or a file name (as a ``str``), in which case the file is read. + The structure of the file should be: + + .. 
code-block:: text + + # Comments starting with # and empty lines are ignored + + [YOUR_..._DATA_DIRECTORY] = /path/to/your/data + [YOUR_..._ANNOTATION_DIRECTORY] = /path/to/your/annotations + + If no annotation files are available (e.g. when they are stored inside the ``database``), the annotation directory can be left out. + + **Parameters:** + + replacements : dict or str + A dictionary with replacements, or a name of a file to read the dictionary from. + If the file name does not exist, no directories are replaced. + """ + if replacements is None: + return + if isinstance(replacements, str): + if not os.path.exists(replacements): + return + # Open the database replacement file and reads its content + with open(replacements) as f: + replacements = {} + for line in f: + if line.strip() and not line.startswith("#"): + splits = line.split("=") + assert len(splits) == 2 + replacements[splits[0].strip()] = splits[1].strip() + + assert isinstance(replacements, dict) + + if self.original_directory in replacements: + self.original_directory = replacements[self.original_directory] + + try: + if self.annotation_directory in replacements: + self.annotation_directory = replacements[self.annotation_directory] + except AttributeError: + pass + + def sort(self, files): + """sort(files) -> sorted + + Returns a sorted version of the given list of File's (or other structures that define an 'id' data member). + The files will be sorted according to their id, and duplicate entries will be removed. + + **Parameters:** + + files : [:py:class:`File`] + The list of files to be uniquified and sorted. + + **Returns:** + + sorted : [:py:class:`File`] + The sorted list of files, with duplicate :py:attr:`File.id`\s being removed. 
+ """ + # sort files using their sort function + sorted_files = sorted(files) + # remove duplicates + return [f for i, f in enumerate(sorted_files) if not i or sorted_files[i - 1].id != f.id] + + def uses_probe_file_sets(self, protocol=None): + """Defines if, for the current protocol, the database uses several probe files to generate a score. + Returns True if the given protocol specifies file sets for probes, instead of a single probe file. + In this default implementation, False is returned, throughout. + If you need different behavior, please overload this function in your derived class.""" + return False + + def arrange_by_client(self, files): + """arrange_by_client(files) -> files_by_client + + Arranges the given list of files by client id. + This function returns a list of lists of File's. + + **Parameters:** + + files : :py:class:`File` + A list of files that should be split up by :py:attr:`File.client_id`. + + **Returns:** + + files_by_client : [[:py:class:`File`]] + The list of lists of files, where each sub-list groups the files with the same :py:attr:`File.client_id` + """ + client_files = {} + for file in files: + if file.client_id not in client_files: + client_files[file.client_id] = [] + client_files[file.client_id].append(file) + + files_by_clients = [] + for client in sorted(client_files.keys()): + files_by_clients.append(client_files[client]) + return files_by_clients + + def annotations(self, file): + """ + Returns the annotations for the given File object, if available. + It uses :py:func:`bob.bio.base.database.read_annotation_file` to load the annotations. + + **Parameters:** + + file : :py:class:`File` + The file for which annotations should be returned. + + **Returns:** + + annots : dict or None + The annotations for the file, if available. 
+ """ + if self.annotation_directory: + try: + from .annotations import read_annotation_file + annotation_path = os.path.join(self.annotation_directory, file.path + self.annotation_extension) + return read_annotation_file(annotation_path, self.annotation_type) + except ImportError as e: + raise NotImplementedError(str(e) + " Annotations are not read." % e) + + return None + + def file_names(self, files, directory, extension): + """file_names(files, directory, extension) -> paths + + Returns the full path of the given File objects. + + **Parameters:** + + files : [:py:class:`File`] + The list of file object to retrieve the file names for. + + directory : str + The base directory, where the files can be found. + + extension : str + The file name extension to add to all files. + + **Returns:** + + paths : [str] or [[str]] + The paths extracted for the files, in the same order. + If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set. + """ + # return the paths of the files + if self.uses_probe_file_sets() and files and hasattr(files[0], 'files'): + # List of Filesets: do not remove duplicates + return [[f.make_path(directory, extension) for f in file_set.files] for file_set in files] + else: + # List of files, do not remove duplicates + return [f.make_path(directory, extension) for f in files] + + def original_file_names(self, files): + """original_file_names(files) -> paths + + Returns the full path of the original data of the given File objects. + + **Parameters:** + + files : [:py:class:`File`] + The list of file object to retrieve the original data file names for. + + **Returns:** + + paths : [str] or [[str]] + The paths extracted for the files, in the same order. + If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set. 
+ """ + assert self.original_directory is not None + assert self.original_extension is not None + return self.file_names(files, self.original_directory, self.original_extension) + + ################################################################# + ###### Methods to be overwritten by derived classes ############# + ################################################################# + @abc.abstractmethod + def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs): + """model_ids_with_protocol(groups = None, protocol = None, **kwargs) -> ids + + Returns a list of model ids for the given groups and given protocol. + + **Parameters:** + + groups : one or more of ``('world', 'dev', 'eval')`` + The groups to get the model ids for. + + protocol: a protocol name + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for the given groups. + """ + raise NotImplementedError("Please implement this function in derived classes") + + def model_ids(self, groups='dev'): + """model_ids(group = 'dev') -> ids + + Returns a list of model ids for the given group, respecting the current protocol. + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for models of the given group. + """ + return sorted(self.model_ids_with_protocol(groups=groups, protocol=self.protocol)) + + @abc.abstractmethod + def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): + """This function returns lists of File objects, which fulfill the given restrictions. + + Keyword parameters: + + groups : str or [str] + The groups of which the clients should be returned. + Usually, groups are one or more elements of ('world', 'dev', 'eval') + + protocol + The protocol for which the clients should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. 
+ + purposes : str or [str] + The purposes for which File objects should be retrieved. + Usually, purposes are one of ('enroll', 'probe'). + + model_ids : [various type] + The model ids for which the File objects should be retrieved. + What defines a 'model id' is dependent on the database. + In cases, where there is only one model per client, model ids and client ids are identical. + In cases, where there is one model per file, model ids and file ids are identical. + But, there might also be other cases. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + ################################################################# + ######### Methods to provide common functionality ############### + ################################################################# + + def original_file_name(self, file): + """This function returns the original file name for the given File object. + + Keyword parameters: + + file : :py:class:`File` or a derivative + The File objects for which the file name should be retrieved + + Return value : str + The original file name for the given File object + """ + # check if directory is set + if not self.original_directory or not self.original_extension: + raise ValueError( + "The original_directory and/or the original_extension were not specified in the constructor.") + # extract file name + file_name = file.make_path(self.original_directory, self.original_extension) + if not self.check_existence or os.path.exists(file_name): + return file_name + raise ValueError("The file '%s' was not found. Please check the original directory '%s' and extension '%s'?" % ( + file_name, self.original_directory, self.original_extension)) + + def all_files(self, groups=None): + """all_files(groups=None) -> files + + Returns all files of the database, respecting the current protocol. + The files can be limited using the ``all_files_options`` in the constructor. 
+ + **Parameters:** + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The groups to get the data for. + If ``None``, data for all groups is returned. + + **Returns:** + + files : [:py:class:`File`] + The sorted and unique list of all files of the database. + """ + return self.sort(self.objects(protocol=self.protocol, groups=groups, **self.all_files_options)) + + def training_files(self, step=None, arrange_by_client=False): + """training_files(step = None, arrange_by_client = False) -> files + + Returns all training files for the given step, and arranges them by client, if desired, respecting the current protocol. + The files for the steps can be limited using the ``..._training_options`` defined in the constructor. + + **Parameters:** + + step : one of ``('train_extractor', 'train_projector', 'train_enroller')`` or ``None`` + The step for which the training data should be returned. + + arrange_by_client : bool + Should the training files be arranged by client? + If set to ``True``, training files will be returned in [[:py:class:`bob.bio.base.database.BioFile`]], where each sub-list contains the files of a single client. + Otherwise, all files will be stored in a simple [:py:class:`bob.bio.base.database.BioFile`]. + + **Returns:** + + files : [:py:class:`File`] or [[:py:class:`File`]] + The (arranged) list of files used for the training of the given step. 
+ """ + if step is None: + training_options = self.all_files_options + elif step == 'train_extractor': + training_options = self.extractor_training_options + elif step == 'train_projector': + training_options = self.projector_training_options + elif step == 'train_enroller': + training_options = self.enroller_training_options + else: + raise ValueError( + "The given step '%s' must be one of ('train_extractor', 'train_projector', 'train_enroller')" % step) + + files = self.sort(self.objects(protocol=self.protocol, groups='world', **training_options)) + if arrange_by_client: + return self.arrange_by_client(files) + else: + return files + + @setastest(False) + def test_files(self, groups=['dev']): + """test_files(groups = ['dev']) -> files + + Returns all test files (i.e., files used for enrollment and probing) for the given groups, respecting the current protocol. + The files for the steps can be limited using the ``all_files_options`` defined in the constructor. + + **Parameters:** + + groups : some of ``('dev', 'eval')`` + The groups to get the data for. + + **Returns:** + + files : [:py:class:`File`] + The sorted and unique list of test files of the database. + """ + return self.sort(self.objects(protocol=self.protocol, groups=groups, **self.all_files_options)) + + def enroll_files(self, model_id=None, group='dev'): + """enroll_files(model_id, group = 'dev') -> files + + Returns a list of File objects that should be used to enroll the model with the given model id from the given group, respecting the current protocol. + If the model_id is None (the default), enrollment files for all models are returned. + + **Parameters:** + + model_id : int or str + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`bob.bio.base.database.BioFile`] + The list of files used for to enroll the model with the given model id. 
+ """ + if model_id: + return self.sort( + self.objects(protocol=self.protocol, groups=group, model_ids=(model_id,), purposes='enroll', + **self.all_files_options)) + else: + return self.sort( + self.objects(protocol=self.protocol, groups=group, purposes='enroll', **self.all_files_options)) + + def probe_files(self, model_id=None, group='dev'): + """probe_files(model_id = None, group = 'dev') -> files + + Returns a list of probe File objects, respecting the current protocol. + If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). + Otherwise, all probe files of the given group are returned. + + **Parameters:** + + model_id : int or str or ``None`` + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`File`] + The list of files used for to probe the model with the given model id. + """ + if model_id is not None: + files = self.objects(protocol=self.protocol, groups=group, model_ids=(model_id,), purposes='probe', + **self.all_files_options) + else: + files = self.objects(protocol=self.protocol, groups=group, purposes='probe', **self.all_files_options) + return self.sort(files) + + def object_sets(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs): + """This function returns lists of FileSet objects, which fulfill the given restrictions. + + Keyword parameters: + + groups : str or [str] + The groups of which the clients should be returned. + Usually, groups are one or more elements of ('world', 'dev', 'eval') + + protocol + The protocol for which the clients should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + + purposes : str or [str] + The purposes for which File objects should be retrieved. 
+ Usually, purposes are one of ('enroll', 'probe'). + + model_ids : [various type] + The model ids for which the File objects should be retrieved. + What defines a 'model id' is dependent on the database. + In cases, where there is only one model per client, model ids and client ids are identical. + In cases, where there is one model per file, model ids and file ids are identical. + But, there might also be other cases. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + def probe_file_sets(self, model_id=None, group='dev'): + """probe_file_sets(model_id = None, group = 'dev') -> files + + Returns a list of probe FileSet objects, respecting the current protocol. + If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). + Otherwise, all probe files of the given group are returned. + + **Parameters:** + + model_id : int or str or ``None`` + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`FileSet`] or something similar + The list of file sets used to probe the model with the given model id.""" + if model_id is not None: + file_sets = self.object_sets(protocol=self.protocol, groups=group, model_ids=(model_id,), purposes='probe', + **self.all_files_options) + else: + file_sets = self.object_sets(protocol=self.protocol, groups=group, purposes='probe', + **self.all_files_options) + return self.sort(file_sets) + + def client_id_from_model_id(self, model_id, group='dev'): + """Return the client id associated with the given model id. + In this base class implementation, it is assumed that only one model is enrolled for each client and, thus, client id and model id are identical. + All key word arguments are ignored. 
+ Please override this function in derived class implementations to change this behavior.""" + return model_id + + +class ZTBioDatabase(BioDatabase): + """This class defines another set of abstract functions that need to be implemented if your database provides the interface for computing scores used for ZT-normalization.""" + + def __init__(self, + name, + z_probe_options={}, # Limit the z-probes + **kwargs): + """**Construtctor Documentation** + + This constructor tests if all implemented functions take the correct arguments. + All keyword parameters will be passed unaltered to the :py:class:`bob.bio.base.database.BioDatabase` constructor. + """ + # call base class constructor + BioDatabase.__init__(self, name, **kwargs) + + self.z_probe_options = z_probe_options + + # try if the implemented tmodel_ids_with_protocol(), tobjects() and zobjects() function have at least the required interface + try: + # create a value that is very unlikely a valid value for anything + test_value = '#F9S%3*Y' + # test if the parameters of the functions apply + self.tmodel_ids_with_protocol(groups=test_value, protocol=test_value) + self.tobjects(groups=test_value, protocol=test_value, model_ids=test_value) + self.zobjects(groups=test_value, protocol=test_value) + except TypeError as e: + # type error indicates that the given parameters are not valid. + raise NotImplementedError(str( + e) + "\nPlease implement:\n - the tmodel_ids_with_protocol(...) function with at least the " + "arguments 'groups' and 'protocol'\n - the tobjects(...) function with at least the arguments " + "'groups', 'protocol' and 'model_ids'\n - the zobjects(...) function with at " + "least the arguments 'groups' and 'protocol'") + except: + # any other error is fine at this stage. 
+ pass + + @abc.abstractmethod + def tobjects(self, groups=None, protocol=None, model_ids=None, **kwargs): + """This function returns the File objects of the T-Norm models of the given groups for the given protocol and the given model ids. + + Keyword parameters: + + groups : str or [str] + The groups of which the model ids should be returned. + Usually, groups are one or more elements of ('dev', 'eval') + + protocol : str + The protocol for which the model ids should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + + model_ids : [various type] + The model ids for which the File objects should be retrieved. + What defines a 'model id' is dependent on the database. + In cases, where there is only one model per client, model ids and client ids are identical. + In cases, where there is one model per file, model ids and file ids are identical. + But, there might also be other cases. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + @abc.abstractmethod + def zobjects(self, groups=None, protocol=None, **kwargs): + """This function returns the File objects of the Z-Norm impostor files of the given groups for the given protocol. + + Keyword parameters: + + groups : str or [str] + The groups of which the model ids should be returned. + Usually, groups are one or more elements of ('dev', 'eval') + + protocol : str + The protocol for which the model ids should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + def all_files(self, groups=['dev']): + """all_files(groups=None) -> files + + Returns all files of the database, including those for ZT norm, respecting the current protocol. 
+ The files can be limited using the ``all_files_options`` and the the ``z_probe_options`` in the constructor. + + **Parameters:** + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The groups to get the data for. + If ``None``, data for all groups is returned. + + **Returns:** + + files : [:py:class:`File`] + The sorted and unique list of all files of the database. + """ + files = self.objects(protocol=self.protocol, groups=groups, **self.all_files_options) + + # add all files that belong to the ZT-norm + for group in groups: + if group == 'world': + continue + files += self.tobjects(protocol=self.protocol, groups=group, model_ids=None) + files += self.zobjects(protocol=self.protocol, groups=group, **self.z_probe_options) + return self.sort(files) + + @abc.abstractmethod + def tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs): + """This function returns the ids of the T-Norm models of the given groups for the given protocol. + + Keyword parameters: + + groups : str or [str] + The groups of which the model ids should be returned. + Usually, groups are one or more elements of ('dev', 'eval') + + protocol : str + The protocol for which the model ids should be retrieved. + The protocol is dependent on your database. + If you do not have protocols defined, just ignore this field. + """ + raise NotImplementedError("This function must be implemented in your derived class.") + + def t_model_ids(self, groups='dev'): + """t_model_ids(group = 'dev') -> ids + + Returns a list of model ids of T-Norm models for the given group, respecting the current protocol. + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for T-Norm models of the given group. 
+ """ + return sorted(self.tmodel_ids_with_protocol(protocol=self.protocol, groups=groups)) + + def t_enroll_files(self, t_model_id, group='dev'): + """t_enroll_files(t_model_id, group = 'dev') -> files + + Returns a list of File objects that should be used to enroll the T-Norm model with the given model id from the given group, respecting the current protocol. + + **Parameters:** + + t_model_id : int or str + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`File`] + The sorted list of files used for to enroll the model with the given model id. + """ + return self.sort(self.tobjects(protocol=self.protocol, groups=group, model_ids=(t_model_id,))) + + def z_probe_files(self, group='dev'): + """z_probe_files(group = 'dev') -> files + + Returns a list of probe files used to compute the Z-Norm, respecting the current protocol. + The Z-probe files can be limited using the ``z_probe_options`` in the query to :py:meth:`ZTBioDatabase.z_probe_files` + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the Z-norm probe files for. + + **Returns:** + + files : [:py:class:`File`] + The unique list of files used to compute the Z-norm. + """ + return self.sort(self.zobjects(protocol=self.protocol, groups=group, **self.z_probe_options)) + + def z_probe_file_sets(self, group='dev'): + """z_probe_file_sets(group = 'dev') -> files + + Returns a list of probe FileSet objects used to compute the Z-Norm. + This function needs to be implemented in derived class implementations. + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the Z-norm probe files for. + + **Returns:** + + files : [:py:class:`FileSet`] + The unique list of file sets used to compute the Z-norm. 
+ """ + raise NotImplementedError("Please implement this function in derived classes") + + def client_id_from_t_model_id(self, t_model_id, group='dev'): + """client_id_from_t_model_id(t_model_id, group = 'dev') -> client_id + Returns the client id for the given T-Norm model id. + In this base class implementation, we just use the :py:meth:`client_id_from_model_id` function. + Override this function if you need another behavior. + **Parameters:** + t_model_id : int or str + A unique ID that identifies the T-Norm model. + group : one of ``('dev', 'eval')`` + The group to get the client ids for. + **Returns:** + client_id : [int] or [str] + A unique ID that identifies the client, to which the T-Norm model belongs. + """ + return self.client_id_from_model_id(t_model_id, group) diff --git a/bob/bio/base/database/file.py b/bob/bio/base/database/file.py new file mode 100644 index 0000000000000000000000000000000000000000..3dca312725b7e1cb231d22d43445aea4acbd14b2 --- /dev/null +++ b/bob/bio/base/database/file.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : + +import bob.db.base + + +class BioFile(bob.db.base.File): + """A simple base class that defines basic properties of a File object for use in verification experiments""" + + def __init__(self, client_id, path, file_id=None): + """**Constructor Documentation** + + Initialize the File object with the minimum required data. + + Parameters: + + client_id : various types + The id of the client this file belongs to. + Its type depends on your implementation. + If you use an SQL database, this should be an SQL type like Integer or String.
+ For path and file_id, please refer to the :py:class:`bob.db.base.File` constructor. + """ + bob.db.base.File.__init__(self, path, file_id) + + # just copy the information + self.client_id = client_id + """The id of the client to which this file belongs.""" + + +class BioFileSet(BioFile): + """This class defines the minimum interface of a set of database files that needs to be exported. + Use this class whenever the database provides several files that belong to the same probe. + Each file set has an id, and a list of associated files, which are of type :py:class:`BioFile` of the same client. + The file set id can be anything hashable, but needs to be unique across the whole database. + **Parameters:** + file_set_id : str or int + A unique ID that identifies the file set. + files : [:py:class:`BioFile`] + A non-empty list of BioFile objects that should be stored inside this file set. + All files of that list need to have the same client ID. + """ + + def __init__(self, file_set_id, files, path=None): + + # don't accept empty file lists + assert len(files), "Cannot create an empty BioFileSet" + + # call base class constructor + BioFile.__init__(self, files[0].client_id, "+".join(f.path for f in files) if path is None else path, file_set_id) + + # check that all files come from the same client + assert all(f.client_id == self.client_id for f in files) + + # The list of files contained in this set + self.files = files + """The list of :py:class:`BioFile` objects stored in this file set""" + + def __lt__(self, other): + """Defines an order between file sets by using the order of the file set ids.""" + # compare two BioFileSet objects by comparing their IDs + return self.id < other.id diff --git a/bob/bio/base/preprocessor/Filename.py b/bob/bio/base/preprocessor/Filename.py index 2b6c6d0475506bf67f19192b1ea46671442eb3a6..4c0fddd5c423555eb2357206122ac48762f6359a 100644 --- a/bob/bio/base/preprocessor/Filename.py +++ b/bob/bio/base/preprocessor/Filename.py @@ -10,7 +10,7 @@
from .Preprocessor import Preprocessor class Filename (Preprocessor): """This preprocessor is simply passing over the file name, in order to be used in an extractor that loads the data from file. - The file name that will be returned by the :py:meth:`read_data` function will contain the path of the :py:class:`bob.bio.db.BioFile`, but it might contain more paths (such as the ``--preprocessed-directory`` passed on command line). + The file name that will be returned by the :py:meth:`read_data` function will contain the path of the :py:class:`bob.bio.base.database.BioFile`, but it might contain more paths (such as the ``--preprocessed-directory`` passed on command line). """ def __init__(self): diff --git a/bob/bio/base/test/dummy/database.py b/bob/bio/base/test/dummy/database.py index 32fa5627a37fd13d7d8762998c115d9f828cd19e..c09bf3747e9c41db67537dd845b162dfb5738339 100644 --- a/bob/bio/base/test/dummy/database.py +++ b/bob/bio/base/test/dummy/database.py @@ -1,4 +1,4 @@ -from bob.bio.db import ZTBioDatabase, BioFile +from bob.bio.base.database import ZTBioDatabase from bob.bio.base.test.utils import atnt_database_directory diff --git a/bob/bio/base/test/dummy/fileset.py b/bob/bio/base/test/dummy/fileset.py index ba0fdf6a4b096ea7233cfc0cf0525ffe373e9b9b..e4b3368f90626e23e61d4093f9ddac9775ff5635 100644 --- a/bob/bio/base/test/dummy/fileset.py +++ b/bob/bio/base/test/dummy/fileset.py @@ -1,4 +1,4 @@ -from bob.bio.db import ZTBioDatabase, BioFileSet, BioFile +from bob.bio.base.database import ZTBioDatabase, BioFileSet, BioFile from bob.bio.base.test.utils import atnt_database_directory class DummyDatabase(ZTBioDatabase): diff --git a/bob/bio/base/tools/command_line.py b/bob/bio/base/tools/command_line.py index 33f0ed662fe7cb153cb48f339afa58889f2a5953..c411e21e5d49a75c16709bd68dbc829906f09eda 100644 --- a/bob/bio/base/tools/command_line.py +++ b/bob/bio/base/tools/command_line.py @@ -8,7 +8,7 @@ logger = bob.core.log.setup("bob.bio.base") from .. import utils from . 
import FileSelector -from bob.bio.db import BioDatabase +from bob.bio.base.database import BioDatabase """Execute biometric recognition algorithms on a certain biometric database. """ diff --git a/requirements.txt b/requirements.txt index 97c8574647adf403007f648acb78b847d8d1fa1a..5f863c468a4d0b8e4e819b893f7a5baf3c46b72e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ -bob.bio.db bob.blitz bob.core bob.db.base