From ed259be7182a5f8ec5518904889234cb24ebb03a Mon Sep 17 00:00:00 2001 From: Manuel Guenther <manuel.guenther@idiap.ch> Date: Tue, 23 Jun 2015 21:43:52 +0200 Subject: [PATCH] Added tons of documentation --- bob/bio/base/algorithm/Algorithm.py | 282 ++++++++++++--- bob/bio/base/algorithm/BIC.py | 7 + bob/bio/base/algorithm/LDA.py | 5 +- bob/bio/base/algorithm/PCA.py | 4 + bob/bio/base/algorithm/PLDA.py | 7 + bob/bio/base/config/grid/local.py | 6 +- bob/bio/base/database/Database.py | 407 +++++++++++++++++---- bob/bio/base/database/DatabaseBob.py | 417 ++++++++++++++++++---- bob/bio/base/database/DatabaseFileList.py | 4 +- bob/bio/base/extractor/Extractor.py | 74 +++- bob/bio/base/extractor/Linearize.py | 27 +- bob/bio/base/grid.py | 42 ++- bob/bio/base/preprocessor/Preprocessor.py | 74 +++- bob/bio/base/test/dummy/database.py | 4 +- bob/bio/base/test/test_databases.py | 2 +- bob/bio/base/test/test_scripts.py | 2 +- bob/bio/base/tools/FileSelector.py | 4 +- doc/conf.py | 46 ++- doc/implementation.rst | 2 +- doc/implemented.rst | 15 + doc/index.rst | 15 +- 21 files changed, 1214 insertions(+), 232 deletions(-) diff --git a/bob/bio/base/algorithm/Algorithm.py b/bob/bio/base/algorithm/Algorithm.py index 0124dc55..3a62c1df 100644 --- a/bob/bio/base/algorithm/Algorithm.py +++ b/bob/bio/base/algorithm/Algorithm.py @@ -24,6 +24,45 @@ from .. import utils class Algorithm: """This is the base class for all biometric recognition algorithms. It defines the minimum requirements for all derived algorithm classes. + + Call the constructor in derived class implementations. + If your derived algorithm performs feature projection, please register this here. + If it needs training for the projector or the enroller, please set this here, too. + + **Keyword Arguments:** + + performs_projection : bool + Set to ``True`` if your derived algorithm performs a projection. + Also implement the :py:func:`project` function, and the :py:func:`load_projector` if necessary. 
+ + requires_projector_training : bool + Only valid, when ``performs_projection = True``. + Set this flag to ``False``, when the projection is applied, but the projector does not need to be trained. + + split_training_features_by_client : bool + Only valid, when ``performs_projection = True`` and ``requires_projector_training = True``. + If set to ``True``, the :py:func:`train_projector` function will receive a double list (a list of lists) of data (sorted by identity). + Otherwise, the :py:func:`train_projector` function will receive data in a single list. + + use_projected_features_for_enrollment : bool + Only valid, when ``performs_projection = True``. + If set to ``False``, the enrollment is performed using the original features, otherwise the features projected using the :py:func:`project` function are used for model enrollment. + + requires_enroller_training : bool + Set this flag to ``True``, when the enroller requires specialized training. + Which kind of features are used for training depends on the ``use_projected_features_for_enrollment`` flag. + + multiple_model_scoring : str or ``None`` + The way scores are fused when multiple features are stored in one model. + See :py:func:`bob.bio.base.score_fusion_strategy` for possible values. + + multiple_probe_scoring : str or ``None`` + The way scores are fused when multiple probes are available. + See :py:func:`bob.bio.base.score_fusion_strategy` for possible values. + + kwargs : ``key=value`` pairs + A list of keyword arguments to be written in the :py:func:`__str__` function. + """ def __init__( @@ -38,12 +77,6 @@ class Algorithm: multiple_probe_scoring = 'average', # by default, compute the average between the model and several probes **kwargs # parameters from the derived class that should be reported in the __str__() function ): - """Initializes the Tool. - Call this constructor in derived class implementations. - If your derived tool performs feature projection, please register this here. 
- If it needs training for the projector or the enroller, please set this here, too. - """ - self.performs_projection = performs_projection self.requires_projector_training = performs_projection and requires_projector_training self.split_training_features_by_client = split_training_features_by_client @@ -56,29 +89,107 @@ class Algorithm: def __str__(self): - """This function returns all parameters of this class (and its derived class).""" + """__str__() -> info + + This function returns all parameters of this class (and its derived class). + + **Returns:** + + info : str + A string containing the full information of all parameters of this (and the derived) class. + """ return "%s(%s)" % (str(self.__class__), ", ".join(["%s=%s" % (key, value) for key,value in self._kwargs.items() if value is not None])) + def project(self, feature): + """project(feature) -> projected + + This function will project the given feature. + It must be overwritten by derived classes, as soon as ``performs_projection = True`` was set in the constructor. + It is assured that the :py:func:`load_projector` was called once before the ``project`` function is executed. + + **Keyword Arguments:** + + feature : object + The feature to be projected. + + **Returns:** + + projected : object + The projected features. + Must be writable with the :py:func:`write_feature` function and readable with the :py:func:`read_feature` function. + + """ + raise NotImplementedError("Please overwrite this function in your derived class") + + def enroll(self, enroll_features): - """This function will enroll and return the model from the given list of features. + """enroll(enroll_features) -> model + + This function will enroll and return the model from the given list of features. It must be overwritten by derived classes. + + **Keyword Arguments:** + + enroll_features : [object] + A list of features used for the enrollment of one model. 
+ + **Returns:** + + model : object + The model enrolled from the ``enroll_features``. + Must be writable with the :py:func:`write_model` function and readable with the :py:func:`read_model` function. + """ raise NotImplementedError("Please overwrite this function in your derived class") def score(self, model, probe): - """This function will compute the score between the given model and probe. + """score(model, probe) -> score + + This function will compute the score between the given model and probe. It must be overwritten by derived classes. + + **Keyword Arguments:** + + model : object + The model to compare the probe with. + The ``model`` was read using the :py:func:`read_model` function. + + probe : object + The probe object to compare the model with. + The ``probe`` was read using the :py:func:`read_probe` function. + + **Returns:** + + score : float + A similarity between ``model`` and ``probe``. + Higher values define higher similarities. """ raise NotImplementedError("Please overwrite this function in your derived class") def score_for_multiple_models(self, models, probe): - """This function computes the score between the given model list and the given probe. - In this base class implementation, it computes the scores for each model using the 'score' method, + """score_for_multiple_models(models, probe) -> score + + This function computes the score between the given model list and the given probe. + In this base class implementation, it computes the scores for each model using the :py:func:`score` method, and fuses the scores using the fusion method specified in the constructor of this class. - Usually this function is called from derived class 'score' functions.""" + Usually this function is called from derived class :py:func:`score` functions. + + **Keyword Arguments:** + + models : [object] + A list of model objects. + + probe : object + The probe object to compare the models with. 
+ + **Returns:** + + score : float + The fused similarity between the given ``models`` and the ``probe``. + """ if isinstance(models, list): return self.model_fusion_function([self.score(model, probe) for model in models]) elif isinstance(models, numpy.ndarray): @@ -88,9 +199,25 @@ class Algorithm: def score_for_multiple_probes(self, model, probes): - """This function computes the score between the given model and the given probe files. - In this base class implementation, it computes the scores for each probe file using the 'score' method, - and fuses the scores using the fusion method specified in the constructor of this class.""" + """score_for_multiple_probes(model, probes) -> score + + This function computes the score between the given model and the given probe files. + In this base class implementation, it computes the scores for each probe file using the :py:func:`score` method, + and fuses the scores using the fusion method specified in the constructor of this class. + + **Keyword Arguments:** + + model : object + A model object to compare the probes with. + + probes : [object] + The list of probe object to compare the models with. + + **Returns:** + + score : float + The fused similarity between the given ``model`` and the ``probes``. + """ if isinstance(probes, list): return self.probe_fusion_function([self.score(model, probe) for probe in probes]) else: @@ -106,55 +233,107 @@ class Algorithm: """Saves the given *projected* feature to a file with the given name. In this base class implementation: - - If the given feature has a 'save' attribute, it calls feature.save(bob.io.base.HDF5File(feature_file), 'w'). + - If the given feature has a ``save`` attribute, it calls ``feature.save(bob.io.base.HDF5File(feature_file), 'w')``. In this case, the given feature_file might be either a file name or a bob.io.base.HDF5File. - - Otherwise, it uses bob.io.base.save to do that. + - Otherwise, it uses :py:func:`bob.io.base.save` to do that. 
If you have a different format, please overwrite this function. Please register 'performs_projection = True' in the constructor to enable this function. + + **Keyword Arguments:** + + feature : object + A feature as returned by the :py:func:`project` function, which should be written. + + feature_file : str or :py:class:`bob.io.base.HDF5File` + The file open for writing, or the file name to write to. """ utils.save(feature, feature_file) def read_feature(self, feature_file): - """Reads the *projected* feature from file. - In this base class implementation, it uses bob.io.base.load to do that. + """read_feature(feature_file) -> feature + + Reads the *projected* feature from file. + In this base class implementation, it uses :py:func:`bob.io.base.load` to do that. If you have different format, please overwrite this function. - Please register 'performs_projection = True' in the constructor to enable this function. + Please register ``performs_projection = True`` in the constructor to enable this function. + + **Keyword Arguments:** + + feature_file : str or :py:class:`bob.io.base.HDF5File` + The file open for reading, or the file name to read from. + + **Returns:** + + feature : object + The feature that was read from file. """ return utils.load(feature_file) def write_model(self, model, model_file): - """Saves the enrolled model to the given file. + """Writes the enrolled model to the given file. In this base class implementation: - - If the given model has a 'save' attribute, it calls model.save(bob.io.base.HDF5File(model_file), 'w'). - In this case, the given model_file might be either a file name or a bob.io.base.HDF5File. - - Otherwise, it uses bob.io.base.save to do that. + - If the given model has a 'save' attribute, it calls ``model.save(bob.io.base.HDF5File(model_file), 'w')``. + In this case, the given model_file might be either a file name or a :py:class:`bob.io.base.HDF5File`. + - Otherwise, it uses :py:func:`bob.io.base.save` to do that. 
If you have a different format, please overwrite this function. + + **Keyword Arguments:** + + model : object + A model as returned by the :py:func:`enroll` function, which should be written. + + model_file : str or :py:class:`bob.io.base.HDF5File` + The file open for writing, or the file name to write to. """ utils.save(model, model_file) def read_model(self, model_file): - """Loads the enrolled model from file. - In this base class implementation, it uses bob.io.base.load to do that. + """read_model(model_file) -> model + + Loads the enrolled model from file. + In this base class implementation, it uses :py:func:`bob.io.base.load` to do that. If you have a different format, please overwrite this function. + + **Keyword Arguments:** + + model_file : str or :py:class:`bob.io.base.HDF5File` + The file open for reading, or the file name to read from. + + **Returns:** + + model : object + The model that was read from file. """ return utils.load(model_file) def read_probe(self, probe_file): - """Reads the probe feature from file. + """read_probe(probe_file) -> probe + + Reads the probe feature from file. By default, the probe feature is identical to the projected feature. - Hence, this base class implementation simply calls self.read_feature(...). + Hence, this base class implementation simply calls :py:func:`read_feature`. + + If your algorithm requires different behavior, please overwrite this function. - If your tool requires different behavior, please overwrite this function. + **Keyword Arguments:** + + probe_file : str or :py:class:`bob.io.base.HDF5File` + The file open for reading, or the file name to read from. + + **Returns:** + + probe : object + The probe that was read from file. """ return self.read_feature(probe_file) @@ -163,22 +342,33 @@ class Algorithm: def train_projector(self, training_features, projector_file): """This function can be overwritten to train the feature projector. 
If you do this, please also register the function by calling this base class constructor - and enabling the training by 'requires_projector_training = True'. + and enabling the training by ``requires_projector_training = True``. - The training function gets two parameters: + **Keyword Arguments:** - - training_features: A list of *extracted* features that can be used for training the extractor. - - projector_file: The file to write. This file should be readable with the 'load_projector' function (see above). + training_features : [object] or [[object]] + A list of *extracted* features that can be used for training the projector. + Features will be provided in a single list, if ``split_training_features_by_client = False`` was specified in the constructor, + otherwise the features will be split into lists, each of which contains the features of a single (training-)client. + + projector_file : str + The file to write. + This file should be readable with the :py:func:`load_projector` function. """ raise NotImplementedError("Please overwrite this function in your derived class, or unset the 'requires_projector_training' option in the constructor.") def load_projector(self, projector_file): """Loads the parameters required for feature projection from file. - This function usually is only useful in combination with the 'train_projector' function (see above). + This function usually is useful in combination with the :py:func:`train_projector` function. In this base class implementation, it does nothing. - Please register 'performs_projection = True' in the constructor to enable this function. + Please register `performs_projection = True` in the constructor to enable this function. + + **Keyword Arguments:** + + projector_file : str + The file to read the projector from. """ pass @@ -186,19 +376,29 @@ class Algorithm: def train_enroller(self, training_features, enroller_file): """This function can be overwritten to train the model enroller. 
If you do this, please also register the function by calling this base class constructor - and enabling the training by 'require_enroller_training = True'. + and enabling the training by ``requires_enroller_training = True``. + + **Keyword Arguments:** - The training function gets two parameters: + training_features : [object] or [[object]] + A list of *extracted* or *projected* features that can be used for training the enroller. + Features will be split into lists, each of which contains the features of a single (training-)client. - - training_features: A dictionary of *extracted* or *projected* features, which are sorted by clients, that can be used for training the extractor. - - enroller_file: The file to write. This file should be readable with the 'load_enroller' function (see above). + enroller_file : str + The file to write. + This file should be readable with the :py:func:`load_enroller` function. """ def load_enroller(self, enroller_file): """Loads the parameters required for model enrollment from file. - This function usually is only useful in combination with the 'train_enroller' function (see above). - This function is always called AFTER calling the 'load_projector'. + This function usually is only useful in combination with the :py:func:`train_enroller` function. + This function is always called **after** calling :py:func:`load_projector`. In this base class implementation, it does nothing. + + **Keyword Arguments:** + + enroller_file : str + The file to read the enroller from. 
""" pass diff --git a/bob/bio/base/algorithm/BIC.py b/bob/bio/base/algorithm/BIC.py index 33c0e03b..6985ea5e 100644 --- a/bob/bio/base/algorithm/BIC.py +++ b/bob/bio/base/algorithm/BIC.py @@ -148,3 +148,10 @@ class BIC (Algorithm): assert len(diff) == self.bic_machine.input_size scores.append(self.bic_machine(diff)) return self.model_fusion_function(scores) + + # re-define unused functions, just so that they do not get documented + def train_projector(): raise NotImplementedError() + def load_projector(): raise NotImplementedError() + def project(): raise NotImplementedError() + def write_feature(): raise NotImplementedError() + def read_feature(): raise NotImplementedError() diff --git a/bob/bio/base/algorithm/LDA.py b/bob/bio/base/algorithm/LDA.py index 9689f998..d6b3e9e3 100644 --- a/bob/bio/base/algorithm/LDA.py +++ b/bob/bio/base/algorithm/LDA.py @@ -25,7 +25,6 @@ class LDA (Algorithm): uses_variances = False, **kwargs # parameters directly sent to the base class ): - """Initializes the LDA tool with the given configuration""" # call base class constructor and register that the LDA tool performs projection and need the training features split by client Algorithm.__init__( @@ -185,3 +184,7 @@ class LDA (Algorithm): else: # single model, single probe (multiple probes have already been handled) return self.factor * self.distance_function(model, probe) + + # re-define unused functions, just so that they do not get documented + def train_enroller(): raise NotImplementedError() + def load_enroller(): raise NotImplementedError() diff --git a/bob/bio/base/algorithm/PCA.py b/bob/bio/base/algorithm/PCA.py index 515eb88b..47e04923 100644 --- a/bob/bio/base/algorithm/PCA.py +++ b/bob/bio/base/algorithm/PCA.py @@ -125,3 +125,7 @@ class PCA (Algorithm): else: # single model, single probe (multiple probes have already been handled) return self.factor * self.distance_function(model, probe) + + # re-define unused functions, just so that they do not get documented + def 
train_enroller(): raise NotImplementedError() + def load_enroller(): raise NotImplementedError() diff --git a/bob/bio/base/algorithm/PLDA.py b/bob/bio/base/algorithm/PLDA.py index d8661b7d..15025644 100644 --- a/bob/bio/base/algorithm/PLDA.py +++ b/bob/bio/base/algorithm/PLDA.py @@ -161,3 +161,10 @@ class PLDA (Algorithm): return model.log_likelihood_ratio(numpy.vstack(probes)) else: return self.score_set([model.log_likelihood_ratio(probe) for probe in probes]) + + # re-define unused functions, just so that they do not get documented + def train_projector(): raise NotImplementedError() + def load_projector(): raise NotImplementedError() + def project(): raise NotImplementedError() + def write_feature(): raise NotImplementedError() + def read_feature(): raise NotImplementedError() diff --git a/bob/bio/base/config/grid/local.py b/bob/bio/base/config/grid/local.py index f2f3a873..baddc809 100644 --- a/bob/bio/base/config/grid/local.py +++ b/bob/bio/base/config/grid/local.py @@ -2,19 +2,19 @@ import bob.bio.base # define the queue using all the default parameters grid = bob.bio.base.grid.Grid( - grid = 'local', + grid_type = 'local', number_of_parallel_processes = 4 ) # define a queue that is highly parallelized grid_p8 = bob.bio.base.grid.Grid( - grid = 'local', + grid_type = 'local', number_of_parallel_processes = 8 ) # define a queue that is highly parallelized grid_p16 = bob.bio.base.grid.Grid( - grid = 'local', + grid_type = 'local', number_of_parallel_processes = 16 ) diff --git a/bob/bio/base/database/Database.py b/bob/bio/base/database/Database.py index 7347455c..e33e4ddb 100644 --- a/bob/bio/base/database/Database.py +++ b/bob/bio/base/database/Database.py @@ -1,7 +1,44 @@ class Database: """This class represents the basic API for database access. Please use this class as a base class for your database access classes. 
- Do not forget to call the constructor of this base class in your derived class.""" + Do not forget to call the constructor of this base class in your derived class. + + **Keyword Arguments:** + + name : str + A unique name for the database. + + original_directory : str + The directory where the original data of the database are stored. + + original_extension : str + The file name extension of the original data. + + annotation_directory : str + The directory where the image annotations of the database are stored, if any. + + annotation_extension : str + The file name extension of the annotation files. + + annotation_type : str + The type of the annotation file to read, see :py:func:`bob.db.verification.utils.read_annotation_file` for accepted formats. + + protocol : str or ``None`` + The name of the protocol that defines the default experimental setup for this database. + + .. todo:: Check if the ``None`` protocol is supported. + + training_depends_on_protocol : bool + Specifies, if the training set used for training the extractor and the projector depend on the protocol. + This flag is used to avoid re-computation of data when running on the different protocols of the same database. + + models_depend_on_protocol : bool + Specifies, if the models depend on the protocol. + This flag is used to avoid re-computation of models when running on the different protocols of the same database. + + kwargs + Ignored extra arguments. + """ def __init__( self, @@ -16,39 +53,7 @@ class Database: models_depend_on_protocol = False, **kwargs ): - """ - Parameters to the constructor of the Database: - - name - A unique name for the database. - - original_directory : str - The directory where the original data of the database are stored. - - original_extension : str - The file name extension of the original data. - - annotation_directory : str - The directory where the image annotations of the database are stored, if any. 
- - annotation_extension : str - The file name extension of the annotation files. - - annotation_type : str - The type of the annotation file to read, see :py:func:`bob.db.verification.utils.read_annotation_file` for accepted formats. - - protocol : str - The name of the protocol that defines the default experimental setup for this database. - - training_depends_on_protocol : bool - Specifies, if the training set used for training the extractor and the projector depend on the protocol - - models_depend_on_protocol : bool - Specifies, if the models depend on the protocol - - kwargs - Ignored extra arguments. - """ + assert isinstance(name, str) self.name = name self.original_directory = original_directory @@ -62,7 +67,15 @@ class Database: def __str__(self): - """This function returns a string containing all parameters of this class.""" + """__str__() -> info + + This function returns all parameters of this class. + + **Returns:** + + info : str + A string containing the full information of all parameters of this class. + """ params = "name=%s, protocol=%s, original_directory=%s, original_extension=%s" % (self.name, self.protocol, self.original_directory, self.original_extension) if self.annotation_type is not None: params += ", annotation_type=%s" % annotation_type @@ -76,8 +89,21 @@ class Database: ### Helper functions that you might want to use in derived classes ########################################################################### def sort(self, files): - """Returns a sorted version of the given list of File's (or other structures that define an 'id' data member). - The files will be sorted according to their id, and duplicate entries will be removed.""" + """sort(files) -> sorted + + Returns a sorted version of the given list of File's (or other structures that define an 'id' data member). + The files will be sorted according to their id, and duplicate entries will be removed. 
+ + **Keyword Parameters:** + + files : [:py:class:`File`] + The list of files to be uniquified and sorted. + + **Returns:** + + sorted : [:py:class:`File`] + The sorted list of files, with duplicate :py:attr:`File.id`\s being removed. + """ # sort files using their sort function sorted_files = sorted(files) # remove duplicates @@ -85,8 +111,21 @@ class Database: def arrange_by_client(self, files): - """Arranges the given list of files by client id. - This function returns a list of lists of File's.""" + """arrange_by_client(files) -> files_by_client + + Arranges the given list of files by client id. + This function returns a list of lists of File's. + + **Keyword Parameters:** + + files : :py:class:`File` + A list of files that should be split up by :py:attr:`File.client_id`. + + **Returns:** + + files_by_client : [[:py:class:`File`]] + The list of lists of files, where each sub-list groups the files with the same :py:attr:`File.client_id` + """ client_files = {} for file in files: if file.client_id not in client_files: @@ -100,7 +139,21 @@ class Database: def annotations(self, file): - """Returns the annotations for the given File object, if available.""" + """annotations(file) -> annots + + Returns the annotations for the given File object, if available. + It uses :py:func:`bob.db.verification.utils.read_annotation_file` to load the annotations. + + **Keyword Parameters:** + + file : :py:class:`File` + The file for which annotations should be returned. + + **Returns:** + + annots : dict or None + The annotations for the file, if available. + """ if self.annotation_directory: try: import bob.db.verification.utils @@ -115,12 +168,32 @@ class Database: def uses_probe_file_sets(self): """Defines if, for the current protocol, the database uses several probe files to generate a score. - By default, False is returned. Overwrite the default if you need different behavior.""" + By default, ``False`` is returned. 
Overwrite the default if you need different behavior.""" return False def file_names(self, files, directory, extension): - """Returns the full path of the given File objects.""" + """file_names(files, directory, extension) -> paths + + Returns the full path of the given File objects. + + **Keyword Parameters:** + + files : [:py:class:`File`] + The list of file object to retrieve the file names for. + + directory : str + The base directory, where the files can be found. + + extension : str + The file name extension to add to all files. + + **Returns:** + + paths : [str] or [[str]] + The paths extracted for the files, in the same order. + If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set. + """ # return the paths of the files if self.uses_probe_file_sets() and files and hasattr(files[0], 'files'): # List of Filesets: do not remove duplicates @@ -130,7 +203,21 @@ class Database: return [f.make_path(directory, extension) for f in files] def original_file_names(self, files): - """Returns the full path of the original data of the given File objects.""" + """original_file_names(files) -> paths + + Returns the full path of the original data of the given File objects. + + **Keyword Parameters:** + + files : [:py:class:`File`] + The list of file object to retrieve the original data file names for. + + **Returns:** + + paths : [str] or [[str]] + The paths extracted for the files, in the same order. + If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set. 
+ """ assert self.original_directory is not None assert self.original_extension is not None return self.file_names(files, self.original_directory, self.original_extension) @@ -141,63 +228,261 @@ class Database: ########################################################################### def all_files(self, groups = None): - """Returns all files of the database""" + """all_files(groups=None) -> files + + Returns all files of the database. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The groups to get the data for. + If ``None``, data for all groups is returned. + + **Returns:** + + files : [:py:class:`File`] + The sorted and unique list of all files of the database. + """ raise NotImplementedError("Please implement this function in derived classes") def training_files(self, step = None, arrange_by_client = False): - """Returns all training File objects for the given step (might be 'train_extractor', 'train_projector', 'train_enroller' or None), and arranges them by client, if desired""" + """training_files(step = None, arrange_by_client = False) -> files + + Returns all training File objects for the given step, and arranges them by client, if desired. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + step : one of ``('train_extractor', 'train_projector', 'train_enroller')`` or ``None`` + The step for which the training data should be returned. + Might be ignored in derived class implementations. + + arrange_by_client : bool + Should the training files be arranged by client? + + .. note:: + You can use :py:func:`arrange_by_client` in derived class implementations to arrange the files. + + **Returns:** + + files : [:py:class:`File`] or [[:py:class:`File`]] + The (arranged) list of files used for the training of the given step. 
+ """ raise NotImplementedError("Please implement this function in derived classes") def model_ids(self, group = 'dev'): - """Returns a list of model ids for the given group""" + """model_ids(group = 'dev') -> ids + + Returns a list of model ids for the given group. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for the given group. + """ raise NotImplementedError("Please implement this function in derived classes") def client_id_from_model_id(self, model_id, group = 'dev'): - """Returns the client id for the given model id""" + """client_id_from_model_id(model_id, group = 'dev') -> client_id + + In some databases, each client can contain several models. + Hence, client and model ids differ. + This function converts the given model id into the corresponding client id. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + model_id : int or str + A unique ID that identifies the model for the client. + + group : one of ``('dev', 'eval')`` + The group to get the client ids for. + + **Returns:** + + client_id : int or str + A unique ID that identifies the client, to which the model belongs. + """ raise NotImplementedError("Please implement this function in derived classes") def enroll_files(self, model_id, group = 'dev'): - """Returns a list of enrollment File objects for the given model id and the given group""" + """enroll_files(model_id, group = 'dev') -> files + + Returns a list of File objects that should be used to enroll the model with the given model id from the given group. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + model_id : int or str + A unique ID that identifies the model. 
+ + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`File`] + The list of files used to enroll the model with the given model id. + """ raise NotImplementedError("Please implement this function in derived classes") def probe_files(self, model_id = None, group = 'dev'): - """Returns a list of probe File object in a specific format that should be compared with the model belonging to the given model id of the specified group""" + """probe_files(model_id = None, group = 'dev') -> files + + Returns a list of probe File objects. + If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). + Otherwise, all probe files of the given group are returned. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + model_id : int or str or ``None`` + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the probe files for. + + **Returns:** + + files : [:py:class:`File`] + The list of files used to probe the model with the given model id. + """ raise NotImplementedError("Please implement this function in derived classes") def probe_file_sets(self, model_id = None, group = 'dev'): - """Returns a list of probe FileSet object in a specific format that should be compared with the model belonging to the given model id of the specified group""" + """probe_file_sets(model_id = None, group = 'dev') -> files + + Returns a list of probe FileSet objects. + If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). + Otherwise, all probe files of the given group are returned.
+ This function needs to be implemented in derived class implementations, if the :py:meth:`uses_probe_file_sets` returns ``True``. + + **Keyword Arguments:** + + model_id : int or str or ``None`` + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`FileSet`] + The list of file sets used to probe the model with the given model id.""" raise NotImplementedError("Please implement this function in derived classes") class DatabaseZT (Database): """This class defines additional API functions that are required to compute ZT score normalization. - During construction, please call the constructor of the base class 'Database' directly.""" + This class does not define a constructor. + During construction of a derived class, please call the constructor of the base class :py:class:`Database` directly.""" def t_model_ids(self, group = 'dev'): - """Returns a list of T-Norm model ids for the given group""" + """t_model_ids(group = 'dev') -> ids + + Returns a list of model ids of T-Norm models for the given group. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for T-Norm models of the given group. + """ raise NotImplementedError("Please implement this function in derived classes") + def client_id_from_t_model_id(self, t_model_id, group = 'dev'): - """Returns the client id for the given T-model id. - In this base class implementation, we just use the :py:meth:`client_id_from_model_id` function. - Overload this function if you need another behavior.""" + """Returns the client id for the given T-Norm model id. + In this base class implementation, we just use the :py:func:`client_id_from_model_id` function. + Overload this function if you need another behavior. 
+ + **Keyword Arguments:** + + t_model_id : int or str + A unique ID that identifies the T-Norm model. + + group : one of ``('dev', 'eval')`` + The group to get the client ids for. + + **Returns:** + + client_id : [int] or [str] + A unique ID that identifies the client, to which the T-Norm model belongs. + """ return self.client_id_from_model_id(t_model_id, group) - def t_enroll_files(self, model_id, group = 'dev'): - """Returns a list of enrollment files for the given T-Norm model id and the given group""" + def t_enroll_files(self, t_model_id, group = 'dev'): + """t_enroll_files(t_model_id, group = 'dev') -> files + + Returns a list of File objects that should be used to enroll the T-Norm model with the given model id from the given group. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + t_model_id : int or str + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`File`] + The list of files used for to enroll the model with the given model id. + """ raise NotImplementedError("Please implement this function in derived classes") - def z_probe_files(self, model_id = None, group = 'dev'): - """Returns a list of Z-probe objects in a specific format that should be compared with the model belonging to the given model id of the specified group""" + def z_probe_files(self, group = 'dev'): + """z_probe_files(group = 'dev') -> files + + Returns a list of probe File objects used to compute the Z-Norm. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + group : one of ``('dev', 'eval')`` + The group to get the Z-norm probe files for. + + **Returns:** + + files : [:py:class:`File`] + The unique list of files used to compute the Z-norm. 
+ """ raise NotImplementedError("Please implement this function in derived classes") - def z_probe_file_sets(self, model_id = None, group = 'dev'): - """Returns a list of Z-probe FileSets object in a specific format that should be compared with the model belonging to the given model id of the specified group""" + def z_probe_file_sets(self, group = 'dev'): + """z_probe_file_sets(group = 'dev') -> files + + Returns a list of probe FileSet objects used to compute the Z-Norm. + This function needs to be implemented in derived class implementations. + + **Keyword Arguments:** + + group : one of ``('dev', 'eval')`` + The group to get the Z-norm probe files for. + + **Returns:** + + files : [:py:class:`FileSet`] + The unique list of file sets used to compute the Z-norm. + """ raise NotImplementedError("Please implement this function in derived classes") diff --git a/bob/bio/base/database/DatabaseBob.py b/bob/bio/base/database/DatabaseBob.py index 5eed659f..fd6202a8 100644 --- a/bob/bio/base/database/DatabaseBob.py +++ b/bob/bio/base/database/DatabaseBob.py @@ -1,8 +1,36 @@ from .Database import Database, DatabaseZT import os +import bob.db.verification.utils + class DatabaseBob (Database): - """This class can be used whenever you have a database that follows the default Bob database interface.""" + """This class can be used whenever you have a database that follows the Bob verification database interface, which is defined in :py:class:`bob.db.verification.utils.Database` + + **Keyword Parameter:** + + database : derivative of :py:class:`bob.db.verification.utils.Database` + The database instance (such as a :py:class:`bob.db.atnt.Database`) that provides the actual interface, see :ref:`verification_databases` for a list. + + all_files_options : dict + Dictionary of options passed to the :py:meth:`bob.db.verification.utils.Database.objects` database query when retrieving all data. 
+ + extractor_training_options : dict + Dictionary of options passed to the :py:meth:`bob.db.verification.utils.Database.objects` database query used to retrieve the files for the extractor training. + + projector_training_options : dict + Dictionary of options passed to the :py:meth:`bob.db.verification.utils.Database.objects` database query used to retrieve the files for the projector training. + + enroller_training_options : dict + Dictionary of options passed to the :py:meth:`bob.db.verification.utils.Database.objects` database query used to retrieve the files for the enroller training. + + check_original_files_for_existence : bool + Enables to test for the original data files when querying the database. + + kwargs : ``key=value`` pairs + The arguments of the :py:class:`Database` base class constructor. + + .. note:: Usually, the ``name``, ``protocol``, ``training_depends_on_protocol`` and ``models_depend_on_protocol`` keyword parameters of the base class constructor need to be specified. + """ def __init__( self, @@ -14,42 +42,14 @@ class DatabaseBob (Database): check_original_files_for_existence = False, **kwargs # The default parameters of the base class ): - """ - Parameters of the constructor of this database: - - database : derivative of :py:class:`bob.db.verification.utils.Database` - the bob.db.___ database that provides the actual interface, see :ref:`verification_databases` for a list. - - image_directory - The directory where the original images are stored. - - image_extension - The file extension of the original images. - - all_files_options - Options passed to the database query used to retrieve all data. - - extractor_training_options - Options passed to the database query used to retrieve the images for the extractor training. - - projector_training_options - Options passed to the database query used to retrieve the images for the projector training. 
- - enroller_training_options - Options passed to the database query used to retrieve the images for the enroller training. - - check_original_files_for_existence - Enables the test for the original data files when querying the database. - - kwargs - The arguments of the base class - """ Database.__init__( self, **kwargs ) + assert isinstance(database, bob.db.verification.utils.Database), "Only databases derived from bob.db.verification.utils.Database are supported by this interface. Please implement your own bob.bio.base.database.Database interface." + self.database = database self.original_directory = database.original_directory try: @@ -67,7 +67,15 @@ class DatabaseBob (Database): def __str__(self): - """This function returns a string containing all parameters of this class (and its derived class).""" + """__str__() -> info + + This function returns all parameters of this class (and its derived class). + + **Returns:** + + info : str + A string containing the full information of all parameters of this (and the derived) class. + """ params = ", ".join(["%s=%s" % (key, value) for key, value in self._kwargs.items()]) params += ", original_directory=%s, original_extension=%s" % (self.original_directory, self.original_extension) if self.all_files_options: params += ", all_files_options=%s"%self.all_files_options @@ -79,7 +87,29 @@ class DatabaseBob (Database): def replace_directories(self, replacements = None): - """This function replaces the original_directory and the annotation_directory with the directories read from the given replacement file.""" + """This helper function replaces the ``original_directory`` and the ``annotation_directory`` of the database with the directories read from the given replacement file. + + This function is provided for convenience, so that the database configuration files do not need to be modified. + Instead, this function uses the given dictionary of replacements to change the original directory and the original extension (if given). 
+ + The given ``replacements`` can be of type ``dict``, including all replacements, or a file name (as a ``str``), in which case the file is read. + The structure of the file should be: + + .. code-block:: text + + # Comments starting with # and empty lines are ignored + + [YOUR_..._DATA_DIRECTORY] = /path/to/your/data + [YOUR_..._ANNOTATION_DIRECTORY] = /path/to/your/annotations + + If no annotation files are available (e.g. when they are stored inside the ``database``), the annotation directory can be left out. + + **Keyword Parameter:** + + replacements : dict or str + A dictionary with replacements, or a name of a file to read the dictionary from. + If the file name does not exist, no directories are replaced. + """ if replacements is None: return if isinstance(replacements, str): @@ -110,17 +140,52 @@ class DatabaseBob (Database): def uses_probe_file_sets(self): """Defines if, for the current protocol, the database uses several probe files to generate a score.""" - return self.protocol != 'None' and self.database.provides_file_set_for_protocol(self.protocol) + return self.database.provides_file_set_for_protocol(self.protocol) def all_files(self, groups = None): - """Returns all File objects of the database for the current protocol. If the current protocol is 'None' (a string), None (NoneType) will be used instead""" - files = self.database.objects(protocol = self.protocol if self.protocol != 'None' else None, groups = groups, **self.all_files_options) - return self.sort(files) + """all_files(groups=None) -> files + + Returns all files of the database, respecting the current protocol. + The files can be limited using the ``all_files_options`` in the constructor. + + **Keyword Arguments:** + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The groups to get the data for. + If ``None``, data for all groups is returned. + + **Returns:** + + files : [:py:class:`bob.db.verification.utils.File`] + The sorted and unique list of all files of the database. 
+ """ + return self.sort(self.database.objects(protocol = self.protocol, groups = groups, **self.all_files_options)) def training_files(self, step = None, arrange_by_client = False): - """Returns all training File objects of the database for the current protocol.""" + """training_files(step = None, arrange_by_client = False) -> files + + Returns all training files for the given step, and arranges them by client, if desired, respecting the current protocol. + The files for the steps can be limited using the ``..._training_options`` defined in the constructor. + + **Keyword Arguments:** + + step : one of ``('train_extractor', 'train_projector', 'train_enroller')`` or ``None`` + The step for which the training data should be returned. + Might be ignored in derived class implementations. + + arrange_by_client : bool + Should the training files be arranged by client? + + .. note:: + You can use :py:func:`arrange_by_client` in derived class implementations to arrange the files. + + **Returns:** + + files : [:py:class:`bob.db.verification.utils.File`] or [[:py:class:`bob.db.verification.utils.File`]] + The (arranged) list of files used for the training of the given step. + """ if step is None: training_options = self.all_files_options elif step == 'train_extractor': @@ -139,34 +204,104 @@ class DatabaseBob (Database): return files def test_files(self, groups = ['dev']): - """Returns the test files (i.e., enrollment and probe files) for the given groups.""" + """test_files(groups = ['dev']) -> files + + Returns all test files (i.e., files used for enrollment and probing) for the given groups, respecting the current protocol. + The files for the steps can be limited using the ``all_files_options`` defined in the constructor. + + **Keyword Arguments:** + + groups : some of ``('dev', 'eval')`` + The groups to get the data for. + + **Returns:** + + files : [:py:class:`bob.db.verification.utils.File`] + The sorted and unique list of test files of the database. 
+ """ return self.sort(self.database.test_files(protocol = self.protocol, groups = groups, **self.all_files_options)) def model_ids(self, group = 'dev'): - """Returns the model ids for the given group and the current protocol.""" - if hasattr(self.database, 'model_ids'): - return sorted(self.database.model_ids(protocol = self.protocol, groups = group)) - else: - return sorted([model.id for model in self.database.models(protocol = self.protocol, groups = group)]) + """model_ids(group = 'dev') -> ids + + Returns a list of model ids for the given group, respecting the current protocol. + + **Keyword Arguments:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for the given group. + """ + return sorted(self.database.model_ids(protocol = self.protocol, groups = group)) def client_id_from_model_id(self, model_id, group = 'dev'): - """Returns the client id for the given model id.""" - if hasattr(self.database, 'get_client_id_from_model_id'): - return self.database.get_client_id_from_model_id(model_id) - else: - return model_id + """client_id_from_model_id(model_id, group = 'dev') -> client_id + + Uses :py:meth:`bob.db.verification.utils.Database.get_client_id_from_model_id` to retrieve the client id for the given model id. + + **Keyword Arguments:** + + model_id : int or str + A unique ID that identifies the model for the client. + + group : one of ``('dev', 'eval')`` + The group to get the client ids for. + + **Returns:** + + client_id : [int] or [str] + A unique ID that identifies the client, to which the model belongs. 
+ """ + return self.database.get_client_id_from_model_id(model_id) def enroll_files(self, model_id, group = 'dev'): - """Returns the list of enrollment File objects for the given model id.""" - files = self.database.objects(protocol = self.protocol, groups = group, model_ids = (model_id,), purposes = 'enroll', **self.all_files_options) - return self.sort(files) + """enroll_files(model_id, group = 'dev') -> files + + Returns a list of File objects that should be used to enroll the model with the given model id from the given group, respecting the current protocol. + + **Keyword Arguments:** + + model_id : int or str + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`bob.db.verification.utils.File`] + The list of files used for to enroll the model with the given model id. + """ + return self.sort(self.database.objects(protocol = self.protocol, groups = group, model_ids = (model_id,), purposes = 'enroll', **self.all_files_options)) def probe_files(self, model_id = None, group = 'dev'): - """Returns the list of probe File objects (for the given model id, if given).""" - if model_id: + """probe_files(model_id = None, group = 'dev') -> files + + Returns a list of probe File objects, respecting the current protocol. + If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). + Otherwise, all probe files of the given group are returned. + + **Keyword Arguments:** + + model_id : int or str or ``None`` + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`bob.db.verification.utils.File`] + The list of files used for to probe the model with the given model id. 
+ """ + if model_id is not None: files = self.database.objects(protocol = self.protocol, groups = group, model_ids = (model_id,), purposes = 'probe', **self.all_files_options) else: files = self.database.objects(protocol = self.protocol, groups = group, purposes = 'probe', **self.all_files_options) @@ -174,7 +309,24 @@ class DatabaseBob (Database): def probe_file_sets(self, model_id = None, group = 'dev'): - """Returns the list of probe File objects (for the given model id, if given).""" + """probe_file_sets(model_id = None, group = 'dev') -> files + + Returns a list of probe FileSet objects, respecting the current protocol. + If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). + Otherwise, all probe files of the given group are returned. + + **Keyword Arguments:** + + model_id : int or str or ``None`` + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`FileSet`] or something similar + The list of file sets used to probe the model with the given model id.""" if model_id: file_sets = self.database.object_sets(protocol = self.protocol, groups = group, model_ids = (model_id,), purposes = 'probe', **self.all_files_options) else: @@ -183,63 +335,174 @@ class DatabaseBob (Database): def annotations(self, file): - """Returns the annotations for the given File object, if available.""" + """annotations(file) -> annots + + Returns the annotations for the given File object, if available. + + **Keyword Parameters:** + + file : :py:class:`bob.db.verification.utils.File` + The file for which annotations should be returned. + + **Returns:** + + annots : dict or None + The annotations for the file, if available. 
+ """ return self.database.annotations(file) def original_file_names(self, files): - """Returns the full path of the original data of the given File objects.""" + """original_file_names(files) -> paths + + Returns the full path of the original data of the given File objects, as returned by :py:meth:`bob.db.verification.utils.Database.original_file_names`. + + **Keyword Parameters:** + + files : [:py:class:`bob.db.verification.utils.File`] + The list of file object to retrieve the original data file names for. + + **Returns:** + + paths : [str] or [[str]] + The paths extracted for the files, in the same order. + If this database provides file sets, a list of lists of file names is returned, one sub-list for each file set. + """ return self.database.original_file_names(files, self.check_existence) class DatabaseBobZT (DatabaseBob, DatabaseZT): - """This class can be used whenever you have a database that follows the default Bob database interface defining file lists for ZT score normalization.""" + """This class can be used whenever you have a database that follows the Bob ZT-norm verification database interface, which is defined in :py:class:`bob.db.verification.utils.ZTDatabase`. + + **Keyword Parameters:** + + database : derivative of :py:class:`bob.db.verification.utils.ZTDatabase` + The database instance (such as a :py:class:`bob.db.mobio.Database`) that provides the actual interface, see :ref:`verification_databases` for a list. + + z_probe_options : dict + Dictionary of options passed to the :py:meth:`bob.db.verification.utils.ZTDatabase.z_objects` database query when retrieving files for Z-probing. + + kwargs : ``key=value`` pairs + The arguments of the :py:class:`DatabaseBob` base class constructor. + + .. note:: Usually, the ``name``, ``protocol``, ``training_depends_on_protocol`` and ``models_depend_on_protocol`` keyword parameters of the :py:class:`Database` base class constructor need to be specified. 
+ """ def __init__( self, + database, z_probe_options = {}, # Limit the z-probes **kwargs ): +# assert isinstance(database, bob.db.verification.utils.ZTDatabase) // fails in tests # call base class constructor, passing all the parameters to it - DatabaseBob.__init__(self, z_probe_options = z_probe_options, **kwargs) + DatabaseBob.__init__(self, database = database, z_probe_options = z_probe_options, **kwargs) - self.m_z_probe_options = z_probe_options + self.z_probe_options = z_probe_options def all_files(self, groups = ['dev']): - """Returns all File objects of the database for the current protocol. If the current protocol is 'None' (a string), None (NoneType) will be used instead""" - files = self.database.objects(protocol = self.protocol if self.protocol != 'None' else None, groups = groups, **self.all_files_options) + """all_files(groups = ['dev']) -> files + + Returns all files of the database, including those for ZT norm, respecting the current protocol. + The files can be limited using the ``all_files_options`` and the ``z_probe_options`` in the constructor. + + **Keyword Arguments:** + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The groups to get the data for. + If ``None``, data for all groups is returned. + + **Returns:** + + files : [:py:class:`bob.db.verification.utils.File`] + The sorted and unique list of all files of the database.
+ """ + files = self.database.objects(protocol = self.protocol, groups = groups, **self.all_files_options) # add all files that belong to the ZT-norm for group in groups: if group == 'world': continue - files += self.database.tobjects(protocol = self.protocol if self.protocol != 'None' else None, groups = group, model_ids = None) - files += self.database.zobjects(protocol = self.protocol if self.protocol != 'None' else None, groups = group, **self.m_z_probe_options) + files += self.database.tobjects(protocol = self.protocol, groups = group, model_ids = None) + files += self.database.zobjects(protocol = self.protocol, groups = group, **self.z_probe_options) return self.sort(files) def t_model_ids(self, group = 'dev'): - """Returns the T-Norm model ids for the given group and the current protocol.""" - if hasattr(self.database, 'tmodel_ids'): - return sorted(self.database.tmodel_ids(protocol = self.protocol, groups = group)) - else: - return sorted([model.id for model in self.database.tmodels(protocol = self.protocol, groups = group)]) + """t_model_ids(group = 'dev') -> ids + Returns a list of model ids of T-Norm models for the given group, respecting the current protocol. - def t_enroll_files(self, model_id, group = 'dev'): - """Returns the list of enrollment File objects for the given T-Norm model id.""" - files = self.database.tobjects(protocol = self.protocol, groups = group, model_ids = (model_id,)) - return self.sort(files) + **Keyword Arguments:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for T-Norm models of the given group. 
+ """ + return sorted(self.database.t_model_ids(protocol = self.protocol, groups = group)) + + + def t_enroll_files(self, t_model_id, group = 'dev'): + """t_enroll_files(t_model_id, group = 'dev') -> files + + Returns a list of File objects that should be used to enroll the T-Norm model with the given model id from the given group, respecting the current protocol. + + **Keyword Arguments:** + + t_model_id : int or str + A unique ID that identifies the model. + + group : one of ``('dev', 'eval')`` + The group to get the enrollment files for. + + **Returns:** + + files : [:py:class:`bob.db.verification.utils.File`] + The sorted list of files used for to enroll the model with the given model id. + """ + return self.sort(self.database.t_enroll_files(protocol = self.protocol, groups = group, model_id = t_model_id)) def z_probe_files(self, group = 'dev'): - """Returns the list of Z-probe File objects.""" - files = self.database.zobjects(protocol = self.protocol, groups = group, **self.m_z_probe_options) + """z_probe_files(group = 'dev') -> files + + Returns a list of probe files used to compute the Z-Norm, respecting the current protocol. + The Z-probe files can be limited using the ``z_probe_options`` in the query to :py:meth:`bob.db.verification.utils.ZTDatabase.zobjects` + + **Keyword Arguments:** + + group : one of ``('dev', 'eval')`` + The group to get the Z-norm probe files for. + + **Returns:** + + files : [:py:class:`bob.db.verification.utils.File`] + The unique list of files used to compute the Z-norm. + """ + files = self.database.z_probe_files(protocol = self.protocol, groups = group, **self.z_probe_options) return self.sort(files) def z_probe_file_sets(self, group = 'dev'): - """Returns the list of Z-probe Fileset objects.""" - file_sets = self.database.zobject_sets(protocol = self.protocol, groups = group, **self.m_z_probe_options) + """z_probe_file_sets(group = 'dev') -> files + + Returns a list of probe FileSet objects used to compute the Z-Norm. 
+ The Z-probe files can be limited using the ``z_probe_options`` in the query to the wrapped database. + + **Keyword Arguments:** + + group : one of ``('dev', 'eval')`` + The group to get the Z-norm probe files for. + + **Returns:** + + files : [:py:class:`FileSet`] or similar + The unique list of file sets used to compute the Z-norm. + """ + file_sets = self.database.z_probf_file_sets(protocol = self.protocol, groups = group, **self.z_probe_options) return self.sort(file_sets) diff --git a/bob/bio/base/database/DatabaseFileList.py b/bob/bio/base/database/DatabaseFileList.py index a4234425..5655fc53 100644 --- a/bob/bio/base/database/DatabaseFileList.py +++ b/bob/bio/base/database/DatabaseFileList.py @@ -54,7 +54,7 @@ class DatabaseFileList (DatabaseBobZT): if group == 'world': continue if self.database.implements_zt(protocol = self.protocol if self.protocol != 'None' else None, groups = group): files += self.database.tobjects(protocol = self.protocol if self.protocol != 'None' else None, groups = group, model_ids = None) - files += self.database.zobjects(protocol = self.protocol if self.protocol != 'None' else None, groups = group, **self.m_z_probe_options) + files += self.database.zobjects(protocol = self.protocol if self.protocol != 'None' else None, groups = group, **self.z_probe_options) return self.sort(files) @@ -75,7 +75,7 @@ class DatabaseFileList (DatabaseBobZT): def client_id_from_t_model_id(self, t_model_id, group = 'dev'): """Returns the client id for the given T-model id.""" - return self.database.get_client_id_from_tmodel_id(t_model_id, groups=group, protocol=self.protocol if self.protocol != 'None' else None) + return self.database.get_client_id_from_tmodel_id(t_model_id, groups = group, protocol = self.protocol if self.protocol != 'None' else None) def t_model_ids(self, group = 'dev'): diff --git a/bob/bio/base/extractor/Extractor.py b/bob/bio/base/extractor/Extractor.py index 47da218a..9f878f52 100644 --- a/bob/bio/base/extractor/Extractor.py +++
b/bob/bio/base/extractor/Extractor.py @@ -25,15 +25,20 @@ class Extractor: """This is the base class for all feature extractors. It defines the minimum requirements that a derived feature extractor class need to implement. - The constructor takes two parameters: + If your derived class requires training, please register this here. + + **Keyword Arguments** requires_training : bool Set this flag to ``True`` if your feature extractor needs to be trained. - In that case, please override the :py:meth:`train` and :py:meth:`load` methods + In that case, please override the :py:func:`train` and :py:func:`load` methods split_training_data_by_client : bool Set this flag to ``True`` if your feature extractor requires the training data to be split by clients. Ignored, if ``requires_training`` is ``False`` + + kwargs : ``key=value`` pairs + A list of keyword arguments to be written in the :py:func:`__str__` function. """ def __init__( @@ -54,15 +59,35 @@ class Extractor: ############################################################ def __call__(self, data): - """This function will actually perform the feature extraction. + """__call__(data) -> feature + + This function will actually perform the feature extraction. It must be overwritten by derived classes. It takes the (preprocessed) data and returns the features extracted from the data. + + **Keyword Arguments** + + data : object (usually :py:class:`numpy.ndarray`) + The *preprocessed* data from which features should be extracted. + + **Returns:** + + feature : object (usually :py:class:`numpy.ndarray`) + The extracted feature. """ raise NotImplementedError("Please overwrite this function in your derived class") def __str__(self): - """This function returns a string containing all parameters of this class (and its derived class).""" + """__str__() -> info + + This function returns all parameters of this class (and its derived class). 
+ + **Returns:** + + info : str + A string containing the full information of all parameters of this (and the derived) class. + """ return "%s(%s)" % (str(self.__class__), ", ".join(["%s=%s" % (key, value) for key,value in self._kwargs.items() if value is not None])) @@ -74,6 +99,14 @@ class Extractor: """Writes the given *extracted* feature to a file with the given name. In this base class implementation, we simply use :py:func:`bob.bio.base.save` for that. If you have a different format, please overwrite this function. + + **Keyword Arguments:** + + feature : object + The extracted feature, i.e., what is returned from :py:func:`__call__`. + + feature_file : str or :py:class:`bob.io.base.HDF5File` + The file open for writing, or the name of the file to write. """ utils.save(feature, feature_file) @@ -82,26 +115,47 @@ class Extractor: """Reads the *extracted* feature from file. In this base class implementation, it uses :py:func:`bob.bio.base.load` to do that. If you have different format, please overwrite this function. + + **Keyword Arguments:** + + feature_file : str or :py:class:`bob.io.base.HDF5File` + The file open for reading or the name of the file to read from. + + **Returns:** + + feature : object (usually :py:class:`numpy.ndarray`) + The feature read from file. """ return utils.load(feature_file) def load(self, extractor_file): """Loads the parameters required for feature extraction from the extractor file. - This function usually is only useful in combination with the 'train' function (see below). + This function usually is only useful in combination with the :py:func:`train` function. In this base class implementation, it does nothing. + + **Keyword Arguments:** + + extractor_file : str + The file to read the extractor from. """ pass - def train(self, data_list, extractor_file): + def train(self, training_data, extractor_file): """This function can be overwritten to train the feature extractor. 
If you do this, please also register the function by calling this base class constructor - and enabling the training by 'requires_training = True'. + and enabling the training by ``requires_training = True``. + + **Keyword Arguments:** - The training function gets two parameters: + training_data : [object] or [[object]] + A list of *preprocessed* data that can be used for training the extractor. + Data will be provided in a single list, if ``split_training_data_by_client = False`` was specified in the constructor, + otherwise the data will be split into lists, each of which contains the data of a single (training-)client. - - data_list: A list of data that can be used for training the extractor. - - extractor_file: The file to write. This file should be readable with the 'load' function (see above). + extractor_file : str + The file to write. + This file should be readable with the :py:func:`load` function. """ raise NotImplementedError("Please overwrite this function in your derived class, or unset the 'requires_training' option in the constructor.") diff --git a/bob/bio/base/extractor/Linearize.py b/bob/bio/base/extractor/Linearize.py index ab3585b3..30109495 100644 --- a/bob/bio/base/extractor/Linearize.py +++ b/bob/bio/base/extractor/Linearize.py @@ -21,7 +21,10 @@ from .Extractor import Extractor import numpy class Linearize (Extractor): - """Extracts features by simply concatenating all pixels of the data into one long vector""" + """Extracts features by simply concatenating all elements of the data into one long vector. 
+ + If a ``dtype`` is specified in the constructor, it is assured that the resulting features have that data type. + """ def __init__(self, dtype=None): """If the ``dtype`` parameter is given, it specifies the data type that is enforced for the features.""" @@ -29,8 +32,28 @@ class Linearize (Extractor): self.dtype = dtype def __call__(self, data): - """Takes data of arbitrary dimensions and linearizes it into a 1D vector; enforcing the data type, if desired""" + """__call__(data) -> data + + Takes data of arbitrary dimensions and linearizes it into a 1D vector; enforcing the data type, if desired. + + **Keyword Parameters:** + + data : :py:class:`numpy.ndarray` + The preprocessed data to be transformed into one vector. + + **Returns:** + + data : 1D :py:class:`numpy.ndarray` + The extracted feature vector, of the desired ``dtype`` (if specified). + """ + assert isinstance(data, numpy.ndarray) + linear = numpy.reshape(data, data.size) if self.dtype is not None: linear = linear.astype(self.dtype) return linear + + + # re-define unused functions, just so that they do not get documented + def train(): raise NotImplementedError() + def load(): raise NotImplementedError() diff --git a/bob/bio/base/grid.py b/bob/bio/base/grid.py index 424d3925..06acf2f8 100644 --- a/bob/bio/base/grid.py +++ b/bob/bio/base/grid.py @@ -33,13 +33,41 @@ PREDEFINED_QUEUES = { } class Grid: - """This class is defining the options that are required to submit parallel jobs to the SGE grid. + """This class is defining the options that are required to submit parallel jobs to the SGE grid, or jobs to the local queue. + + If the given ``grid_type`` is ``'sge'`` (the default), this configuration is set up to submit algorithms to the SGE grid. + In this setup, specific SGE queues can be specified for different steps of the tool chain, and different numbers of parallel processes can be specified for each step. + Currently, only the SGE at Idiap_ is tested and supported, for other SGE's we do not assure compatibility. 
+ + If the given ``grid_type`` is ``'local'``, this configuration is set up to run using a local scheduler on a single machine. + In this case, only the ``number_of_parallel_processes`` and ``scheduler_sleep_time`` options will be taken into account. + + **Keyword Parameters:** + + grid_type : one of ``('sge', 'local')`` + The type of submission system, which should be used. + Currently, only sge and local submissions are supported. + + number_of_preprocessing_jobs, number_of_extraction_jobs, number_of_projection_jobs, number_of_enrollment_jobs, number_of_scoring_jobs : int + Only valid if ``grid_type = 'sge'``. + The number of parallel processes that should be executed for preprocessing, extraction, projection, enrollment or scoring. + + training_queue, preprocessing_queue, extraction_queue, projection_queue, enrollment_queue, scoring_queue : str or dict + SGE queues that should be used for training, preprocessing, extraction, projection, enrollment or scoring. + The queue can be defined using a dictionary of keywords that will be directly passed to the :py:func:`gridtk.tools.qsub` function, or one of our :py:data:`PREDEFINED_QUEUES`, which are adapted for Idiap_. + + number_of_parallel_processes : int + Only valid if ``grid_type = 'local'``. + The number of parallel processes, with which the preprocessing, extraction, projection, enrollment and scoring should be executed. + + scheduler_sleep_time : float + The time (in seconds) that the local scheduler will sleep between its iterations. 
""" def __init__( self, # grid type, currently supported 'local' and 'sge' - grid = 'sge', + grid_type = 'sge', # parameters for the splitting of jobs into array jobs; ignored by the local scheduler number_of_preprocessing_jobs = 32, number_of_extraction_jobs = 32, @@ -60,7 +88,7 @@ class Grid: scheduler_sleep_time = 1.0 # sleep time for scheduler in seconds ): - self.grid_type = grid + self.grid_type = grid_type # the numbers if self.is_local(): @@ -90,7 +118,13 @@ class Grid: def queue(self, params): - """Helper function to translate the given queue parameters to grid options.""" + """queue(params) -> dict + + This helper function translates the given queue parameters to grid options. + When the given ``params`` are a dictionary already, they are simply returned. + If ``params`` is a string, the :py:data:`PREDEFINED_QUEUES` are indexed with them. + If ``params`` is ``None``, or the :py:attr:`grid_type` is ``'local'``, an empty dictionary is returned. + """ if self.is_local(): return {} if isinstance(params, str) and params in PREDEFINED_QUEUES: diff --git a/bob/bio/base/preprocessor/Preprocessor.py b/bob/bio/base/preprocessor/Preprocessor.py index bad93597..0db9292e 100644 --- a/bob/bio/base/preprocessor/Preprocessor.py +++ b/bob/bio/base/preprocessor/Preprocessor.py @@ -26,6 +26,11 @@ from .. import utils class Preprocessor: """This is the base class for all preprocessors. It defines the minimum requirements for all derived proprocessor classes. + + **Keyword Arguments:** + + kwargs : ``key=value`` pairs + A list of keyword arguments to be written in the :py:func:`__str__` function. """ def __init__(self, **kwargs): @@ -37,21 +42,38 @@ class Preprocessor: # The call function (i.e. the operator() in C++ terms) def __call__(self, data, annotations): - """This is the call function that you have to overwrite in the derived class. + """__call__(data, annotations) -> data + + This is the call function that you have to overwrite in the derived class. 
The parameters that this function will receive are: + **Keyword Parameters:** + data : object - The data that needs preprocessing, usually a :py:class:`numpy.ndarray`, but might be different + The original data that needs preprocessing, usually a :py:class:`numpy.ndarray`, but might be different. annotations : {} or None - The annotations (if any), as a dictionary. - The type + The annotations (if any) that belongs to the given ``data``; as a dictionary. + The type of the annotation depends on your kind of problem. + + **Returns:** + + data : object + The *preprocessed* data, usually a :py:class:`numpy.ndarray`, but might be different. """ raise NotImplementedError("Please overwrite this function in your derived class") def __str__(self): - """This function returns a string containing all parameters of this class (and its derived class).""" + """__str__() -> info + + This function returns all parameters of this class (and its derived class). + + **Returns:** + + info : str + A string containing the full information of all parameters of this (and the derived) class. + """ return "%s(%s)" % (str(self.__class__), ", ".join(["%s=%s" % (key, value) for key,value in self._kwargs.items() if value is not None])) ############################################################ @@ -59,24 +81,56 @@ class Preprocessor: ############################################################ def read_original_data(self, original_file_name): - """Reads the *original* data (usually something like an image) from file. - In this base class implementation, it uses ``bob.io.base.load`` to do that. + """read_original_data(original_file_name) -> data + + Reads the *original* data (usually something like an image) from file. + In this base class implementation, it uses :py:func:`bob.io.base.load` to do that. If you have different format, please overwrite this function. + + **Keyword Arguments:** + + original_file_name : str + The file name to read the original data from. 
+ + **Returns:** + + data : object (usually :py:class:`numpy.ndarray`) + The original data read from file. """ return bob.io.base.load(original_file_name) def write_data(self, data, data_file): """Writes the given *preprocessed* data to a file with the given name. - In this base class implementation, we simply use :py:func:`bob.bio.base.utils.save` for that. + In this base class implementation, we simply use :py:func:`bob.bio.base.save` for that. If you have a different format (e.g. not images), please overwrite this function. + + **Keyword Arguments:** + + data : object + The preprocessed data, i.e., what is returned from :py:func:`__call__`. + + data_file : str or :py:class:`bob.io.base.HDF5File` + The file open for writing, or the name of the file to write. """ utils.save(data, data_file) def read_data(self, data_file): - """Reads the *preprocessed* data from file. - In this base class implementation, it uses :py:func:`bob.bio.base.utils.load` to do that. + """read_data(data_file) -> data + + Reads the *preprocessed* data from file. + In this base class implementation, it uses :py:func:`bob.bio.base.load` to do that. If you have different format, please overwrite this function. + + **Keyword Arguments:** + + data_file : str or :py:class:`bob.io.base.HDF5File` + The file open for reading or the name of the file to read from. + + **Returns:** + + data : object (usually :py:class:`numpy.ndarray`) + The preprocessed data read from file. 
""" return utils.load(data_file) diff --git a/bob/bio/base/test/dummy/database.py b/bob/bio/base/test/dummy/database.py index 748b5123..90c440ea 100644 --- a/bob/bio/base/test/dummy/database.py +++ b/bob/bio/base/test/dummy/database.py @@ -25,8 +25,8 @@ class DummyDatabase (DatabaseBobZT): def t_model_ids(self, group = 'dev'): return self.model_ids(group) - def t_enroll_files(self, model_id, group = 'dev'): - return self.enroll_files(model_id, group) + def t_enroll_files(self, t_model_id, group = 'dev'): + return self.enroll_files(t_model_id, group) def z_probe_files(self, group = 'dev'): return self.probe_files(None, group) diff --git a/bob/bio/base/test/test_databases.py b/bob/bio/base/test/test_databases.py index 4b899657..8691421b 100644 --- a/bob/bio/base/test/test_databases.py +++ b/bob/bio/base/test/test_databases.py @@ -51,7 +51,7 @@ def test_verification_filelist(): _check_files(db1.enroll_files(model_id=22), db2.enroll_files(model_id='22')) _check_files(db1.probe_files(model_id=22), db2.probe_files(model_id='22')) - _check_files(db1.t_enroll_files(model_id=22), db2.t_enroll_files(model_id='22')) + _check_files(db1.t_enroll_files(t_model_id=22), db2.t_enroll_files(t_model_id='22')) _check_files(db1.z_probe_files(), db2.z_probe_files()) f1 = db1.all_files()[0] diff --git a/bob/bio/base/test/test_scripts.py b/bob/bio/base/test/test_scripts.py index 8e000648..87a727f5 100644 --- a/bob/bio/base/test/test_scripts.py +++ b/bob/bio/base/test/test_scripts.py @@ -135,7 +135,7 @@ def test_verify_parallel(): '-s', 'test_parallel', '--temp-directory', test_dir, '--result-directory', test_dir, - '-g', 'bob.bio.base.grid.Grid(grid = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '--stop-on-failure', + '-g', 'bob.bio.base.grid.Grid(grid_type = "local", number_of_parallel_processes = 2, scheduler_sleep_time = 0.1)', '-G', test_database, '--run-local-scheduler', '--stop-on-failure', '--import', 
'bob.io.image' ] diff --git a/bob/bio/base/tools/FileSelector.py b/bob/bio/base/tools/FileSelector.py index f99ad166..6bc5c8ec 100644 --- a/bob/bio/base/tools/FileSelector.py +++ b/bob/bio/base/tools/FileSelector.py @@ -142,9 +142,9 @@ class FileSelector: """Returns the sorted list of T-Norm-model ids from the given group.""" return sorted(self.database.t_model_ids(group = group)) - def t_enroll_files(self, model_id, group, directory_type): + def t_enroll_files(self, t_model_id, group, directory_type): """Returns the list of T-norm model files used for enrollment of the given model_id from the given group.""" - files = self.database.t_enroll_files(group = group, model_id = model_id) + files = self.database.t_enroll_files(group = group, t_model_id = t_model_id) return self.get_paths(files, directory_type) def t_model_file(self, model_id, group): diff --git a/doc/conf.py b/doc/conf.py index 63ed152c..054b83c3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -242,12 +242,50 @@ man_pages = [ # Default processing flags for sphinx autoclass_content = 'both' autodoc_member_order = 'bysource' -autodoc_default_flags = ['members', 'undoc-members', 'inherited-members', 'show-inheritance'] +autodoc_default_flags = ['members', 'inherited-members', 'show-inheritance'] # For inter-documentation mapping: from bob.extension.utils import link_documentation -intersphinx_mapping = link_documentation(['python', 'numpy', 'bob.io.base', 'bob.db.verification.utils', 'bob.bio.face', 'bob.bio.speaker', 'bob.bio.gmm', 'bob.bio.video', 'bob.bio.csu']) - +intersphinx_mapping = link_documentation(['python', 'numpy', 'bob.io.base', 'bob.db.verification.utils', 'bob.bio.face', 'bob.bio.speaker', 'bob.bio.gmm', 'bob.bio.video', 'bob.bio.csu', 'gridtk']) + + +def skip(app, what, name, obj, skip, options): + # Do not skip the __call__ and the __str__ functions as we have special implementations for them. 
+ if name in ("__str__", "__call__"): + return False + return skip + +# getting dictionaries printed nicely. +# see: http://stackoverflow.com/questions/7250659/python-code-to-generate-part-of-sphinx-documentation-is-it-possible/18143318#18143318 +try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +from sphinx.util.compat import Directive +from docutils import nodes, statemachine + +class ExecDirective(Directive): + """Execute the specified python code and insert the output into the document""" + has_content = True + + def run(self): + oldStdout, sys.stdout = sys.stdout, StringIO() + + tab_width = self.options.get('tab-width', self.state.document.settings.tab_width) + source = self.state_machine.input_lines.source(self.lineno - self.state_machine.input_offset - 1) + + try: + exec('\n'.join(self.content)) + text = sys.stdout.getvalue() + lines = statemachine.string2lines(text, tab_width, convert_whitespace=True) + self.state_machine.insert_input(lines, source) + return [] + except Exception: + return [nodes.error(None, nodes.paragraph(text = "Unable to execute python code at %s:%d:" % (os.path.basename(source), self.lineno)), nodes.paragraph(text = str(sys.exc_info()[1])))] + finally: + sys.stdout = oldStdout def setup(app): - pass + app.connect("autodoc-skip-member", skip) + app.add_directive('exec', ExecDirective) diff --git a/doc/implementation.rst b/doc/implementation.rst index d0dcd02d..972c9a72 100644 --- a/doc/implementation.rst +++ b/doc/implementation.rst @@ -25,7 +25,7 @@ This will assure that all parameters of the experiments are stored into the ``Ex .. note:: All tools are based on reading, processing and writing files. By default, any type of file is allowed to be handled, and file names are provided to the ``read_...`` and ``write_...`` functions as strings. - However, some of the extensions -- particularly the :ref:`bob.bio.video <bob.bio.video>` extension -- requires the read and write functions to handle HDF5 files. 
+ However, some of the extensions -- particularly the :ref:`bob.bio.video <bob.bio.video>` extension -- require the read and write functions to handle files of type :py:class:`bob.io.base.HDF5File`. If you plan to write your own tools, please assure that you are following the following structure. diff --git a/doc/implemented.rst b/doc/implemented.rst index a07bc48a..5290419c 100644 --- a/doc/implemented.rst +++ b/doc/implemented.rst @@ -32,4 +32,19 @@ Grid Configuration .. automodule:: bob.bio.base.grid +.. data:: PREDEFINED_QUEUES + + A dictionary of predefined queue keywords, which are adapted to the Idiap_ SGE. + + + .. adapted from http://stackoverflow.com/a/29789910/3301902 to get a nice dictionary content view + + .. exec:: + import json + from bob.bio.base.grid import PREDEFINED_QUEUES + json_obj = json.dumps(PREDEFINED_QUEUES, sort_keys=True, indent=2) + json_obj = json_obj.replace("\n", "\n ") + print ('.. code-block:: JavaScript\n\n PREDEFINED_QUEUES = %s\n\n' % json_obj) + + .. include:: links.rst diff --git a/doc/index.rst b/doc/index.rst index ab0da819..0619eb6f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -20,9 +20,10 @@ To design a biometric recognition experiment, one has to choose: For any of these parts, several different types are implemented in the ``bob.bio`` packages, and basically any combination of the five parts can be executed. For each type, several meta-parameters can be tested. This results in a nearly infinite amount of possible experiments that can be run using the current setup. -But it is also possible to use your own database, preprocessing, feature type, or biometric recognition algorithm and test this against the baseline algorithms implemented in the our packages. +But it is also possible to use your own database, preprocessor, feature extractor, or biometric recognition algorithm and test this against the baseline algorithms implemented in our packages. 
-The ``bob.bio`` packages derived from the former `FaceRecLib <http://pypi.python.org/pypi/facereclib>`__, which is herewith outdated. +.. note:: + The ``bob.bio`` packages are derived from the former `FaceRecLib <http://pypi.python.org/pypi/facereclib>`__, which is herewith outdated. This package :py:mod:`bob.bio.base` includes the basic definition of a biometric recognition experiment, as well as a generic script, which can execute the full biometric experiment in a single command line. Changing the employed tolls such as the database, protocol, preprocessor, feature extractor or recognition algorithm is as simple as changing a command line parameter. @@ -50,9 +51,6 @@ Users Guide installation experiments implementation - implemented - py_api -.. evaluate ================ Reference Manual @@ -61,11 +59,8 @@ Reference Manual .. toctree:: :maxdepth: 2 - manual_databases - manual_preprocessors - manual_features - manual_tools - manual_utils + implemented + py_api ToDo-List -- GitLab