diff --git a/bob/bio/base/algorithm/Algorithm.py b/bob/bio/base/algorithm/Algorithm.py index 1e687d431496c40ac26635cc192d9e98b8c88fb9..ab73d596a13909df3e1f5751adb0dc78048c11a6 100644 --- a/bob/bio/base/algorithm/Algorithm.py +++ b/bob/bio/base/algorithm/Algorithm.py @@ -29,7 +29,7 @@ class Algorithm: If your derived algorithm performs feature projection, please register this here. If it needs training for the projector or the enroller, please set this here, too. - **Keyword Arguments:** + **Parameters:** performs_projection : bool Set to ``True`` if your derived algorithm performs a projection. @@ -108,7 +108,7 @@ class Algorithm: It must be overwritten by derived classes, as soon as ``performs_projection = True`` was set in the constructor. It is assured that the :py:meth:`load_projector` was called once before the ``project`` function is executed. - **Keyword Arguments:** + **Parameters:** feature : object The feature to be projected. @@ -129,7 +129,7 @@ class Algorithm: This function will enroll and return the model from the given list of features. It must be overwritten by derived classes. - **Keyword Arguments:** + **Parameters:** enroll_features : [object] A list of features used for the enrollment of one model. @@ -150,7 +150,7 @@ class Algorithm: This function will compute the score between the given model and probe. It must be overwritten by derived classes. - **Keyword Arguments:** + **Parameters:** model : object The model to compare the probe with. @@ -177,7 +177,7 @@ class Algorithm: and fuses the scores using the fusion method specified in the constructor of this class. Usually this function is called from derived class :py:meth:`score` functions. - **Keyword Arguments:** + **Parameters:** models : [object] A list of model objects. @@ -205,7 +205,7 @@ class Algorithm: In this base class implementation, it computes the scores for each probe file using the :py:meth:`score` method, and fuses the scores using the fusion method specified in the constructor of this class. - **Keyword Arguments:** + **Parameters:** model : object A model object to compare the probes with. @@ -241,7 +241,7 @@ class Algorithm: Please register 'performs_projection = True' in the constructor to enable this function. - **Keyword Arguments:** + **Parameters:** feature : object A feature as returned by the :py:meth:`project` function, which should be written. @@ -261,7 +261,7 @@ class Algorithm: Please register ``performs_projection = True`` in the constructor to enable this function. - **Keyword Arguments:** + **Parameters:** feature_file : str or :py:class:`bob.io.base.HDF5File` The file open for reading, or the file name to read from. @@ -284,7 +284,7 @@ class Algorithm: If you have a different format, please overwrite this function. - **Keyword Arguments:** + **Parameters:** model : object A model as returned by the :py:meth:`enroll` function, which should be written. @@ -303,7 +303,7 @@ class Algorithm: If you have a different format, please overwrite this function. - **Keyword Arguments:** + **Parameters:** model_file : str or :py:class:`bob.io.base.HDF5File` The file open for reading, or the file name to read from. @@ -325,7 +325,7 @@ class Algorithm: If your algorithm requires different behavior, please overwrite this function. - **Keyword Arguments:** + **Parameters:** probe_file : str or :py:class:`bob.io.base.HDF5File` The file open for reading, or the file name to read from. 
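+
+    For illustration only, a derived class using a custom probe format might overwrite this function roughly as follows (a hedged sketch; the pickle storage is a made-up example, not the default of this package):
+
+    .. code-block:: python
+
+       def read_probe(self, probe_file):
+         # read a probe that was written in a non-HDF5 format (illustrative only)
+         import pickle
+         with open(probe_file, 'rb') as f:
+           return pickle.load(f)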
@@ -344,7 +344,7 @@ class Algorithm: If you do this, please also register the function by calling this base class constructor and enabling the training by ``requires_projector_training = True``. - **Keyword Arguments:** + **Parameters:** training_features : [object] or [[object]] A list of *extracted* features that can be used for training the projector. @@ -365,7 +365,7 @@ class Algorithm: Please register `performs_projection = True` in the constructor to enable this function. - **Keyword Arguments:** + **Parameters:** projector_file : str The file to read the projector from. @@ -378,7 +378,7 @@ class Algorithm: If you do this, please also register the function by calling this base class constructor and enabling the training by ``require_enroller_training = True``. - **Keyword Arguments:** + **Parameters:** training_features : [object] or [[object]] A list of *extracted* features that can be used for training the projector. @@ -396,7 +396,7 @@ class Algorithm: This function is always called **after** calling :py:meth:`load_projector`. In this base class implementation, it does nothing. - **Keyword Arguments:** + **Parameters:** enroller_file : str The file to read the enroller from. diff --git a/bob/bio/base/algorithm/PCA.py b/bob/bio/base/algorithm/PCA.py index 47e0492359cb0d400af9dc52972c6b1558a3f34a..78281fe0da29d2b894a31ef00058f71b89b85235 100644 --- a/bob/bio/base/algorithm/PCA.py +++ b/bob/bio/base/algorithm/PCA.py @@ -14,7 +14,28 @@ import logging logger = logging.getLogger("bob.bio.base") class PCA (Algorithm): - """Performs PCA on the given data""" + """Performs PCA on the given data. + + This algorithm computes a PCA projection (:py:class:`bob.learn.linear.PCATrainer`) on the given training features, projects the features into eigenspace and computes the distance of two projected features in eigenspace. + For example, the eigenface algorithm proposed by [TP91]_ can be run with this class. + + **Parameters:** + + subspace_dimension : int or float + If specified as ``int``, defines the number of eigenvectors used in the PCA projection matrix. + If specified as ``float`` (between 0 and 1), the number of eigenvectors is calculated such that the given percentage of variance is kept. + + distance_function : function + A function that takes two parameters and returns a float. + If ``uses_variances`` is set to ``True``, the function is provided with a third parameter, which is the vector of variances (aka. eigenvalues). + + is_distance_function : bool + Set this flag to ``False`` if the given ``distance_function`` computes a similarity value (i.e., higher values are better). + + uses_variances : bool + If set to ``True``, the ``distance_function`` is provided with a third argument, which is the vector of variances (aka. eigenvalues). + + """ def __init__( self, @@ -25,8 +46,7 @@ class PCA (Algorithm): **kwargs # parameters directly sent to the base class ): - """Initializes the PCA Algorithm with the given setup""" - # call base class constructor and register that the tool performs a projection + # call base class constructor and register that the algorithm performs a projection Algorithm.__init__( self, performs_projection = True, @@ -56,7 +76,16 @@ class PCA (Algorithm): def train_projector(self, training_features, projector_file): - """Generates the PCA covariance matrix""" + """Generates the PCA covariance matrix and writes it into the given ``projector_file``.
+ + **Parameters:** + + training_features : [1D :py:class:`numpy.ndarray`] + A list of 1D training arrays (vectors) to train the PCA projection matrix with. + + projector_file : str + A writable file, into which the PCA projection matrix (as a :py:class:`bob.learn.linear.Machine`) and the eigenvalues will be written. + """ # Assure that all data are 1D [self._check_feature(feature) for feature in training_features] @@ -89,7 +118,13 @@ class PCA (Algorithm): def load_projector(self, projector_file): - """Reads the PCA projection matrix from file""" + """Reads the PCA projection matrix and the eigenvalues from file. + + **Parameters:** + + projector_file : str + An existing file, from which the PCA projection matrix and the eigenvalues are read. + """ # read PCA projector f = bob.io.base.HDF5File(projector_file) self.variances = f.read("Eigenvalues") @@ -98,14 +133,40 @@ class PCA (Algorithm): def project(self, feature): - """Projects the data using the stored covariance matrix""" + """project(feature) -> projected + + Projects the given feature into eigenspace. + + **Parameters:** + + feature : 1D :py:class:`numpy.ndarray` + The 1D feature to be projected. + + **Returns:** + + projected : 1D :py:class:`numpy.ndarray` + The ``feature`` projected into eigenspace. + """ self._check_feature(feature) # Projects the data return self.machine(feature) def enroll(self, enroll_features): - """Enrolls the model by storing all given input vectors""" + """enroll(enroll_features) -> model + + Enrolls the model by storing all given input vectors. + + **Parameters:** + + enroll_features : [1D :py:class:`numpy.ndarray`] + The list of projected features to enroll the model from. + + **Returns:** + + model : 2D :py:class:`numpy.ndarray` + The enrolled model. + """ assert len(enroll_features) [self._check_feature(feature, True) for feature in enroll_features] # just store all the features @@ -113,7 +174,24 @@ class PCA (Algorithm): def score(self, model, probe): - """Computes the distance of the model to the probe using the distance function""" + """score(model, probe) -> float + + Computes the distance of the model to the probe using the distance function specified in the constructor. + + **Parameters:** + + model : 2D :py:class:`numpy.ndarray` + The model storing all enrollment features. + + probe : 1D :py:class:`numpy.ndarray` + The probe feature vector in eigenspace. + + **Returns:** + + score : float + A similarity value between ``model`` and ``probe``. + + """ self._check_feature(probe, True) # return the negative distance (as a similarity measure) if len(model.shape) == 2: diff --git a/bob/bio/base/database/Database.py b/bob/bio/base/database/Database.py index 35d54a9c70cc5172e8e12ebc284fc2a2cbfdcf1e..cb9cc675149c819307f025922e467fbd5cd41e04 100644 --- a/bob/bio/base/database/Database.py +++ b/bob/bio/base/database/Database.py @@ -3,7 +3,7 @@ class Database: Please use this class as a base class for your database access classes. Do not forget to call the constructor of this base class in your derived class. - **Keyword Arguments:** + **Parameters:** name : str A unique name for the database. @@ -94,7 +94,7 @@ class Database: Returns a sorted version of the given list of File's (or other structures that define an 'id' data member). The files will be sorted according to their id, and duplicate entries will be removed. - **Keyword Parameters:** + **Parameters:** files : [:py:class:`File`] The list of files to be uniquified and sorted.
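+
+    A small illustration (a hedged sketch; ``SimpleFile`` is a stand-in for any structure with an ``id`` member, and ``database`` for any :py:class:`Database` instance):
+
+    .. code-block:: python
+
+       class SimpleFile:
+         def __init__(self, id):
+           self.id = id
+
+       files = [SimpleFile(3), SimpleFile(1), SimpleFile(3)]
+       # sorting by id and removing the duplicate leaves the ids [1, 3]
+       unique_files = database.sort(files)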
@@ -116,7 +116,7 @@ class Database: Arranges the given list of files by client id. This function returns a list of lists of File's. - **Keyword Parameters:** + **Parameters:** files : :py:class:`File` A list of files that should be split up by :py:attr:`File.client_id`. @@ -144,7 +144,7 @@ class Database: Returns the annotations for the given File object, if available. It uses :py:func:`bob.db.verification.utils.read_annotation_file` to load the annotations. - **Keyword Parameters:** + **Parameters:** file : :py:class:`File` The file for which annotations should be returned. @@ -177,7 +177,7 @@ class Database: Returns the full path of the given File objects. - **Keyword Parameters:** + **Parameters:** files : [:py:class:`File`] The list of file object to retrieve the file names for. @@ -207,7 +207,7 @@ class Database: Returns the full path of the original data of the given File objects. - **Keyword Parameters:** + **Parameters:** files : [:py:class:`File`] The list of file object to retrieve the original data file names for. @@ -233,7 +233,7 @@ class Database: Returns all files of the database. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** groups : some of ``('world', 'dev', 'eval')`` or ``None`` The groups to get the data for. @@ -253,7 +253,7 @@ class Database: Returns all training File objects for the given step, and arranges them by client, if desired. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** step : one of ``('train_extractor', 'train_projector', 'train_enroller')`` or ``None`` The step for which the training data should be returned. @@ -279,7 +279,7 @@ class Database: Returns a list of model ids for the given group. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** group : one of ``('dev', 'eval')`` The group to get the model ids for. @@ -300,7 +300,7 @@ class Database: This function converts the given model id into its according the client id. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** model_id : int or str A unique ID that identifies the model for the client. @@ -322,7 +322,7 @@ class Database: Returns a list of File objects that should be used to enroll the model with the given model id from the given group. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** model_id : int or str A unique ID that identifies the model. @@ -346,7 +346,7 @@ class Database: Otherwise, all probe files of the given group are returned. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** model_id : int or str or ``None`` A unique ID that identifies the model. @@ -370,7 +370,7 @@ class Database: Otherwise, all probe files of the given group are returned. This function needs to be implemented in derived class implementations, if the :py:meth:`uses_probe_file_sets` returns ``True``. - **Keyword Arguments:** + **Parameters:** model_id : int or str or ``None`` A unique ID that identifies the model. @@ -397,7 +397,7 @@ class DatabaseZT (Database): Returns a list of model ids of T-Norm models for the given group. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** group : one of ``('dev', 'eval')`` The group to get the model ids for. 
@@ -411,11 +411,13 @@ class DatabaseZT (Database): def client_id_from_t_model_id(self, t_model_id, group = 'dev'): - """Returns the client id for the given T-Norm model id. + """client_id_from_t_model_id(t_model_id, group = 'dev') -> client_id + + Returns the client id for the given T-Norm model id. In this base class implementation, we just use the :py:meth:`client_id_from_model_id` function. Overload this function if you need another behavior. - **Keyword Arguments:** + **Parameters:** t_model_id : int or str A unique ID that identifies the T-Norm model. @@ -436,7 +438,7 @@ class DatabaseZT (Database): Returns a list of File objects that should be used to enroll the T-Norm model with the given model id from the given group. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** t_model_id : int or str A unique ID that identifies the model. @@ -457,7 +459,7 @@ class DatabaseZT (Database): Returns a list of probe File objects used to compute the Z-Norm. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** group : one of ``('dev', 'eval')`` The group to get the Z-norm probe files for. @@ -475,7 +477,7 @@ class DatabaseZT (Database): Returns a list of probe FileSet objects used to compute the Z-Norm. This function needs to be implemented in derived class implementations. - **Keyword Arguments:** + **Parameters:** group : one of ``('dev', 'eval')`` The group to get the Z-norm probe files for. diff --git a/bob/bio/base/database/DatabaseBob.py b/bob/bio/base/database/DatabaseBob.py index 2e70e84c18e01168039e6b2e80309e727a236abf..f6c36cfeabac1a1463fbcb2cb4698365ffab4bde 100644 --- a/bob/bio/base/database/DatabaseBob.py +++ b/bob/bio/base/database/DatabaseBob.py @@ -6,7 +6,7 @@ import bob.db.verification.utils class DatabaseBob (Database): """This class can be used whenever you have a database that follows the Bob verification database interface, which is defined in :py:class:`bob.db.verification.utils.Database` - **Keyword Parameter:** + **Parameters:** database : derivative of :py:class:`bob.db.verification.utils.Database` The database instance (such as a :py:class:`bob.db.atnt.Database`) that provides the actual interface, see :ref:`verification_databases` for a list. @@ -104,7 +104,7 @@ class DatabaseBob (Database): If no annotation files are available (e.g. when they are stored inside the ``database``), the annotation directory can be left out. - **Keyword Parameter:** + **Parameters:** replacements : dict or str A dictionary with replacements, or a name of a file to read the dictionary from. @@ -149,7 +149,7 @@ class DatabaseBob (Database): Returns all files of the database, respecting the current protocol. The files can be limited using the ``all_files_options`` in the constructor. - **Keyword Arguments:** + **Parameters:** groups : some of ``('world', 'dev', 'eval')`` or ``None`` The groups to get the data for. @@ -169,7 +169,7 @@ class DatabaseBob (Database): Returns all training files for the given step, and arranges them by client, if desired, respecting the current protocol. The files for the steps can be limited using the ``..._training_options`` defined in the constructor. - **Keyword Arguments:** + **Parameters:** step : one of ``('train_extractor', 'train_projector', 'train_enroller')`` or ``None`` The step for which the training data should be returned. 
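+
+    For instance, the files for projector training could be retrieved like this (a hedged sketch; ``db`` stands for an already-constructed database object):
+
+    .. code-block:: python
+
+       files = db.training_files(step = 'train_projector', arrange_by_client = False)
+       print("training with %d files" % len(files))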
@@ -207,7 +207,7 @@ class DatabaseBob (Database): Returns all test files (i.e., files used for enrollment and probing) for the given groups, respecting the current protocol. The files for the steps can be limited using the ``all_files_options`` defined in the constructor. - **Keyword Arguments:** + **Parameters:** groups : some of ``('dev', 'eval')`` The groups to get the data for. @@ -224,7 +224,7 @@ class DatabaseBob (Database): Returns a list of model ids for the given group, respecting the current protocol. - **Keyword Arguments:** + **Parameters:** group : one of ``('dev', 'eval')`` The group to get the model ids for. @@ -242,7 +242,7 @@ class DatabaseBob (Database): Uses :py:meth:`bob.db.verification.utils.Database.get_client_id_from_model_id` to retrieve the client id for the given model id. - **Keyword Arguments:** + **Parameters:** model_id : int or str A unique ID that identifies the model for the client. @@ -263,7 +263,7 @@ class DatabaseBob (Database): Returns a list of File objects that should be used to enroll the model with the given model id from the given group, respecting the current protocol. - **Keyword Arguments:** + **Parameters:** model_id : int or str A unique ID that identifies the model. @@ -286,7 +286,7 @@ class DatabaseBob (Database): If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). Otherwise, all probe files of the given group are returned. - **Keyword Arguments:** + **Parameters:** model_id : int or str or ``None`` A unique ID that identifies the model. @@ -313,7 +313,7 @@ class DatabaseBob (Database): If a ``model_id`` is specified, only the probe files that should be compared with the given model id are returned (for most databases, these are all probe files of the given group). Otherwise, all probe files of the given group are returned. - **Keyword Arguments:** + **Parameters:** model_id : int or str or ``None`` A unique ID that identifies the model. @@ -337,7 +337,7 @@ class DatabaseBob (Database): Returns the annotations for the given File object, if available. - **Keyword Parameters:** + **Parameters:** file : :py:class:`bob.db.verification.utils.File` The file for which annotations should be returned. @@ -355,7 +355,7 @@ class DatabaseBob (Database): Returns the full path of the original data of the given File objects, as returned by :py:meth:`bob.db.verification.utils.Database.original_file_names`. - **Keyword Parameters:** + **Parameters:** files : [:py:class:`bob.db.verification.utils.File`] The list of file object to retrieve the original data file names for. @@ -373,7 +373,7 @@ class DatabaseBob (Database): class DatabaseBobZT (DatabaseBob, DatabaseZT): """This class can be used whenever you have a database that follows the Bob ZT-norm verification database interface, which is defined in :py:class:`bob.db.verification.utils.ZTDatabase`. - **Keyword Parameters:** + **Parameters:** database : derivative of :py:class:`bob.db.verification.utils.ZTDatabase` The database instance (such as a :py:class:`bob.db.mobio.Database`) that provides the actual interface, see :ref:`verification_databases` for a list. @@ -406,7 +406,7 @@ class DatabaseBobZT (DatabaseBob, DatabaseZT): Returns all files of the database, including those for ZT norm, respecting the current protocol. The files can be limited using the ``all_files_options`` and the the ``z_probe_options`` in the constructor. 
- **Keyword Arguments:** + **Parameters:** groups : some of ``('world', 'dev', 'eval')`` or ``None`` The groups to get the data for. @@ -432,7 +432,7 @@ class DatabaseBobZT (DatabaseBob, DatabaseZT): Returns a list of model ids of T-Norm models for the given group, respecting the current protocol. - **Keyword Arguments:** + **Parameters:** group : one of ``('dev', 'eval')`` The group to get the model ids for. @@ -450,7 +450,7 @@ class DatabaseBobZT (DatabaseBob, DatabaseZT): Returns a list of File objects that should be used to enroll the T-Norm model with the given model id from the given group, respecting the current protocol. - **Keyword Arguments:** + **Parameters:** t_model_id : int or str A unique ID that identifies the model. @@ -472,7 +472,7 @@ class DatabaseBobZT (DatabaseBob, DatabaseZT): Returns a list of probe files used to compute the Z-Norm, respecting the current protocol. The Z-probe files can be limited using the ``z_probe_options`` in the query to :py:meth:`bob.db.verification.utils.ZTDatabase.z_probe_files` - **Keyword Arguments:** + **Parameters:** group : one of ``('dev', 'eval')`` The group to get the Z-norm probe files for. @@ -492,7 +492,7 @@ class DatabaseBobZT (DatabaseBob, DatabaseZT): Returns a list of probe FileSet objects used to compute the Z-Norm. The Z-probe files can be limited using the ``z_probe_options`` in the query to - **Keyword Arguments:** + **Parameters:** group : one of ``('dev', 'eval')`` The group to get the Z-norm probe files for. diff --git a/bob/bio/base/database/DatabaseFileList.py b/bob/bio/base/database/DatabaseFileList.py index b95126830b659d37bf6043fbab1166acb6b548ab..e78bf9465633f8295cb6f539e4d160b6614ec55c 100644 --- a/bob/bio/base/database/DatabaseFileList.py +++ b/bob/bio/base/database/DatabaseFileList.py @@ -19,24 +19,27 @@ from .DatabaseBob import DatabaseBobZT +import bob.db.verification.filelist class DatabaseFileList (DatabaseBobZT): - """This class should be used whenever you have an :py:class:`bob.db.verification.filelist.Database`.""" + """This class can be used whenever you have a database that uses the Bob filelist database interface, which is defined in :py:class:`bob.db.verification.filelist.Database` + + **Parameters:** + + database : a :py:class:`bob.db.verification.filelist.Database` + The database instance that provides the actual interface. + + kwargs : ``key=value`` pairs + The arguments of the :py:class:`DatabaseBobZT` or :py:class:`DatabaseBob` base class constructors. + + .. note:: Usually, the ``name``, ``protocol``, ``training_depends_on_protocol`` and ``models_depend_on_protocol`` keyword parameters of the base class constructor need to be specified. + """ def __init__( self, database, # The bob database that is used **kwargs # The default parameters of the base class ): - """ - Parameters of the constructor of this database: - - database : :py:class:`bob.db.verification.filelist.Database` - The database that provides the actual interface - - kwargs - Keyword arguments directly passed to the :py:class:`DatabaseBobZT` base class constructor - """ DatabaseBobZT.__init__( self, @@ -44,40 +47,122 @@ class DatabaseFileList (DatabaseBobZT): **kwargs ) + assert isinstance(database, bob.db.verification.filelist.Database) + def all_files(self, groups = ['dev']): - """Returns all File objects of the database for the current protocol. 
If the current protocol is 'None' (a string), None (NoneType) will be used instead""" - files = self.database.objects(protocol = self.protocol if self.protocol != 'None' else None, groups = groups, **self.all_files_options) + """all_files(groups = ['dev']) -> files + + Returns all files of the database, respecting the current protocol. + If the current protocol is ``'None'``, ``None`` will be used instead. + When the underlying file list database provides files for ZT score normalization, these files are returned as well. + The files can be limited using the ``all_files_options`` in the constructor. + + **Parameters:** + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The groups to get the data for. + If ``None``, data for all groups is returned. + + **Returns:** + + files : [:py:class:`bob.db.verification.filelist.File`] + The sorted and unique list of all files of the database. + """ + protocol = self.protocol if self.protocol != 'None' else None + files = self.database.objects(protocol = protocol, groups = groups, **self.all_files_options) # add all files that belong to the ZT-norm for group in groups: if group == 'world': continue - if self.database.implements_zt(protocol = self.protocol if self.protocol != 'None' else None, groups = group): - files += self.database.tobjects(protocol = self.protocol if self.protocol != 'None' else None, groups = group, model_ids = None) - files += self.database.zobjects(protocol = self.protocol if self.protocol != 'None' else None, groups = group, **self.z_probe_options) + if self.database.implements_zt(protocol = protocol, groups = group): + files += self.database.tobjects(protocol = protocol, groups = group, model_ids = None) + files += self.database.zobjects(protocol = protocol, groups = group, **self.z_probe_options) return self.sort(files) def uses_probe_file_sets(self): - """Defines if, for the current protocol, the database uses several probe files to generate a score.""" + """File sets are not (yet) supported in the :py:class:`bob.db.verification.filelist.Database`, so this function returns ``False`` throughout.""" return False def model_ids(self, group = 'dev'): - """Returns the model ids for the given group and the current protocol.""" + """model_ids(group = 'dev') -> ids + + Returns a list of model ids for the given group, respecting the current protocol. + If the current protocol is ``'None'``, ``None`` will be used instead. + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [str] + The list of (unique) model ids for the given group. + """ return sorted(self.database.model_ids(protocol = self.protocol if self.protocol != 'None' else None, groups = group)) def client_id_from_model_id(self, model_id, group = 'dev'): - """Returns the client id for the given model id.""" + """client_id_from_model_id(model_id, group = 'dev') -> client_id + + Uses :py:meth:`bob.db.verification.filelist.Database.get_client_id_from_model_id` to retrieve the client id for the given model id. + If the current protocol is ``'None'``, ``None`` will be used instead. + + **Parameters:** + + model_id : str + A unique ID that identifies the model for the client. + + group : one of ``('dev', 'eval')`` + The group to get the client ids for. + + **Returns:** + + client_id : str + A unique ID that identifies the client, to which the model belongs.
+ """ return self.database.get_client_id_from_model_id(model_id, groups = group, protocol = self.protocol if self.protocol != 'None' else None) def client_id_from_t_model_id(self, t_model_id, group = 'dev'): - """Returns the client id for the given T-model id.""" + """client_id_from_t_model_idt_(model_id, group = 'dev') -> client_id + + Uses :py:meth:`bob.db.verification.filelist.Database.get_client_id_from_t_model_id` to retrieve the client id for the T-norm given model id. + If the current protocol is ``'None'``, ``None`` will be used instead. + + **Parameters:** + + t_model_id : str + A unique ID that identifies the T-Norm model. + + group : one of ``('dev', 'eval')`` + The group to get the client ids for. + + **Returns:** + + client_id : str + A unique ID that identifies the client, to which the T-Norm model belongs. + """ return self.database.get_client_id_from_tmodel_id(t_model_id, groups = group, protocol = self.protocol if self.protocol != 'None' else None) def t_model_ids(self, group = 'dev'): - """Returns the T-Norm model ids for the given group and the current protocol.""" + """t_model_ids(group = 'dev') -> ids + + Returns a list of model ids of T-Norm models for the given group, respecting the current protocol. + If the current protocol is ``'None'``, ``None`` will be used instead. + + **Parameters:** + + group : one of ``('dev', 'eval')`` + The group to get the model ids for. + + **Returns:** + + ids : [int] or [str] + The list of (unique) model ids for T-Norm models of the given group. + """ return sorted(self.database.tmodel_ids(protocol = self.protocol if self.protocol != 'None' else None, groups = group)) diff --git a/bob/bio/base/extractor/Extractor.py b/bob/bio/base/extractor/Extractor.py index 726957421d37bc83a8e96eab841186be2fdf96cb..83ee6c6d6097afec3d366d8155b93af3b5302af9 100644 --- a/bob/bio/base/extractor/Extractor.py +++ b/bob/bio/base/extractor/Extractor.py @@ -27,7 +27,7 @@ class Extractor: If your derived class requires training, please register this here. - **Keyword Arguments** + **Parameters** requires_training : bool Set this flag to ``True`` if your feature extractor needs to be trained. @@ -65,7 +65,7 @@ class Extractor: It must be overwritten by derived classes. It takes the (preprocessed) data and returns the features extracted from the data. - **Keyword Arguments** + **Parameters** data : object (usually :py:class:`numpy.ndarray`) The *preprocessed* data from which features should be extracted. @@ -100,7 +100,7 @@ class Extractor: In this base class implementation, we simply use :py:func:`bob.bio.base.save` for that. If you have a different format, please overwrite this function. - **Keyword Arguments:** + **Parameters:** feature : object The extracted feature, i.e., what is returned from :py:meth:`__call__`. @@ -116,7 +116,7 @@ class Extractor: In this base class implementation, it uses :py:func:`bob.bio.base.load` to do that. If you have different format, please overwrite this function. - **Keyword Arguments:** + **Parameters:** feature_file : str or :py:class:`bob.io.base.HDF5File` The file open for reading or the name of the file to read from. @@ -134,7 +134,7 @@ class Extractor: This function usually is only useful in combination with the :py:meth:`train` function. In this base class implementation, it does nothing. - **Keyword Arguments:** + **Parameters:** extractor_file : str The file to read the extractor from. 
@@ -147,7 +147,7 @@ class Extractor: If you do this, please also register the function by calling this base class constructor and enabling the training by ``requires_training = True``. - **Keyword Arguments:** + **Parameters:** training_data : [object] or [[object]] A list of *preprocessed* data that can be used for training the extractor. diff --git a/bob/bio/base/extractor/Linearize.py b/bob/bio/base/extractor/Linearize.py index 30109495be8940fa60628012dbc4e277c4ec739f..d6ff09beb85cda107e5b8182e627729e6a8891b6 100644 --- a/bob/bio/base/extractor/Linearize.py +++ b/bob/bio/base/extractor/Linearize.py @@ -36,7 +36,7 @@ class Linearize (Extractor): Takes data of arbitrary dimensions and linearizes it into a 1D vector; enforcing the data type, if desired. - **Keyword Parameters:** + **Parameters:** data : :py:class:`numpy.ndarray` The preprocessed data to be transformed into one vector. diff --git a/bob/bio/base/grid.py b/bob/bio/base/grid.py index d9ae9c2add40fa3a0f6c734e7cb937a85b5c03d1..f11fbd3bc0868554c2b6b30594b60ad1f4bd2ae3 100644 --- a/bob/bio/base/grid.py +++ b/bob/bio/base/grid.py @@ -42,7 +42,7 @@ class Grid: If the given ``grid_type`` is ``'local'``, this configuration is set up to run using a local scheduler on a single machine. In this case, only the ``number_of_parallel_processes`` and ``scheduler_sleep_time`` options will be taken into account. - **Keyword Parameters:** + **Parameters:** grid_type : one of ``('sge', 'local')`` The type of submission system, which should be used. diff --git a/bob/bio/base/preprocessor/Preprocessor.py b/bob/bio/base/preprocessor/Preprocessor.py index 6e70ef3f34bb867e93415af24696f3ee0e19041a..a01026a72a3ed2f976b0b75e37d1a005af05afa2 100644 --- a/bob/bio/base/preprocessor/Preprocessor.py +++ b/bob/bio/base/preprocessor/Preprocessor.py @@ -27,7 +27,7 @@ class Preprocessor: """This is the base class for all preprocessors. It defines the minimum requirements for all derived proprocessor classes. - **Keyword Arguments:** + **Parameters:** kwargs : ``key=value`` pairs A list of keyword arguments to be written in the :py:meth:`__str__` function. @@ -47,7 +47,7 @@ class Preprocessor: This is the call function that you have to overwrite in the derived class. The parameters that this function will receive are: - **Keyword Parameters:** + **Parameters:** data : object The original data that needs preprocessing, usually a :py:class:`numpy.ndarray`, but might be different. @@ -87,7 +87,7 @@ class Preprocessor: In this base class implementation, it uses :py:func:`bob.io.base.load` to do that. If you have different format, please overwrite this function. - **Keyword Arguments:** + **Parameters:** original_file_name : str The file name to read the original data from. @@ -105,7 +105,7 @@ class Preprocessor: In this base class implementation, we simply use :py:func:`bob.bio.base.save` for that. If you have a different format (e.g. not images), please overwrite this function. - **Keyword Arguments:** + **Parameters:** data : object The preprocessed data, i.e., what is returned from :py:meth:`__call__`. @@ -123,7 +123,7 @@ class Preprocessor: In this base class implementation, it uses :py:func:`bob.bio.base.load` to do that. If you have different format, please overwrite this function. - **Keyword Arguments:** + **Parameters:** data_file : str or :py:class:`bob.io.base.HDF5File` The file open for reading or the name of the file to read from. 
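+
+    For example (an illustrative sketch only), a derived preprocessor that handles plain text data might overwrite the write/read pair as follows:
+
+    .. code-block:: python
+
+       class TextPreprocessor (Preprocessor):
+         def write_data(self, data, data_file):
+           with open(data_file, 'w') as f:
+             f.write(data)
+
+         def read_data(self, data_file):
+           with open(data_file) as f:
+             return f.read()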
diff --git a/bob/bio/base/script/grid_search.py b/bob/bio/base/script/grid_search.py index 62379c5f195cee599b2cb3402bde3cb412e1639f..5a5131a98ff8c123bf8332595296cb6958358863 100755 --- a/bob/bio/base/script/grid_search.py +++ b/bob/bio/base/script/grid_search.py @@ -70,22 +70,22 @@ def command_line_options(command_line_parameters): parser.add_argument('-l', '--parallel', type=int, help = 'Run the algorithms in parallel on the local machine, using the given number of parallel threads') - parser.add_argument('-L', '--gridtk-database-split-level', type=int, default=-1, + parser.add_argument('-L', '--gridtk-database-split-level', metavar='LEVEL', type=int, default=-1, help = 'Split the gridtk databases after the following level -1 - never split; 0 - preprocess; 1 - extract; 2 -- project; 3 -- enroll; 4 -- score;') - parser.add_argument('-x', '--executable', + parser.add_argument('-x', '--executable', metavar='X', help = '(optional) The executable to be executed instead of bob/bio/base/verify.py (taken *always* from bob.bio.base, not from the bin directory)') - parser.add_argument('-R', '--result-directory', default = os.path.join("/idiap/user", os.environ["USER"]), + parser.add_argument('-R', '--result-directory', metavar='DIR', help = 'The directory where to write the resulting score files to.') - parser.add_argument('-T', '--temp-directory', default = os.path.join("/idiap/temp", os.environ["USER"]), + parser.add_argument('-T', '--temp-directory', metavar='DIR', help = 'The directory where to write temporary files into.') - parser.add_argument('-i', '--preprocessed-directory', + parser.add_argument('-i', '--preprocessed-directory', metavar='DIR', help = '(optional) The directory where to read the already preprocessed data from (no preprocessing is performed in this case).') - parser.add_argument('-G', '--gridtk-database-directory', default = 'grid_db', + parser.add_argument('-G', '--gridtk-database-directory', metavar='DIR', default = 'grid_db', help = 'Directory where the submitted.sql3 files should be written into (will create sub-directories on need)') parser.add_argument('-w', '--write-commands', @@ -101,7 +101,7 @@ def command_line_options(command_line_parameters): help = 'Use the given variable instead of the "replace" keyword in the configuration file') parser.add_argument('parameters', nargs = argparse.REMAINDER, - help = "Parameters directly passed to the verify script. Use -- to separate this parameters from the parameters of this script. See 'bin/verify.py --help' for a complete list of options.") + help = "Parameters directly passed to the verify.py script. Use -- to separate these parameters from the parameters of this script.
See './bin/verify.py --help' for a complete list of options.") bob.core.log.add_command_line_option(parser) @@ -109,6 +109,14 @@ def command_line_options(command_line_parameters): args = parser.parse_args(command_line_parameters) bob.core.log.set_verbosity_level(logger, args.verbose) + # set base directories + is_idiap = os.path.isdir("/idiap") + if args.temp_directory is None: + args.temp_directory = "/idiap/temp/%s/grid_search" % os.environ["USER"] if is_idiap else "temp/grid_search" + if args.result_directory is None: + args.result_directory = "/idiap/user/%s/grid_search" % os.environ["USER"] if is_idiap else "results/grid_search" + + if args.executable: global verify verify = __import__('importlib').import_module(args.executable) diff --git a/bob/bio/base/script/verify.py b/bob/bio/base/script/verify.py index 869e88a29e1b3494f5fee7e47f1b30a6adb823fc..861706efc1b7f4cde9be5d01cd404e07bd4a9be6 100644 --- a/bob/bio/base/script/verify.py +++ b/bob/bio/base/script/verify.py @@ -324,7 +324,8 @@ def execute(args): indices = tools.indices(fs.model_ids(args.group), None if args.grid is None else args.grid.number_of_scoring_jobs), groups = [args.group], types = [args.score_type], - force = args.force) + force = args.force, + write_compressed = args.write_compressed_score_files) elif args.score_type in ['C', 'D']: tools.compute_scores( @@ -333,22 +334,27 @@ def execute(args): indices = tools.indices(fs.t_model_ids(args.group), None if args.grid is None else args.grid.number_of_scoring_jobs), groups = [args.group], types = [args.score_type], - force = args.force) + force = args.force, + write_compressed = args.write_compressed_score_files) else: - tools.zt_norm(groups = [args.group]) + tools.zt_norm( + groups = [args.group], + write_compressed = args.write_compressed_score_files) # concatenate elif args.sub_task == 'concatenate': tools.concatenate( args.zt_norm, - groups = [args.group]) + groups = [args.group], + write_compressed = args.write_compressed_score_files) # calibrate scores elif args.sub_task == 'calibrate': tools.calibrate( - norms = ['nonorm', 'ztnorm'] if args.zt_norm else ['nonorm'], - groups = args.groups) + args.zt_norm, + groups = args.groups, + write_compressed = args.write_compressed_score_files) # Test if the keyword was processed else: diff --git a/bob/bio/base/tools/FileSelector.py b/bob/bio/base/tools/FileSelector.py index 6bc5c8ec1a0308605c58351ec4b30b0753dd29d6..0dc71caa39db05b7abb4fcac0efc20e11a164237 100644 --- a/bob/bio/base/tools/FileSelector.py +++ b/bob/bio/base/tools/FileSelector.py @@ -7,7 +7,53 @@ from .. import utils @utils.Singleton class FileSelector: - """This class provides shortcuts for selecting different files for different stages of the verification process""" + """This class provides shortcuts for selecting different files for different stages of the verification process. + + It communicates with the database and provides lists of file names for all steps of the tool chain. + + .. todo:: Find a way that this class' methods get correctly documented, instead of the :py:class:`bob.bio.base.Singleton` wrapper class. + + **Parameters:** + + database : :py:class:`bob.bio.base.database.Database` or derived + The database object that provides the list of files. + + preprocessed_directory : str + The directory, where preprocessed data should be written to. + + extractor_file : str + The filename, where the extractor should be written to (if any). + + extracted_directory : str + The directory, where extracted features should be written to.
+ + projector_file : str + The filename, where the projector should be written to (if any). + + projected_directory : str + The directory, where projected features should be written to (if required). + + enroller_file : str + The filename, where the enroller should be written to (if required). + + model_directories : (str, str) + The directories, where models and t-norm models should be written to. + + score_directories : (str, str) + The directories, where score files for no-norm and ZT-norm should be written to. + + zt_score_directories : (str, str, str, str, str) or ``None`` + If given, specify the directories, where intermediate score files required to compute the ZT-norm should be written. + The 5 directories are for 1: normal scores; 2: Z-scores; 3: T-scores; 4: ZT-scores; 5: ZT-samevalue scores. + + default_extension : str + The default extension of all intermediate files. + + compressed_extension : str + The extension for writing compressed score files. + By default, no compression is performed. + + """ def __init__( self, diff --git a/bob/bio/base/tools/algorithm.py b/bob/bio/base/tools/algorithm.py index d19c7c054bb449727bd000ac0943044c40d40b73..b6105d105e5e4191c2bffea251b1140afd5c9828 100644 --- a/bob/bio/base/tools/algorithm.py +++ b/bob/bio/base/tools/algorithm.py @@ -9,99 +9,193 @@ from .extractor import read_features from .. import utils -def train_projector(algorithm, extractor, force=False): - """Train the feature projector with the extracted features of the world group.""" - if algorithm.requires_projector_training: - # the file selector object - fs = FileSelector.instance() - - if utils.check_file(fs.projector_file, force, 1000): - logger.info("- Projection: projector '%s' already exists.", fs.projector_file) - else: - bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file)) - # train projector - logger.info("- Projection: loading training data") - train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client) - train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client) - if algorithm.split_training_features_by_client: - logger.info("- Projection: training projector '%s' using %d identities: ", fs.projector_file, len(train_files)) - else: - logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files)) +def train_projector(algorithm, extractor, force = False): + """Trains the feature projector using extracted features of the ``'world'`` group, if the algorithm requires projector training. - # perform training - algorithm.train_projector(train_features, fs.projector_file) + This function should only be called when the ``algorithm`` actually requires projector training. + The projector of the given ``algorithm`` is trained using extracted features. + It writes the projector to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`. + By default, if the target file already exists, it is not re-created. + **Parameters:** + algorithm : :py:class:`bob.bio.base.algorithm.Algorithm` or derived + The algorithm, in which the projector should be trained.
-def project(algorithm, extractor, groups = None, indices = None, force=False): - """Projects the features for all files of the database.""" - # load the projector file - if algorithm.performs_projection: - # the file selector object - fs = FileSelector.instance() + extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived + The extractor, used for reading the training data. - # load the projector - algorithm.load_projector(fs.projector_file) + force : bool + If given, the projector file is regenerated, even if it already exists. + """ + if not algorithm.requires_projector_training: + logger.warn("The train_projector function should not have been called, since the algorithm does not need projector training.") + return - feature_files = fs.feature_list(groups=groups) - projected_files = fs.projected_list(groups=groups) + # the file selector object + fs = FileSelector.instance() - # select a subset of indices to iterate - if indices != None: - index_range = range(indices[0], indices[1]) - logger.info("- Projection: splitting of index range %s", str(indices)) + if utils.check_file(fs.projector_file, force, 1000): + logger.info("- Projection: projector '%s' already exists.", fs.projector_file) + else: + bob.io.base.create_directories_safe(os.path.dirname(fs.projector_file)) + # train projector + logger.info("- Projection: loading training data") + train_files = fs.training_list('extracted', 'train_projector', arrange_by_client = algorithm.split_training_features_by_client) + train_features = read_features(train_files, extractor, algorithm.split_training_features_by_client) + if algorithm.split_training_features_by_client: + logger.info("- Projection: training projector '%s' using %d identities: ", fs.projector_file, len(train_files)) else: - index_range = range(len(feature_files)) + logger.info("- Projection: training projector '%s' using %d training files: ", fs.projector_file, len(train_files)) - logger.info("- Projection: projecting %d features from directory '%s' to directory '%s'", len(index_range), fs.directories['extracted'], fs.directories['projected']) - # extract the features - for i in index_range: - feature_file = str(feature_files[i]) - projected_file = str(projected_files[i]) + # perform training + algorithm.train_projector(train_features, fs.projector_file) - if not utils.check_file(projected_file, force, 1000): - # load feature - feature = extractor.read_feature(feature_file) - # project feature - projected = algorithm.project(feature) - # write it - bob.io.base.create_directories_safe(os.path.dirname(projected_file)) - algorithm.write_feature(projected, projected_file) +def project(algorithm, extractor, groups = None, indices = None, force = False): + """Projects the features for all files of the database. -def train_enroller(algorithm, extractor, force=False): - """Trains the model enroller using the extracted or projected features, depending on your setup of the agorithm.""" - if algorithm.requires_enroller_training: - # the file selector object - fs = FileSelector.instance() + The given ``algorithm`` is used to project all features required for the current experiment. + It writes the projected data into the directory specified by the :py:class:`bob.bio.base.tools.FileSelector`. + By default, if target files already exist, they are not re-created.
- if utils.check_file(fs.enroller_file, force, 1000): - logger.info("- Enrollment: enroller '%s' already exists.", fs.enroller_file) - else: - # define the tool that is required to read the features - reader = algorithm if algorithm.use_projected_features_for_enrollment else extractor - bob.io.base.create_directories_safe(os.path.dirname(fs.enroller_file)) + The extractor is only used to load the data in a coherent way. + + **Parameters:** + + algorithm : :py:class:`bob.bio.base.algorithm.Algorithm` or derived + The algorithm, used for projecting features and writing them to file. + + extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived + The extractor, used for reading the extracted features, which should be projected. + + groups : some of ``('world', 'dev', 'eval')`` or ``None`` + The list of groups, for which the data should be projected. + + indices : (int, int) or None + If specified, only the features for the given index range ``range(begin, end)`` should be projected. + This is usually given when parallel threads are executed. + + force : bool + If given, files are regenerated, even if they already exist. + """ + if not algorithm.performs_projection: + logger.warn("The project function should not have been called, since the algorithm does not perform projection.") + return + + # the file selector object + fs = FileSelector.instance() + + # load the projector + algorithm.load_projector(fs.projector_file) + + feature_files = fs.feature_list(groups=groups) + projected_files = fs.projected_list(groups=groups) + + # select a subset of indices to iterate + if indices != None: + index_range = range(indices[0], indices[1]) + logger.info("- Projection: splitting of index range %s", str(indices)) + else: + index_range = range(len(feature_files)) + + logger.info("- Projection: projecting %d features from directory '%s' to directory '%s'", len(index_range), fs.directories['extracted'], fs.directories['projected']) + # extract the features + for i in index_range: + feature_file = str(feature_files[i]) + projected_file = str(projected_files[i]) + + if not utils.check_file(projected_file, force, 1000): + # load feature + feature = extractor.read_feature(feature_file) + # project feature + projected = algorithm.project(feature) + # write it + bob.io.base.create_directories_safe(os.path.dirname(projected_file)) + algorithm.write_feature(projected, projected_file) + + + +def train_enroller(algorithm, extractor, force = False): + """Trains the model enroller using the extracted or projected features, depending on your setup of the algorithm. + + This function should only be called when the ``algorithm`` actually requires enroller training. + The enroller of the given ``algorithm`` is trained using extracted or projected features. + It writes the enroller to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`. + By default, if the target file already exists, it is not re-created. + + **Parameters:** + + algorithm : :py:class:`bob.bio.base.algorithm.Algorithm` or derived + The algorithm, in which the enroller should be trained. + It is assured that the projector file is read (if required) before the enroller training is started. + + extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived + The extractor, used for reading the training data, if unprojected features are used for enroller training. + + force : bool + If given, the enroller file is regenerated, even if it already exists.
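+
+    A usage sketch (hedged; it assumes that the :py:class:`bob.bio.base.tools.FileSelector` has already been created by the calling script, and that the function is exported from ``bob.bio.base.tools``):
+
+    .. code-block:: python
+
+       from bob.bio.base.tools import train_enroller
+       if algorithm.requires_enroller_training:
+         train_enroller(algorithm, extractor, force = False)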
+ """ + if not algorithm.requires_enroller_training: + logger.warn("The train_enroller function should not have been called, since the algorithm does not need enroller training.") + return + + # the file selector object + fs = FileSelector.instance() + + if utils.check_file(fs.enroller_file, force, 1000): + logger.info("- Enrollment: enroller '%s' already exists.", fs.enroller_file) + else: + # define the tool that is required to read the features + reader = algorithm if algorithm.use_projected_features_for_enrollment else extractor + bob.io.base.create_directories_safe(os.path.dirname(fs.enroller_file)) + + # first, load the projector + algorithm.load_projector(fs.projector_file) + + # load training data + train_files = fs.training_list('projected' if algorithm.use_projected_features_for_enrollment else 'extracted', 'train_enroller', arrange_by_client = True) + logger.info("- Enrollment: loading %d enroller training files", len(train_files)) + train_features = read_features(train_files, reader, True) + + # perform training + logger.info("- Enrollment: training enroller '%s' using %d identities: ", fs.enroller_file, len(train_features)) + algorithm.train_enroller(train_features, fs.enroller_file) + + + +def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev', 'eval'], types = ['N', 'T'], force = False): + """Enroll the models for the given groups, eventually for both models and T-Norm-models. + This function uses the extracted or projected features to compute the models, depending on your setup of the given ``algorithm``. + + The given ``algorithm`` is used to enroll all models required for the current experiment. + It writes the models into the directories specified by the :py:class:`bob.bio.base.tools.FileSelector`. + By default, if target files already exist, they are not re-created. + + The extractor is only used to load features in a coherent way. - # first, load the projector - algorithm.load_projector(fs.projector_file) + **Parameters:** - # load training data - train_files = fs.training_list('projected' if algorithm.use_projected_features_for_enrollment else 'extracted', 'train_enroller', arrange_by_client = True) - logger.info("- Enrollment: loading %d enroller training files", len(train_files)) - train_features = read_features(train_files, reader, True) + algorithm : py:class:`bob.bio.base.algorithm.Algorithm` or derived + The algorithm, used for enrolling model and writing them to file. - # perform training - logger.info("- Enrollment: training enroller '%s' using %d identities: ", fs.enroller_file, len(train_features)) - algorithm.train_enroller(train_features, fs.enroller_file) + extractor : py:class:`bob.bio.base.extractor.Extractor` or derived + The extractor, used for reading the extracted features, if the algorithm enrolls models from unprojected data. + compute_zt_norm : bool + If set to ``True`` and `'T'`` is part of the ``types``, also T-norm models are extracted. + indices : (int, int) or None + If specified, only the models for the given index range ``range(begin, end)`` should be enrolled. + This is usually given, when parallel threads are executed. -def enroll(algorithm, extractor, compute_zt_norm, indices = None, groups = ['dev', 'eval'], types = ['N', 'T'], force=False): - """Enroll the models for 'dev' and 'eval' groups, for both models and T-Norm-models. 
- This function uses the extracted or projected features to compute the models, - depending on your setup of the base class Algorithm.""" + groups : some of ``('dev', 'eval')`` + The list of groups, for which models should be enrolled. + force : bool + If given, files are regenerated, even if they already exist. + """ # the file selector object fs = FileSelector.instance() # read the projector file, if needed diff --git a/bob/bio/base/tools/command_line.py b/bob/bio/base/tools/command_line.py index cfe4ff27a97bec9a34e5325891c703e37878ac4c..c43d251b9fbf14d5014c1c89f24232aaf62fbc99 100644 --- a/bob/bio/base/tools/command_line.py +++ b/bob/bio/base/tools/command_line.py @@ -7,15 +7,36 @@ logger = bob.core.log.setup("bob.bio.base") from .. import utils from . import FileSelector +from .. import database """Execute biometric recognition algorithms on a certain biometric database. """ def command_line_parser(description=__doc__, exclude_resources_from=[]): - """Creates an :py:class:`argparse.ArgumentParser` object that includes the minimum set of command line options. - The description can be overwritten, but has a (small) default. + """command_line_parser(description=__doc__, exclude_resources_from=[]) -> parsers + + Creates an :py:class:`argparse.ArgumentParser` object that includes the minimum set of command line options (which are not so few). + The ``description`` can be overwritten, but has a (small) default. + + Included in the parser, several groups are defined. + Each group specifies a set of command line options. + For the configurations, registered resources are listed, which can be limited by the ``exclude_resources_from`` list of extensions. It returns a dictionary, containing the parser object itself (in the ``'main'`` keyword), and a list of command line groups. + + **Parameters:** + + description : str + The documentation of the script. + + exclude_resources_from : [str] + A list of extension packages, for which resources should not be listed. + + **Returns:** + + parsers : dict + A dictionary of parser groups, with the main parser under the ``'main'`` key. + Feel free to add more options to any of the parser groups. """ parser = argparse.ArgumentParser(description=description, formatter_class=argparse.ArgumentDefaultsHelpFormatter, conflict_handler='resolve') @@ -121,8 +142,34 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]): def initialize(parsers, command_line_parameters = None, skips = []): - """Parses the command line and arranges the arguments accordingly, and returns the arguments. + """initialize(parsers, command_line_parameters = None, skips = []) -> args + + Parses the command line and arranges the arguments accordingly. + Afterward, it loads the resources for the database, preprocessor, extractor, algorithm and grid (if specified), and stores the results into the returned args. + + This function also initializes the :py:class:`FileSelector` instance by arranging the directories and files according to the command line parameters. + If ``skips`` are given, an ``--execute-only`` parameter is added to the parser, with which the steps to execute can be selected. + + **Parameters:** + + parsers : dict + The dictionary of command line parsers, as returned from :py:func:`command_line_parser`. + Additional arguments might have been added. + + command_line_parameters : [str] or None + The command line parameters that should be interpreted. + By default, the parameters specified by the user on command line are considered.
+
+  skips : [str]
+    A list of possible ``--skip-...`` options to be added and evaluated automatically.
+
+  **Returns:**
+
+  args : namespace
+    A namespace of arguments as read from the command line.
+
+  .. note:: The database, preprocessor, extractor, algorithm and grid (if specified) are actual instances of the corresponding classes.
   """
 
   # execute-only
@@ -185,7 +232,8 @@ def initialize(parsers, command_line_parameters = None, skips = []):
   model_sub_dir = protocol if args.database.models_depend_on_protocol else enroller_sub_dir
 
   # Database directories, which should be automatically replaced
-  args.database.replace_directories(args.database_directories_file)
+  if isinstance(args.database, database.DatabaseBob):
+    args.database.replace_directories(args.database_directories_file)
 
   # initialize the file selector
   FileSelector.create(
@@ -208,7 +256,21 @@ def initialize(parsers, command_line_parameters = None, skips = []):
 
 def groups(args):
-  """Checks the groups, for which the files must be preprocessed, and features must be extracted and projected."""
+  """groups(args) -> groups
+
+  Returns the groups for which the files must be preprocessed, and features must be extracted and projected.
+  This function should be used in order to eliminate the training files (the ``'world'`` group) when no training is required in this experiment.
+
+  **Parameters:**
+
+  args : namespace
+    The interpreted command line arguments as returned by the :py:func:`initialize` function.
+
+  **Returns:**
+
+  groups : [str]
+    A list of groups, for which data needs to be treated.
+  """
   groups = args.groups[:]
   if args.extractor.requires_training or args.algorithm.requires_projector_training or args.algorithm.requires_enroller_training:
     groups.append('world')
@@ -216,7 +278,21 @@ def groups(args):
 
 def command_line(cmdline):
-  """Converts the given options to a string that can be executed on command line."""
+  """command_line(cmdline) -> command
+
+  Converts the given list of options into a single string that can be executed in a terminal.
+  Parameters are enclosed in ``'...'`` quotes so that the command line can interpret them (e.g., if they contain spaces or special characters).
+
+  **Parameters:**
+
+  cmdline : [str]
+    A list of command line options to be converted into a string.
+
+  **Returns:**
+
+  command : str
+    The command line string that can be copy-pasted into the terminal.
+  """
   c = ""
   for cmd in cmdline:
     if cmd[0] in '/-':
@@ -227,6 +303,20 @@ def command_line(cmdline):
 
 def write_info(args, command_line_parameters, executable):
+  """Writes information about the current experimental setup into a file specified on command line.
+
+  **Parameters:**
+
+  args : namespace
+    The interpreted command line arguments as returned by the :py:func:`initialize` function.
+
+  command_line_parameters : [str] or ``None``
+    The command line parameters that have been interpreted.
+    If ``None``, the parameters specified by the user on command line are considered.
+
+  executable : str
+    The name of the executable (such as ``'./bin/verify.py'``) that is used to run the experiments.
+  """
   if command_line_parameters is None:
     command_line_parameters = sys.argv[1:]
   # write configuration
diff --git a/bob/bio/base/tools/extractor.py b/bob/bio/base/tools/extractor.py
index cf30238c3f71eebb29c9a9acd72a70405ea39d1b..e608da2e9f59fa06de0c31ad3d4ccc58560a308d 100644
--- a/bob/bio/base/tools/extractor.py
+++ b/bob/bio/base/tools/extractor.py
@@ -9,29 +9,75 @@ from .preprocessor import read_preprocessed_data
 from .. import utils
 
 
 def train_extractor(extractor, preprocessor, force = False):
-  """Trains the feature extractor using preprocessed data of the 'world' set, if the feature extractor requires training."""
-  if extractor.requires_training:
-    # the file selector object
-    fs = FileSelector.instance()
-    # the file to write
-    if utils.check_file(fs.extractor_file, force, 1000):
-      logger.info("- Extraction: extractor '%s' already exists.", fs.extractor_file)
+  """Trains the feature extractor using preprocessed data of the ``'world'`` group, if the feature extractor requires training.
+
+  This function should only be called when the ``extractor`` actually requires training.
+  The given ``extractor`` is trained using preprocessed data.
+  It writes the extractor to the file specified by the :py:class:`bob.bio.base.tools.FileSelector`.
+  By default, if the target file already exists, it is not re-created.
+
+  **Parameters:**
+
+  extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived
+    The extractor to be trained.
+
+  preprocessor : :py:class:`bob.bio.base.preprocessor.Preprocessor` or derived
+    The preprocessor, used for reading the preprocessed data.
+
+  force : bool
+    If given, the extractor file is regenerated, even if it already exists.
+  """
+
+  if not extractor.requires_training:
+    logger.warn("The train_extractor function should not have been called, since the extractor does not need training.")
+    return
+
+  # the file selector object
+  fs = FileSelector.instance()
+  # the file to write
+  if utils.check_file(fs.extractor_file, force, 1000):
+    logger.info("- Extraction: extractor '%s' already exists.", fs.extractor_file)
+  else:
+    # read training files
+    train_files = fs.training_list('preprocessed', 'train_extractor', arrange_by_client = extractor.split_training_data_by_client)
+    train_data = read_preprocessed_data(train_files, preprocessor, extractor.split_training_data_by_client)
+    if extractor.split_training_data_by_client:
+      logger.info("- Extraction: training extractor '%s' using %d identities:", fs.extractor_file, len(train_files))
     else:
-      # read training files
-      train_files = fs.training_list('preprocessed', 'train_extractor', arrange_by_client = extractor.split_training_data_by_client)
-      train_data = read_preprocessed_data(train_files, preprocessor, extractor.split_training_data_by_client)
-      if extractor.split_training_data_by_client:
-        logger.info("- Extraction: training extractor '%s' using %d identities:", fs.extractor_file, len(train_files))
-      else:
-        logger.info("- Extraction: training extractor '%s' using %d training files:", fs.extractor_file, len(train_files))
-      # train model
-      bob.io.base.create_directories_safe(os.path.dirname(fs.extractor_file))
-      extractor.train(train_data, fs.extractor_file)
+      logger.info("- Extraction: training extractor '%s' using %d training files:", fs.extractor_file, len(train_files))
+    # train model
+    bob.io.base.create_directories_safe(os.path.dirname(fs.extractor_file))
+    extractor.train(train_data, fs.extractor_file)
+
+
+
+def extract(extractor, preprocessor, groups=None, indices = None, force = False):
+  """Extracts features from the preprocessed data using the given extractor.
+
+  The given ``extractor`` is used to extract all features required for the current experiment.
+  It writes the extracted data into the directory specified by the :py:class:`bob.bio.base.tools.FileSelector`.
+  By default, if target files already exist, they are not re-created.
+  The preprocessor is only used to load the data in a coherent way.
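+
+  A minimal usage sketch (assuming that ``extractor`` and ``preprocessor`` are instances matching your experiment, and that the :py:class:`FileSelector` has been initialized, e.g., by :py:func:`bob.bio.base.tools.initialize`):
+
+  .. code-block:: py
+
+     from bob.bio.base import tools
+     # extract the features of the development group, keeping existing files
+     tools.extract(extractor, preprocessor, groups = ['dev'])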
-def extract(extractor, preprocessor, groups=None, indices = None, force=False):
-  """Extracts the features from the preprocessed data using the given extractor."""
+  **Parameters:**
+
+  extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived
+    The extractor, used for extracting and writing the features.
+
+  preprocessor : :py:class:`bob.bio.base.preprocessor.Preprocessor` or derived
+    The preprocessor, used for reading the preprocessed data.
+
+  groups : some of ``('world', 'dev', 'eval')`` or ``None``
+    The list of groups for which the data should be extracted.
+
+  indices : (int, int) or None
+    If specified, only the features for the given index range ``range(begin, end)`` should be extracted.
+    This is usually given when parallel threads are executed.
+
+  force : bool
+    If given, files are regenerated, even if they already exist.
+  """
   # the file selector object
   fs = FileSelector.instance()
   extractor.load(fs.extractor_file)
@@ -59,9 +105,28 @@ def extract(extractor, preprocessor, groups=None, indices = None, force=False):
       bob.io.base.create_directories_safe(os.path.dirname(feature_file))
       extractor.write_feature(feature, feature_file)
 
-def read_features(file_names, extractor, split_by_client=False):
-  """Reads the features from ``file_names`` using the given ``extractor``.
+def read_features(file_names, extractor, split_by_client = False):
+  """read_features(file_names, extractor, split_by_client = False) -> extracted
+
+  Reads the extracted features from ``file_names`` using the given ``extractor``.
   If ``split_by_client`` is set to ``True``, it is assumed that the ``file_names`` are already sorted by client.
+
+  **Parameters:**
+
+  file_names : [str] or [[str]]
+    A list of names of files to be read.
+    If ``split_by_client = True``, file names are supposed to be split into groups.
+
+  extractor : :py:class:`bob.bio.base.extractor.Extractor` or derived
+    The extractor, used for reading the extracted features.
+
+  split_by_client : bool
+    Indicates if the given ``file_names`` are split into groups.
+
+  **Returns:**
+
+  extracted : [object] or [[object]]
+    The list of extracted features, in the same order as in the ``file_names``.
   """
   if split_by_client:
     return [[extractor.read_feature(str(f)) for f in client_files] for client_files in file_names]
diff --git a/bob/bio/base/tools/preprocessor.py b/bob/bio/base/tools/preprocessor.py
index 3652b1d53eaca119fb6462e29b583c814271cbcc..dde752f39a99f55249b0df3be20c6a2dff1a231a 100644
--- a/bob/bio/base/tools/preprocessor.py
+++ b/bob/bio/base/tools/preprocessor.py
@@ -7,8 +7,28 @@ logger = logging.getLogger("bob.bio.base")
 from .FileSelector import FileSelector
 from .. import utils
 
-def preprocess(preprocessor, groups=None, indices=None, force=False):
-  """Preprocesses the original data of the database with the given preprocessor."""
+def preprocess(preprocessor, groups = None, indices = None, force = False):
+  """Preprocesses the original data of the database with the given preprocessor.
+
+  The given ``preprocessor`` is used to preprocess all data required for the current experiment.
+  It writes the preprocessed data into the directory specified by the :py:class:`bob.bio.base.tools.FileSelector`.
+  By default, if target files already exist, they are not re-created.
+
+  **Parameters:**
+
+  preprocessor : :py:class:`bob.bio.base.preprocessor.Preprocessor` or derived
+    The preprocessor, which should be applied to all data.
+
+  groups : some of ``('world', 'dev', 'eval')`` or ``None``
+    The list of groups for which the data should be preprocessed.
+
+  indices : (int, int) or None
+    If specified, only the data for the given index range ``range(begin, end)`` should be preprocessed.
+    This is usually given when parallel threads are executed.
+
+  force : bool
+    If given, files are regenerated, even if they already exist.
+  """
   # the file selector object
   fs = FileSelector.instance()
@@ -50,9 +70,28 @@ def preprocess(preprocessor, groups = None, indices = None, force = False):
       preprocessor.write_data(preprocessed_data, preprocessed_data_file)
 
 
-def read_preprocessed_data(file_names, preprocessor, split_by_client=False):
-  """Reads the preprocessed data from ``file_names`` using the given preprocessor.
+def read_preprocessed_data(file_names, preprocessor, split_by_client = False):
+  """read_preprocessed_data(file_names, preprocessor, split_by_client = False) -> preprocessed
+
+  Reads the preprocessed data from ``file_names`` using the given preprocessor.
   If ``split_by_client`` is set to ``True``, it is assumed that the ``file_names`` are already sorted by client.
+
+  **Parameters:**
+
+  file_names : [str] or [[str]]
+    A list of names of files to be read.
+    If ``split_by_client = True``, file names are supposed to be split into groups.
+
+  preprocessor : :py:class:`bob.bio.base.preprocessor.Preprocessor` or derived
+    The preprocessor, which can read the preprocessed data.
+
+  split_by_client : bool
+    Indicates if the given ``file_names`` are split into groups.
+
+  **Returns:**
+
+  preprocessed : [object] or [[object]]
+    The list of preprocessed data, in the same order as in the ``file_names``.
   """
   if split_by_client:
     return [[preprocessor.read_data(str(f)) for f in client_files] for client_files in file_names]
diff --git a/bob/bio/base/tools/scoring.py b/bob/bio/base/tools/scoring.py
index e2c13d7f4088f0028d96bf379b008c02553933ea..a30266092037fcd68963f6d49585cf880a431507 100644
--- a/bob/bio/base/tools/scoring.py
+++ b/bob/bio/base/tools/scoring.py
@@ -3,7 +3,8 @@ import bob.learn.em
 import bob.learn.linear
 import bob.measure
 import numpy
-import os
+import os, sys
+import tarfile
 
 import logging
 logger = logging.getLogger("bob.bio.base")
@@ -14,8 +15,6 @@ from .. import utils
 
 def _scores(algorithm, model, probes):
   """Compute scores for the given model and a list of probes.
-  If ``preloaded`` is set to ``True``, the ``probes`` are expected to be features,
-  otherwise ``probes`` are considred to be probe file names.
   """
   # the file selector object
   fs = FileSelector.instance()
@@ -40,7 +39,7 @@ def _scores(algorithm, model, probes):
 
 def _open_to_read(score_file):
-  """check for the existence of the normal and the compressed version of the file, and calls bob.measure.load.open_file for the existing one."""
+  """Checks for the existence of the normal and the compressed version of the file, and calls :py:func:`bob.measure.load.open_file` for the existing one."""
   if not os.path.exists(score_file):
     score_file += '.tar.bz2'
     if not os.path.exists(score_file):
@@ -51,6 +50,7 @@ def _open_to_read(score_file):
 
 def _open_to_write(score_file, write_compressed):
+  """Opens the given score file for writing.
+  If ``write_compressed`` is set to ``True``, a file-like structure is returned."""
   bob.io.base.create_directories_safe(os.path.dirname(score_file))
   if write_compressed:
     if sys.version_info[0] <= 2:
@@ -66,11 +66,12 @@ def _open_to_write(score_file, write_compressed):
   return f
 
 def _close_written(score_file, f, write_compressed):
+  """Closes the file ``f`` that was opened with :py:func:`_open_to_write`."""
   if write_compressed:
     f.seek(0)
     tarinfo = tarfile.TarInfo(os.path.basename(score_file))
     tarinfo.size = len(f.buf if sys.version_info[0] <= 2 else f.getbuffer())
-    tar = tarfile.open(result_file, 'w')
+    tar = tarfile.open(score_file, 'w')
     tar.addfile(tarinfo, f)
     tar.close()
   # close the file
@@ -78,7 +79,7 @@ def _close_written(score_file, f, write_compressed):
 
 def _save_scores(score_file, scores, probe_objects, client_id, write_compressed=False):
-  """Saves the scores into a text file."""
+  """Saves the scores of one model into a text file that can be interpreted by :py:func:`bob.measure.load.split_four_column`."""
   assert len(probe_objects) == scores.shape[1]
 
   # open file for writing
@@ -92,7 +93,7 @@ def _save_scores(score_file, scores, probe_objects, client_id, write_compressed=
 
 def _scores_a(algorithm, model_ids, group, compute_zt_norm, force, write_compressed=False):
-  """Computes A scores. For non-ZT-norm, these are the only scores that are actually computed."""
+  """Computes A scores for the models with the given ``model_ids``. If ``compute_zt_norm = False``, these are the only scores that are actually computed."""
   # the file selector object
   fs = FileSelector.instance()
@@ -125,7 +126,7 @@ def _scores_a(algorithm, model_ids, group, compute_zt_norm, force, write_compres
 
 def _scores_b(algorithm, model_ids, group, force):
-  """Computes B scores."""
+  """Computes B scores for the given model ids."""
   # the file selector object
   fs = FileSelector.instance()
@@ -147,7 +148,7 @@ def _scores_b(algorithm, model_ids, group, force):
   bob.io.base.save(b, score_file, True)
 
 def _scores_c(algorithm, t_model_ids, group, force):
-  """Computes C scores."""
+  """Computes C scores for the given T-Norm model ids."""
   # the file selector object
   fs = FileSelector.instance()
@@ -169,7 +170,7 @@ def _scores_c(algorithm, t_model_ids, group, force):
   bob.io.base.save(c, score_file, True)
 
 def _scores_d(algorithm, t_model_ids, group, force):
-  """Computes D scores."""
+  """Computes D scores for the given T-Norm model ids. Both the D matrix and the D-samevalue matrix are written."""
   # the file selector object
   fs = FileSelector.instance()
@@ -199,8 +200,40 @@ def _scores_d(algorithm, t_model_ids, group, force):
   bob.io.base.save(d_same_value_tm, same_score_file, True)
 
 
-def compute_scores(algorithm, compute_zt_norm, force = False, indices = None, groups = ['dev', 'eval'], types = ['A', 'B', 'C', 'D']):
-  """Computes the scores for the given groups (by default 'dev' and 'eval')."""
+def compute_scores(algorithm, compute_zt_norm, force = False, indices = None, groups = ['dev', 'eval'], types = ['A', 'B', 'C', 'D'], write_compressed = False):
+  """Computes the scores for the given groups.
+
+  This function computes all scores for the experiment and writes them to files, one per model.
+  When ``compute_zt_norm`` is enabled, scores are computed for all four matrices, i.e., A: normal scores; B: Z-norm scores; C: T-norm scores; D: ZT-norm scores and ZT-samevalue scores.
+  By default, scores are computed for both groups ``'dev'`` and ``'eval'``.
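+
+  A minimal usage sketch (assuming an initialized :py:class:`FileSelector` and a loaded ``algorithm``; without ZT-norm, only the A scores are computed):
+
+  .. code-block:: py
+
+     from bob.bio.base import tools
+     # compute (non-normalized) scores for the development group only
+     tools.compute_scores(algorithm, compute_zt_norm = False, groups = ['dev'])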
+
+  **Parameters:**
+
+  algorithm : :py:class:`bob.bio.base.algorithm.Algorithm` or derived
+    The algorithm, used for computing the scores.
+
+  compute_zt_norm : bool
+    If set to ``True``, also ZT-norm scores are computed.
+
+  force : bool
+    If given, score files are regenerated, even if they already exist.
+
+  indices : (int, int) or None
+    If specified, scores are computed only for the models in the given index range ``range(begin, end)``.
+    This is usually given when parallel threads are executed.
+
+    .. note:: The probe files are not limited by the ``indices``.
+
+  groups : some of ``('dev', 'eval')``
+    The list of groups for which scores should be computed.
+
+  types : some of ``['A', 'B', 'C', 'D']``
+    A list of score types to be computed.
+    If ``compute_zt_norm = False``, only the ``'A'`` scores are computed.
+
+  write_compressed : bool
+    If enabled, score files are compressed as ``.tar.bz2`` files.
+  """
   # the file selector object
   fs = FileSelector.instance()
@@ -221,7 +254,7 @@ def compute_scores(algorithm, compute_zt_norm, force = False, indices = None, gr
 
       # compute A scores
       if 'A' in types:
-        _scores_a(algorithm, model_ids, group, compute_zt_norm, force)
+        _scores_a(algorithm, model_ids, group, compute_zt_norm, force, write_compressed)
 
       if compute_zt_norm:
         # compute B scores
@@ -300,7 +333,19 @@ def _scores_d_normalize(t_model_ids, group):
 
 def zt_norm(groups = ['dev', 'eval'], write_compressed=False):
-  """Computes ZT-Norm using the previously generated A, B, C, and D files"""
+  """Computes ZT-Norm using the previously generated A, B, C, D and D-samevalue matrix files.
+
+  This function computes the ZT-norm scores for all models of all desired groups and writes them into files defined by the :py:class:`bob.bio.base.tools.FileSelector`.
+  It loads the A, B, C, D and D-samevalue matrix files that need to have been computed beforehand.
+
+  **Parameters:**
+
+  groups : some of ``('dev', 'eval')``
+    The list of groups for which ZT-norm should be applied.
+
+  write_compressed : bool
+    If enabled, score files are compressed as ``.tar.bz2`` files.
+  """
   # the file selector object
   fs = FileSelector.instance()
@@ -338,7 +383,7 @@ def zt_norm(groups = ['dev', 'eval'], write_compressed=False):
 
 def _concat(score_files, output, write_compressed):
-
+  """Concatenates a list of score files into a single score file."""
   f = _open_to_write(output, write_compressed)
 
   # Concatenates the scores
@@ -351,7 +396,22 @@ def _concat(score_files, output, write_compressed):
 
 def concatenate(compute_zt_norm, groups = ['dev', 'eval'], write_compressed=False):
-  """Concatenates all results into one (or two) score files per group."""
+  """Concatenates all results into one (or two) score files per group.
+
+  The score files, which were generated per model, are concatenated into a single score file, which can be interpreted by :py:func:`bob.measure.load.split_four_column`.
+  The concatenated score files are always re-computed, regardless of whether they already exist.
+
+  **Parameters:**
+
+  compute_zt_norm : bool
+    If set to ``True``, also the score files for ZT-norm are concatenated.
+
+  groups : some of ``('dev', 'eval')``
+    The list of groups for which score files should be concatenated.
+
+  write_compressed : bool
+    If enabled, the concatenated score files are compressed as ``.tar.bz2`` files.
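+
+  The concatenated score files can afterwards be read back, e.g., for evaluation purposes (a sketch; the file name depends on your experiment configuration):
+
+  .. code-block:: py
+
+     import bob.measure
+     # split the four-column score file into negative and positive scores
+     negatives, positives = bob.measure.load.split_four_column('scores-dev')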
+ """ # the file selector object fs = FileSelector.instance() for group in groups: @@ -369,11 +429,30 @@ def concatenate(compute_zt_norm, groups = ['dev', 'eval'], write_compressed=Fals logger.info("- Scoring: wrote score file '%s'", result_file) -def calibrate(norms = ['nonorm', 'ztnorm'], groups = ['dev', 'eval'], prior = 0.5, write_compressed=False): - """Calibrates the score files by learning a linear calibration from the dev files (first element of the groups) and executing the on all groups, separately for all given norms.""" +def calibrate(compute_zt_norm, groups = ['dev', 'eval'], prior = 0.5, write_compressed = False): + """Calibrates the score files by learning a linear calibration from the dev files (first element of the groups) and executing the on all groups. + + This function is intended to compute the calibration parameters on the scores of the development set using the :py:class:`bob.learn.linear.CGLogRegTrainer`. + Afterward, both the scores of the development and evaluation sets are calibrated and written to file. + For ZT-norm scores, the calibration is performed independently, if enabled. + The names of the calibrated score files that should be written are obtained from the :py:class:`bob.bio.base.tools.FileSelector`. + + **Parameters:** + + compute_zt_norm : bool + If set to ``True``, also score files for ZT-norm are calibrated. + + groups : some of ``('dev', 'eval')`` + The list of groups, for which score files should be calibrated. + The first of the given groups is used to train the logistic regression parameters, while the calibration is performed for all given groups. + + write_compressed : bool + If enabled, calibrated score files are compressed as ``.tar.bz2`` files. + """ # the file selector object fs = FileSelector.instance() # read score files of the first group (assuming that the first group is 'dev') + norms = ['nonorm', 'ztnorm'] if compute_zt_norm else ["nonorm"] for norm in norms: training_score_file = fs.no_norm_result_file(groups[0]) if norm == 'nonorm' else fs.zt_norm_result_file(groups[0]) if norm == 'ztnorm' else None diff --git a/bob/bio/base/utils/resources.py b/bob/bio/base/utils/resources.py index 29156ed113709eeb9644ac77fc257e384070f13b..46f8039021b9fdb72a3000ff579c0a76aec46127 100644 --- a/bob/bio/base/utils/resources.py +++ b/bob/bio/base/utils/resources.py @@ -32,13 +32,31 @@ import logging logger = logging.getLogger("bob.bio.base") +#: Keywords for which resources are defined. valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid') - def read_config_file(filename, keyword = None): - """Use this function to read the given configuration file. + """read_config_file(filename, keyword = None) -> config + + Use this function to read the given configuration file. If a keyword is specified, only the configuration according to this keyword is returned. - Otherwise a dictionary of the configurations read from the configuration file is returned.""" + Otherwise a dictionary of the configurations read from the configuration file is returned. + + **Parameters:** + + filename : str + The name of the configuration file to read. + + keyword : str or ``None`` + If specified, only the contents of the variable with the given name is returned. + If ``None``, the whole configuration is returned (a local namespace) + + **Returns:** + + config : object or namespace + If ``keyword`` is specified, the object inside the configuration with the given name is returned. + Otherwise, the whole configuration is returned (as a local namespace). 
+ """ if not os.path.exists(filename): raise IOError("The given configuration file '%s' could not be found" % file) @@ -62,15 +80,35 @@ def _get_entry_points(keyword, strip = []): return [entry_point for entry_point in pkg_resources.iter_entry_points('bob.bio.' + keyword) if not entry_point.name.startswith(tuple(strip))] -def load_resource(resource, keyword, imports = ['bob.bio.base'], preferred_distribution = None): - """Loads the given resource that is registered with the given keyword. +def load_resource(resource, keyword, imports = ['bob.bio.base'], preferred_extension = None): + """load_resource(resource, keyword, imports = ['bob.bio.base'], preferred_extension = None) -> resource + + Loads the given resource that is registered with the given keyword. The resource can be: - * a resource as defined in the setup.py - * a configuration file - * a string defining the construction of an object. If imports are required for the construction of this object, they can be given as list of strings. + 1. a resource as defined in the setup.py + 2. a configuration file + 3. a string defining the construction of an object. If imports are required for the construction of this object, they can be given as list of strings. + + **Parameters:** + + resource : str + Any string interpretable as a resource (see above). + + keyword : str + A valid resource keyword, can be one of :py:attr:`valid_keywords`. + + imports : [str] + A list of strings defining which modules to import, when constructing new objects (option 3). + + preferred_extension : str or ``None`` + When several resources with the same name are found in different extension (in different ``bob.bio`` packages), this specifies the preferred extension to load the resource from. + If not specified, the extension that is **not** ``bob.bio.base`` is selected. + + **Returns:** - In any case, the resulting resource object is returned. + resource : object + The resulting resource object is returned, either read from file or resource, or created newly. """ # first, look if the resource is a file name @@ -91,9 +129,9 @@ def load_resource(resource, keyword, imports = ['bob.bio.base'], preferred_distr # Now: check if there are only two entry points, and one is from the bob.bio.base, then use the other one index = -1 - if preferred_distribution: + if preferred_extension: for i,p in enumerate(entry_points): - if p.dist.project_name == preferred_distribution: index = i + if p.dist.project_name == preferred_extension: index = i if index == -1: if len(entry_points) == 2: @@ -121,41 +159,17 @@ def load_resource(resource, keyword, imports = ['bob.bio.base'], preferred_distr raise ImportError("The given command line option '%s' is neither a resource for a '%s', nor an existing configuration file, nor could be interpreted as a command (error: %s)"%(resource, keyword, str(e))) -def read_file_resource(resource, keyword): - """Treats the given resource as a file and reads its configuration""" - # first, look if the resource is a file name - if os.path.isfile(resource): - # load it without the keyword -> all entries of the resource file are read - return read_config_file(resource) - - if keyword not in valid_keywords: - raise ValueError("The given keyword '%s' is not valid. Please use one of %s!" 
-      % (str(keyword), str(valid_keywords)))
-
-  entry_points = [entry_point for entry_point in _get_entry_points(keyword) if entry_point.name == resource]
-
-  if not len(entry_points):
-    raise ImportError("The given option '%s' is neither a resource, nor an existing configuration file for resource type '%s'"%(resource, keyword))
-
-  if len(entry_points) == 1:
-    return entry_points[0].load()
-  else:
-    # TODO: extract current package name and use this one, if possible
+def extensions(keywords=valid_keywords):
+  """extensions(keywords=valid_keywords) -> extensions
 
-    # Now: check if there are only two entry points, and one is from the bob.bio.base, then use the other one
-    index = -1
-    if len(entry_points) == 2:
-      if entry_points[0].dist.project_name == 'bob.bio.base': index = 1
-      elif entry_points[1].dist.project_name == 'bob.bio.base': index = 0
+  Returns a list of packages that define extensions using the given keywords.
 
-    if index != -1:
-      logger.info("RESOURCES: Using the resource '%s' from '%s', and ignoring the one from '%s'" %(resource, entry_points[index].module_name, entry_points[1-index].module_name))
-      return entry_points[index].load()
-    else:
-      raise ImportError("Under the desired name '%s', there are multiple entry points defined: %s" %(resource, [entry_point.module_name for entry_point in entry_points]))
+  **Parameters:**
 
-
-def extensions(keywords=valid_keywords):
-  """Returns a list of packages that define extensions using the given keywords, which default to all keywords."""
+  keywords : [str]
+    A list of keywords to load entry points for.
+    Defaults to all :py:attr:`valid_keywords`.
+
+  **Returns:**
+
+  extensions : [str]
+    A sorted list of the names of all packages that define resources for any of the given ``keywords``.
+  """
   entry_points = [entry_point for keyword in keywords for entry_point in _get_entry_points(keyword)]
   return sorted(list(set(entry_point.dist.project_name for entry_point in entry_points)))
diff --git a/bob/bio/base/utils/singleton.py b/bob/bio/base/utils/singleton.py
index e1bcbaef38e50d62673a32ca118b588eb3b6b598..cfcce7a56d6aef39562676de77c113cef5a6eb06 100644
--- a/bob/bio/base/utils/singleton.py
+++ b/bob/bio/base/utils/singleton.py
@@ -9,11 +9,19 @@ class Singleton:
   To get the singleton instance, use the :py:meth:`instance` method. Trying to use :py:meth:`__call__` will result in a :py:class:`TypeError` being raised.
 
-  Limitations: The decorated class cannot be inherited from.
+  Limitations:
+
+  * The decorated class cannot be inherited from.
+  * The documentation of the decorated class is replaced with the documentation of this class.
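+
+  A minimal usage sketch (the decorated class and its parameters are hypothetical):
+
+  .. code-block:: py
+
+     @Singleton
+     class Configuration:
+       def __init__(self, value):
+         self.value = value
+
+     Configuration.create(42)           # creates the singleton instance
+     config = Configuration.instance()  # retrieves it later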
""" def __init__(self, decorated): self._decorated = decorated + import functools + for attr in functools.WRAPPER_ASSIGNMENTS: + setattr(self, attr, getattr(decorated, attr)) + self.__bases__ = [] + self._instance = None def create(self, *args, **kwargs): diff --git a/doc/conf.py b/doc/conf.py index f4e0cfd47cd1ed4f49b2c701663e0042f03646dc..b489afd0515fb34e0e24d1962055d3c8848d903c 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -246,7 +246,7 @@ autodoc_default_flags = ['members', 'inherited-members', 'show-inheritance'] # For inter-documentation mapping: from bob.extension.utils import link_documentation -intersphinx_mapping = link_documentation(['python', 'numpy', 'bob.io.base', 'bob.db.verification.utils', 'bob.db.verification.filelist', 'bob.bio.face', 'bob.bio.speaker', 'bob.bio.gmm', 'bob.bio.video', 'bob.bio.csu', 'gridtk']) +intersphinx_mapping = link_documentation(['python', 'numpy', 'bob.bio.face', 'bob.bio.speaker', 'bob.bio.gmm', 'bob.bio.video', 'bob.bio.csu', 'gridtk', 'bob.db.youtube']) def skip(app, what, name, obj, skip, options): diff --git a/doc/experiments.rst b/doc/experiments.rst index aa4bf47c125c50af76404b6be1e3d047ee58a842..ba73ca30e5864f54b50a5496e466ce9367700613 100644 --- a/doc/experiments.rst +++ b/doc/experiments.rst @@ -44,8 +44,10 @@ In these cases, the according steps are skipped. ``bob.bio`` takes care that always the correct files are forwarded to the subsequent steps. -Running Experiments -------------------- +.. _running_part_1: + +Running Experiments (part I) +---------------------------- To run an experiment, we provide a generic script ``./bin/verify.py``, which is highly parametrizable. To get a complete list of command line options, please run: @@ -72,7 +74,7 @@ These five options are: The first four parameters, i.e., the ``database``, the ``preprocessor``, the ``extractor`` and the ``algorithm`` can be specified in several different ways. For the start, we will use only the registered :ref:`Resources <resources>`. -These resources define the source code that will be used to compute the experiments, as well as all the meta-parameters of the algorithms (which we will call the **configuration**). +These resources define the source code that will be used to compute the experiments, as well as all the meta-parameters of the algorithms (which we will call the *configuration*). To get a list of registered resources, please call: .. code-block:: sh @@ -84,7 +86,9 @@ If only ``bob.bio.base`` is installed, no databases and no preprocessors will be .. note:: You will also find some ``grid`` resources being listed. - These type of resources will be explained :ref:`later <grid>`. + These type of resources will be explained :ref:`later <running_in_parallel>`. + +Before going into :ref:`more details about the configurations <running_part_2>`, we will provide information about running default experiments. One command line option, which is not required, but recommended, is the ``--verbose`` option. By default, the algorithms are set up to execute quietly, and only errors are reported. @@ -114,6 +118,7 @@ By default, you can find them in a sub-directory the ``result`` directory, but y At Idiap_, the default result directory differs, see ``./bin/verify.py --help`` for your directory. + Evaluating Experiments ---------------------- @@ -133,6 +138,8 @@ This file is a pure text file and contains the complete configuration of the exp With this configuration it is possible to inspect all default parameters of the algorithms, and even to re-run the exact same experiment. +.. 
+
 Running in Parallel
 -------------------
 
diff --git a/doc/implementation.rst b/doc/implementation.rst
index 972c9a72d7b1163f35d5d322407f753e40b84810..3ccb7d349d0b7aea2913701cc161f33896a57cc0 100644
--- a/doc/implementation.rst
+++ b/doc/implementation.rst
@@ -30,6 +30,8 @@
 This will assure that all parameters of the experiments are stored into the ``Experiment.info`` file.
 
 If you plan to write your own tools, please assure that you are following the following structure.
 
+.. _preprocessors:
+
 Preprocessors
 ~~~~~~~~~~~~~
@@ -57,6 +59,8 @@
 When a different IO for the original data is required (for example to read video files), please overwrite:
 
 * ``read_original_data(filename)``: Reads the original data from file.
 
+.. _extractors:
+
 Extractors
 ~~~~~~~~~~
@@ -93,6 +97,8 @@
 Second, this behavior is registered in the ``__init__`` function by calling the base class constructor with more parameters: ``bob.bio.base.extractor.Extractor.__init__(self, requires_training=True, ...)``.
 Given that the training algorithm needs to have the training data split by identity, the ``bob.bio.base.extractor.Extractor.__init__(self, requires_training=True, split_training_images_by_client = True, ...)`` is used instead.
 
+.. _algorithms:
+
 Algorithms
 ~~~~~~~~~~
 The implementation of a recognition algorithm is just as straightforward.
@@ -190,32 +196,22 @@
 Furthermore, some of the databases split off some data from the training set, which is used to perform the ZT score normalization.
 Finally, most of the databases come with specific annotation files, which define additional information about the data, e.g., hand-labeled eye locations for face images.
 
-Generic Databases
-~~~~~~~~~~~~~~~~~
-
-All these different interfaces are concentrated into the :py:class:`bob.bio.base.database.Database` class.
-This database provides a minimum common interface for all databases that can be used by ``bob.bio``.
-
-.. todo::
-  Provide more details about the Database.
-
-If the database provides an interface for ZT score normalization, the :py:class:`bob.bio.base.database.DatabaseZT` is used, which is derived from :py:class:`bob.bio.base.database.Database`.
-
-.. todo::
-  Provide more details about the DatabaseZT.
-
-
 Verification Database Interface
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 For most of the data sets, we rely on the database interfaces from Bob_.
-Particularly, all databases that are derived from the :py:class:`bob.db.verification.utils.Database` (click :ref:`here <verification_databases>` for a list of implemented databases) are supported by
-a special derivation of the databases from above.
-These databases
+Particularly, all databases that are derived from the :py:class:`bob.db.verification.utils.Database` (click :ref:`here <verification_databases>` for a list of implemented databases) are supported by a special derivation of the databases from above.
+For these databases, the special :py:class:`bob.bio.base.database.DatabaseBob` interface is provided, which takes the Bob_ database as parameter.
+Several such databases are defined in the according packages, i.e., :ref:`bob.bio.spear <bob.bio.spear>`, :ref:`bob.bio.face <bob.bio.face>` and :ref:`bob.bio.video <bob.bio.video>`.
+For Bob_'s ZT-norm databases, we provide the :py:class:`bob.bio.base.database.DatabaseBobZT` interface.
+Additionally, a generic database interface, which is derived from :py:class:`bob.bio.base.database.DatabaseBobZT`, is the :py:class:`bob.bio.base.database.DatabaseFileList`.
+This database interfaces with the :py:class:`bob.db.verification.filelist.Database`, which is a generic database based on file lists, implementing the :py:class:`bob.db.verification.utils.Database` interface.
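+
+For example, a configuration file for such a database could look something like this (a sketch, assuming that the ``bob.db.atnt`` package is installed; the ``name`` parameter and the directory are placeholders):
+
+.. code-block:: py
+
+   import bob.bio.base
+   import bob.db.atnt
+
+   # wrap the Bob database into the bob.bio database interface
+   database = bob.bio.base.database.DatabaseBob(
+     database = bob.db.atnt.Database(original_directory = '[YOUR_ATNT_DIRECTORY]'),
+     name = 'atnt'
+   )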
+
+Defining your own Database
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 If you have your own database that you want to execute the recognition experiments on, you should first check if you could use the :ref:`Verification FileList Database <bob.db.verification.filelist>` interface by defining appropriate file lists for the training set, the model set, and the probes.
-If you can do this, just write your own configuration file that uses the :py:class:`facereclib.databases.DatabaseFileList` interface (see :ref:`databases` for details).
 In most of the cases, the :py:class:`bob.db.verification.filelist.Database` should be sufficient to run experiments.
 Please refer to the :ref:`Documentation <bob.db.verification.filelist>` of this database for more instructions on how to configure it.
@@ -223,67 +219,92 @@
 In case you want to have a more complicated interface to your database, you are welcome to write your own database wrapper class.
 In this case, you have to derive your class from the :py:class:`facereclib.databases.Database`, and provide the following functions:
 
 * ``__init__(self, <your-parameters>, **kwargs)``: Constructor of your database interface.
-  Please call the base class constructor, providing all the required parameters (see :ref:`databases`), e.g. by ``facereclib.databases.Database.__init__(self, **kwargs)``.
-* ``all_files(self)``: Returns a list of all :py:class:`facereclib.databases.File` objects of the database.
+  Please call the base class constructor, providing all the required parameters, e.g. by ``bob.bio.base.database.Database.__init__(self, **kwargs)``.
+* ``all_files(self)``: Returns a list of all :py:class:`bob.bio.base.database.File` objects of the database.
   The list needs to be sorted by the file id (you can use the ``self.sort(files)`` function for sorting).
-* ``training_files(self, step, arrange_by_client = False)``: A sorted list of the :py:class:`facereclib.databases.File` objects that is used for training.
-  If ``arrange_by_clients`` is enabled, you might want to use the ``self.arrange_by_client(files)`` function to perform the job.
+* ``training_files(self, step, arrange_by_client = False)``: A sorted list of the :py:class:`bob.bio.base.database.File` objects that is used for training.
+  If ``arrange_by_clients`` is enabled, you might want to use the :py:meth:`bob.bio.base.database.Database.arrange_by_client` function to perform the job.
 * ``model_ids(self, group = 'dev')``: The ids for the models (usually, there is only one model per client and, thus, you can simply use the client ids) for the given group.
   Usually, providing ids for the group ``'dev'`` should be sufficient.
 * ``client_id_from_model_id(self, model_id)``: Returns the client id for the given model id.
-* ``enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`facereclib.databases.File` objects for the given model id.
+* ``enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`bob.bio.base.database.File` objects for the given model id.
 * ``probe_files(self, model_id=None, group='dev')``: Returns the list of probe files that the given ``model_id`` should be compared with.
   Usually, all probe files are compared with all model files.
   In this case, you can just ignore the ``model_id``.
   If the ``model_id`` is ``None``, this function is supposed to return *all* probe files for all models of the given group.
 
 Additionally, you can define more lists that can be used for ZT score normalization.
-In this case, derive you class from :py:class:`facereclib.databases.DatabaseZT` instead, and additionally overwrite the following functions:
+In this case, derive your class from :py:class:`bob.bio.base.database.DatabaseZT` instead, and additionally overwrite the following functions:
 
 * ``t_model_ids(self, group = 'dev')``: The ids for the T-Norm models for the given group.
-* ``t_enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`facereclib.databases.File` objects for the given T-Norm model id.
-* ``z_probe_files(self, group='dev')``: Returns the list of Z-probe :py:class:`facereclib.databases.File` objects, with which all the models and T-Norm models are compared.
+* ``t_enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`bob.bio.base.database.File` objects for the given T-Norm model id.
+* ``z_probe_files(self, group='dev')``: Returns the list of Z-probe :py:class:`bob.bio.base.database.File` objects, with which all the models and T-Norm models are compared.
 
 .. note::
-  For a proper face recognition protocol, the identities from the models and the T-Norm models, as well as the Z-probes should be different.
+  For a proper biometric recognition protocol, the identities from the models and the T-Norm models, as well as the Z-probes, should be different.
 
 For some protocols, a single probe consists of several features, see :ref:`algorithms` about strategies on how to incorporate several probe files into one score.
 If your database should provide this functionality, please overwrite:
 
 * ``uses_probe_file_sets(self)``: Return ``True`` if the current protocol of the database provides multiple files for one probe.
-* ``probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of :py:class:`facereclib.databases.FileSet` objects.
-* ``z_probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of Z-probe :py:class:`facereclib.databases.FileSet` objects (only needed if the base class is :py:class:`facereclib.databases.DatabaseZT`).
+* ``probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of :py:class:`bob.bio.base.database.FileSet` objects.
+* ``z_probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of Z-probe :py:class:`bob.bio.base.database.FileSet` objects (only needed if the base class is :py:class:`bob.bio.base.database.DatabaseZT`).
 
 
 .. _configuration-files:
 
-Adding Configuration Files
---------------------------
-After your code is tested, you should provide a configuration file for your algorithm.
-A configuration file basically consists of a constructor call to your new class with a useful (yet not necessarily optimized) set of parameters.
-Depending on your type of contribution, you should write a line like:
+Configuration Files
+-------------------
+
+One important aspect of the ``bob.bio`` packages is reproducibility.
+To be able to reproduce an experiment, it is required that all parameters of all tools are present.
 
-* ``database = facereclib.databases.<YourDatabase>(<YourParameters>)``
-* ``preprocessor = facereclib.preprocessing.<YourPreprocessor>(<YourParameters>)``
-* ``feature_extractor = facereclib.features.<YourExtractor>(<YourParameters>)``
-* ``tool = facereclib.tools.<YourAlgorithm>(<YourParameters>)``
+In ``bob.bio`` this is achieved by providing these parameters in configuration files.
+In these files, an *instance* of one of the tools is generated, and assigned to a variable with a specific name.
+These variable names are:
+
+* ``database`` for an instance of a (derivation of a) :py:class:`bob.bio.base.database.Database`
+* ``preprocessor`` for an instance of a (derivation of a) :py:class:`bob.bio.base.preprocessor.Preprocessor`
+* ``extractor`` for an instance of a (derivation of a) :py:class:`bob.bio.base.extractor.Extractor`
+* ``algorithm`` for an instance of a (derivation of a) :py:class:`bob.bio.base.algorithm.Algorithm`
+* ``grid`` for an instance of the :py:class:`bob.bio.base.grid.Grid`
+
+For example, the configuration file for a PCA algorithm, which uses 80% of variance and a cosine distance function, could look something like this:
+
+.. code-block:: py
+
+   import bob.bio.base
+   import scipy.spatial
+
+   algorithm = bob.bio.base.algorithm.PCA(subspace_dimension = 0.8, distance_function = scipy.spatial.distance.cosine, is_distance_function = True)
+
+Some default configuration files can be found in the ``bob/bio/*/config`` directories of all ``bob.bio`` packages, but you can create configuration files in any directory you like.
+In fact, since all tools have a different keyword, you can define a complete experiment in a single configuration file.
+
+
+.. _resources:
+
+Resources
+---------
 
-and save the configuration file into the according sub-directory of `facereclib/configurations <file:../facereclib/configurations>`_.
+Finally, some of the configuration files, which sit in the ``bob/bio/*/config`` directories, are registered as *resources*.
+This means that a resource is nothing more than a short name for a registered instance of one of the tools (database, preprocessor, extractor, algorithm or grid configuration) of ``bob.bio``, which has a pre-defined set of parameters.
+The process of registering a resource is relatively easy.
+We use the SetupTools_ mechanism of registering so-called entry points in the ``setup.py`` file of the corresponding ``bob.bio`` package.
+Particularly, we use a specific list of entry points, which are:
 
-.. _register-resources:
+* ``bob.bio.database`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.database.Database`
+* ``bob.bio.preprocessor`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.preprocessor.Preprocessor`
+* ``bob.bio.extractor`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.extractor.Extractor`
+* ``bob.bio.algorithm`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.algorithm.Algorithm`
+* ``bob.bio.grid`` to register an instance of the :py:class:`bob.bio.base.grid.Grid`
 
-Registering your Code as a Resource
------------------------------------
-Now, you should be able to register this configuration file as a resource, so that you can use the configuration from above by a simple ``<shortcut>`` of your choice.
-Please open the `setup.py <file:../setup.py>`_ file in the base directory of your satellite package and edit the ``entry_points`` section.
-Depending on your type of algorithm, you have to add:
+For each of the tools, several resources are defined, which you can list with the ``./bin/resources.py`` script.
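+
+For example, the ``entry_points`` section of a ``setup.py`` file could contain something like this (a sketch; the resource name and the package layout are placeholders):
+
+.. code-block:: py
+
+   entry_points = {
+     'bob.bio.algorithm': [
+       # <shortcut> = <importable.module>:<variable-name>
+       'my-pca = my.package.config.algorithm.pca:algorithm',
+     ],
+   },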
-* ``'facereclib.database': [ '<your-database-shortcut> = <your-database-configuration>.database' ]``
-* ``'facereclib.preprocessor': [ '<your-preprocessor-shortcut> = <your-preprocessor-configuration>.preprocessor' ]``
-* ``'facereclib.feature_extractor': [ '<your-extractor-shortcut> = <your-extractor-configuration>.feature_extractor' ]``
-* ``'facereclib.tool': [ '<your-recognition-algorithm-shortcut> = <your-algorithm-configuration>.tool' ]``
+When you want to register your own resource, make sure that your configuration file is importable (usually it is sufficient to have an empty ``__init__.py`` file in the same directory as your configuration file).
+Then, you can simply add a line inside the corresponding ``entry_points`` section of the ``setup.py`` file (you might need to create that section; just follow the example of the ``setup.py`` file that you can find online in the base directory of our `bob.bio.base GitHub page <http://github.com/bioidiap/bob.bio.base>`__).
 
 After re-running ``./bin/buildout``, your new resource should be listed in the output of ``./bin/resources.py``.
 
diff --git a/doc/implemented.rst b/doc/implemented.rst
index 5290419c24e4538d98eb230c6bd3e1e965869a1b..3d3d1ae75ca5f9477c75da926d715fcc54c1a38f 100644
--- a/doc/implemented.rst
+++ b/doc/implemented.rst
@@ -1,14 +1,34 @@
-
-
-
 =================================
 Tools implemented in bob.bio.base
 =================================
 
-Databases
----------
+Summary
+-------
+
+Base Classes
+~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.preprocessor.Preprocessor
+   bob.bio.base.extractor.Extractor
+   bob.bio.base.algorithm.Algorithm
+   bob.bio.base.database.Database
+   bob.bio.base.database.DatabaseZT
+   bob.bio.base.grid.Grid
+
+Implementations
+~~~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.extractor.Linearize
+   bob.bio.base.algorithm.PCA
+   bob.bio.base.algorithm.LDA
+   bob.bio.base.algorithm.PLDA
+   bob.bio.base.algorithm.BIC
+   bob.bio.base.database.DatabaseBob
+   bob.bio.base.database.DatabaseBobZT
+   bob.bio.base.database.DatabaseFileList
 
-.. automodule:: bob.bio.base.database
 
 Preprocessors
 -------------
@@ -25,6 +45,10 @@
 Algorithms
 ----------
 
 .. automodule:: bob.bio.base.algorithm
 
+Databases
+---------
+
+.. automodule:: bob.bio.base.database
 
 Grid Configuration
 ------------------
diff --git a/doc/index.rst b/doc/index.rst
index 0619eb6f6cd173c516ee26a9824de1230038ab04..2a7d478bbbb01292d7f8380c98874572a2af3f9e 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -51,6 +51,7 @@ Users Guide
    installation
    experiments
    implementation
+   more
 
 ================
 Reference Manual
@@ -63,6 +64,15 @@ Reference Manual
    py_api
 
 
+==========
+References
+==========
+
+.. [TP91]   *M. Turk and A. Pentland*. **Eigenfaces for recognition**. Journal of Cognitive Neuroscience, 3(1):71-86, 1991.
+.. [ZKC+98] *W. Zhao, A. Krishnaswamy, R. Chellappa, D. Swets and J. Weng*. **Discriminant analysis of principal components for face recognition**, pages 73-85. Springer Verlag Berlin, 1998.
+.. [Pri07]  *S. J. D. Prince*. **Probabilistic linear discriminant analysis for inferences about identity**. Proceedings of the International Conference on Computer Vision, 2007.
+.. [ESM+13] *L. El Shafey, C. McCool, R. Wallace and S. Marcel*. **A scalable formulation of probabilistic linear discriminant analysis: applied to face recognition**. IEEE Transactions on Pattern Analysis and Machine Intelligence, 35(7):1788-1794, 7/2013.
+
 ToDo-List
 =========
 
diff --git a/doc/installation.rst b/doc/installation.rst
index 61efa17663a193a0d32323536e8dd0d152d04827..6db1d9c73ca24296af540846659aa32b83a6e2d8 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -47,7 +47,7 @@ Databases
 With ``bob.bio`` you will run biometric recognition experiments using some default biometric recognition databases.
 Though the verification protocols are implemented in ``bob.bio``, the original data are **not included**.
 To download the original data of the databases, please refer to the corresponding Web pages.
-Database URL's will be given in the :ref:`databases` section.
+For a list of supported databases including their download URLs, please refer to the :ref:`verification_databases`.
 
 After downloading the original data for the databases, you will need to tell ``bob.bio`` where these databases can be found.
 For this purpose, we have decided to implement a special file, where you can set your directories.
@@ -65,7 +65,7 @@ Please use ``./bin/databases.py`` for a list of known databases, where you can s
 
 .. note::
-  If you have installed only ``bob.bio.base``, there is no database listed -- as all databases are included in other packages, such as :ref:`bob.bio.face <bob.bio.face>` or :ref:`bob.bio.speaker <bob.bio.speaker>`.
+  If you have installed only ``bob.bio.base``, there is no database listed -- as all databases are included in other packages, such as :ref:`bob.bio.face <bob.bio.face>` or :ref:`bob.bio.spear <bob.bio.spear>`.
 
 
 Test your Installation
diff --git a/doc/links.rst b/doc/links.rst
index 06bc90291b8fb543d13bef4996622877cb937f7f..2a20a1bc4ff44ad46e0ac1d4cf700fd350d8df8a 100644
--- a/doc/links.rst
+++ b/doc/links.rst
@@ -13,6 +13,7 @@
 .. _bob's github page: http://idiap.github.com/bob
 .. _gridtk: http://github.com/idiap/gridtk
 .. _buildout: http://www.buildout.org
+.. _setuptools: http://trac.edgewall.org/wiki/setuptools
 .. _nist: http://www.nist.gov/itl/iad/ig/focs.cfm
 .. _pypi: http://pypi.python.org
 .. _sge: http://wiki.idiap.ch/linux/SunGridEngine
diff --git a/doc/more.rst b/doc/more.rst
new file mode 100644
index 0000000000000000000000000000000000000000..39e73f9f561a80f6d6233281149087701b7a479f
--- /dev/null
+++ b/doc/more.rst
@@ -0,0 +1,187 @@
+.. vim: set fileencoding=utf-8 :
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
+
+==============================
+More about Running Experiments
+==============================
+
+Now that we have learned the implementation details, we can have a closer look at how experiments can be parametrized.
+
+.. _running_part_2:
+
+Running Experiments (part II)
+-----------------------------
+
+As mentioned before, running biometric recognition experiments can be achieved using the ``./bin/verify.py`` command line script.
+In section :ref:`running_part_1`, we have used registered resources to run an experiment.
+However, the command line options of ``./bin/verify.py`` are more flexible, as there are three different ways of defining tools:
+
+1. Choose a resource (see ``./bin/resources.py`` or ``./bin/verify.py --help`` for a list of registered resources):
+
+   .. code-block:: sh
+
+      $ ./bin/verify.py --algorithm pca
+
+
+2. Use a configuration file. Make sure that your configuration file has the correct variable name:
+
+   .. code-block:: sh
+
+      $ ./bin/verify.py --algorithm bob/bio/base/config/algorithm/pca.py
+
+
+3. Instantiate a class on the command line.
+   Usually, quotes ``"..."`` are required, and the ``--imports`` need to be specified:
+
+   .. code-block:: sh
+
+      $ ./bin/verify.py --algorithm "bob.bio.base.algorithm.PCA(subspace_dimension = 30, distance_function = scipy.spatial.distance.euclidean, is_distance_function = True)" --imports bob.bio.base scipy.spatial
+
+All these three ways can be used for any of the five command line options: ``--database``, ``--preprocessor``, ``--extractor``, ``--algorithm`` and ``--grid``.
+You can even mix these three types freely in a single command line.
+
+
+.. _grid-search:
+
+Finding the Optimal Configuration
+---------------------------------
+
+Sometimes, configurations of tools (preprocessors, extractors or algorithms) are highly dependent on the database or even the employed protocol.
+Additionally, configuration parameters depend on each other.
+``bob.bio`` provides a relatively simple setup that allows testing different configurations of the same task, in order to find the best set of configurations.
+For this, the ``./bin/grid_search.py`` script can be employed.
+This script executes a configurable series of experiments, which reuse data as far as possible.
+Please check out ``./bin/grid_search.py --help`` for a list of command line options.
+
+The Configuration File
+~~~~~~~~~~~~~~~~~~~~~~
+The most important parameter of ``./bin/grid_search.py`` is the ``--configuration-file``.
+This configuration file specifies which parameters of which part of the tool chain will be tested.
+An example for a configuration file can be found in the test scripts: ``bob/bio/base/test/dummy/grid_search.py``.
+The configuration file is a common Python file, which can contain certain variables:
+
+1. ``preprocessor =``
+2. ``extractor =``
+3. ``algorithm =``
+4. ``replace =``
+5. ``requirement =``
+6. ``imports =``
+
+The variables from 1. to 3. usually contain instantiations for classes of :ref:`preprocessors`, :ref:`extractors` and :ref:`algorithms`, but also registered :ref:`resources` can be used.
+For any of the parameters of the classes, a *placeholder* can be put.
+By default, these placeholders start with a ``#`` character, followed by a digit or character.
+The variables 1. to 3. can also be overridden by the command line options ``--preprocessor``, ``--extractor`` and ``--algorithm`` of the ``./bin/grid_search.py`` script.
+
+The ``replace`` variable has to be set as a dictionary.
+In it, you can define with which values your placeholder keys should be filled, and in which step of the tool chain execution this should happen.
+The steps are ``'preprocessing'``, ``'extraction'``, ``'projection'``, ``'enrollment'`` and ``'scoring'``.
+For each of the steps, it can be defined which placeholder should be replaced by which values.
+To be able to differentiate the results later on, each of the replacement values is bound to a directory name.
+The final structure looks something like this:
+
+.. code-block:: python
+
+   replace = {
+       step1 : {
+           '#a' : {
+               'Dir_a1' : 'Value_a1',
+               'Dir_a2' : 'Value_a2'
+           },
+
+           '#b' : {
+               'Dir_b1' : 'Value_b1',
+               'Dir_b2' : 'Value_b2'
+           }
+       },
+
+       step2 : {
+           '#c' : {
+               'Dir_c1' : 'Value_c1',
+               'Dir_c2' : 'Value_c2'
+           }
+       }
+   }
+
+
+Of course, more than two values can be selected.
+In the above example, the results of the experiments will be placed into a directory structure as ``results/[...]/Dir_a1/Dir_b1/Dir_c1/[...]``.
+
+.. note::
+   Please note that we are using a dictionary structure to define the replacements.
+
+
+.. _grid-search:
+
+Finding the Optimal Configuration
+---------------------------------
+
+Sometimes, configurations of tools (preprocessors, extractors or algorithms) are highly dependent on the database or even the employed protocol.
+Additionally, configuration parameters depend on each other.
+``bob.bio`` provides a relatively simple setup that allows you to test different configurations for the same task and to find the best set of configurations.
+For this, the ``./bin/grid_search.py`` script can be employed.
+This script executes a configurable series of experiments, reusing data as far as possible.
+Please check out ``./bin/grid_search.py --help`` for a list of command line options.
+
+The Configuration File
+~~~~~~~~~~~~~~~~~~~~~~
+The most important parameter of ``./bin/grid_search.py`` is the ``--configuration-file``.
+This configuration file specifies which parameters of which parts of the tool chain will be tested.
+An example configuration file can be found in the test scripts: ``bob/bio/base/test/dummy/grid_search.py``.
+The configuration file is a plain Python file, which can contain the following variables:
+
+1. ``preprocessor =``
+2. ``extractor =``
+3. ``algorithm =``
+4. ``replace =``
+5. ``requirement =``
+6. ``imports =``
+
+The variables 1. to 3. usually contain instantiations of classes of :ref:`preprocessors`, :ref:`extractors` and :ref:`algorithms`, but registered :ref:`resources` can be used as well.
+Any parameter of these classes can be filled with a *placeholder*.
+By default, these placeholders start with a ``#`` character, followed by a digit or letter.
+The variables 1. to 3. can also be overridden by the command line options ``--preprocessor``, ``--extractor`` and ``--algorithm`` of the ``./bin/grid_search.py`` script.
+
+The ``replace`` variable has to be set as a dictionary.
+It defines which values each placeholder key should be filled with, and in which step of the tool chain execution this should happen.
+The steps are ``'preprocessing'``, ``'extraction'``, ``'projection'``, ``'enrollment'`` and ``'scoring'``.
+For each of the steps, you can define which placeholder should be replaced by which values.
+To be able to differentiate the results later on, each of the replacement values is bound to a directory name.
+The final structure looks somewhat like this:
+
+.. code-block:: python
+
+   replace = {
+       step1 : {
+           '#a' : {
+               'Dir_a1' : 'Value_a1',
+               'Dir_a2' : 'Value_a2'
+           },
+
+           '#b' : {
+               'Dir_b1' : 'Value_b1',
+               'Dir_b2' : 'Value_b2'
+           }
+       },
+
+       step2 : {
+           '#c' : {
+               'Dir_c1' : 'Value_c1',
+               'Dir_c2' : 'Value_c2'
+           }
+       }
+   }
+
+
+Of course, more than two values can be given for each placeholder.
+In the above example, the results of the experiments will be placed into a directory structure such as ``results/[...]/Dir_a1/Dir_b1/Dir_c1/[...]``.
+
+.. note::
+   Please note that we are using a dictionary structure to define the replacements.
+   Hence, the directories inside the same step might not be created in the same order as written in the configuration file.
+   For the above example, a directory structure of ``results/[...]/Dir_b1/Dir_a1/Dir_c1/[...]`` might be possible as well.
+
+
+Additionally, tuples of placeholders can be defined, in which case the full tuple is always replaced in one shot.
+Continuing the above example, it is possible to add:
+
+.. code-block:: python
+
+   ...
+   step3 : {
+       ('#d','#e') : {
+           'Dir_de1' : ('Value_d1', 'Value_e1'),
+           'Dir_de2' : ('Value_d2', 'Value_e2')
+       }
+   }
+
+.. warning::
+   *All possible combinations* of the configuration parameters are tested, which might result in a *huge number of executed experiments*.
+
+Some combinations of parameters might not make any sense.
+In this case, a set of requirements on the parameters can be defined, using the ``requirement`` variable.
+Each requirement is a string containing placeholders, which can be evaluated using Python's ``eval`` function:
+
+.. code-block:: python
+
+   requirement = ['#a > #b', '2*#c != #a', ...]
+
+Finally, when any of the classes or values requires a certain Python module, it needs to be declared in the ``imports`` variable.
+If you, e.g., test which ``scipy.spatial`` distance function works best for your features, please add the required imports (and don't forget ``bob.bio.base`` and the other ``bob.bio`` packages in case you use their tools):
+
+.. code-block:: python
+
+   imports = ['scipy', 'bob.bio.base', 'bob.bio.face']
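+
+Putting these pieces together, a complete configuration file might look somewhat like the following sketch.
+All resource names, placeholder values and directory labels in it are purely illustrative -- for a working example, please refer to ``bob/bio/base/test/dummy/grid_search.py``:
+
+.. code-block:: python
+
+   # the preprocessor is assumed to be given on the command line via --preprocessor
+
+   # illustrative registered resource for the extractor
+   extractor = 'linearize'
+
+   # the algorithm is given as a string containing the placeholders #s and #d
+   algorithm = "bob.bio.base.algorithm.PCA(subspace_dimension = #s, distance_function = #d, is_distance_function = True)"
+
+   # each placeholder is replaced in the step whose outcome it affects;
+   # the keys ('D10', 'euclidean', ...) name the result sub-directories
+   replace = {
+       'projection' : {
+           '#s' : {
+               'D10' : '10',
+               'D50' : '50'
+           }
+       },
+       'scoring' : {
+           '#d' : {
+               'euclidean' : 'scipy.spatial.distance.euclidean',
+               'cosine'    : 'scipy.spatial.distance.cosine'
+           }
+       }
+   }
+
+   # modules required to evaluate the algorithm string and the replacement values
+   imports = ['scipy', 'bob.bio.base']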
+
+
+Further Command Line Options
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The ``./bin/grid_search.py`` script has a further set of command line options:
+
+- The ``--database`` and the ``--protocol`` define which database and (optionally) which protocol should be used.
+- The ``--sub-directory`` is similar to the one of ``./bin/verify.py``.
+- ``--result-directory`` and ``--temp-directory`` specify directories to write results and temporary files into. Defaults are ``./results/grid_search`` and ``./temp/grid_search`` in the current directory. Make sure that the ``--temp-directory`` can store a sufficient amount of data.
+- The ``--preprocessor``, ``--extractor`` and ``--algorithm`` options can be used to override the ``preprocessor``, ``extractor`` and ``algorithm`` fields in the configuration file (in which case the configuration file does not need to contain these variables).
+- The ``--grid`` option can select the SGE_ configuration.
+- The ``--parallel`` option can run on the local machine using the given number of parallel threads.
+- The ``--preprocessed-directory`` can be used to select a directory of previously preprocessed data. This should not be used in combination with testing different preprocessor parameters.
+- The ``--gridtk-database-directory`` can be used to select another directory, where the ``submitted.sql3`` files will be stored.
+- Sometimes, the gridtk databases grow too large to hold all experiments. Using the ``--gridtk-database-split-level``, databases can be split at the desired level.
+- The ``--write-commands`` option selects a directory into which the executed commands are written (this is useful in case some experiments fail and need to be rerun).
+- The ``--dry-run`` flag should always be used before the final execution to check whether the experiment definition works as expected.
+- The ``--skip-when-existent`` flag will only execute the experiments that have not yet finished (i.e., where the resulting score files have not been produced yet).
+- With the ``--executable`` flag, you might select a different script rather than ``bob.bio.base.script.verify`` to run the experiments (such as ``bob.bio.gmm.script.verify_gmm``).
+- Finally, additional options can be passed directly to the ``./bin/verify.py`` script. These options have to be put after a ``--`` separator.
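+
+For instance, a hypothetical test run of the grid search, executed locally with four parallel threads, might look like this (the ``atnt`` database resource name is again only illustrative):
+
+.. code-block:: sh
+
+   $ ./bin/grid_search.py --configuration-file bob/bio/base/test/dummy/grid_search.py --database atnt --sub-directory grid_test --parallel 4 --dry-run -- -vv
+
+Once the ``--dry-run`` output looks as expected, drop that flag to actually execute the experiments.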
+
+
+Evaluation of Results
+~~~~~~~~~~~~~~~~~~~~~
+
+To evaluate a series of experiments, a special script iterates through all the results and computes the EER on the development set and the HTER on the evaluation set, for both the ``nonorm`` and the ``ztnorm`` directories.
+Simply call:
+
+.. code-block:: sh
+
+   $ ./bin/collect_results.py -vv --directory [result-base-directory] --sort
+
+This will iterate through all result files found in ``[result-base-directory]`` and sort the results according to the EER on the development set (the sorting criterion can be modified using the ``--criterion`` and the ``--sort-key`` command line options).
+Hence, to find the best results of your grid search experiments (with default directories), simply run:
+
+.. code-block:: sh
+
+   $ ./bin/collect_results.py -vv --directory results/grid_search --sort --criterion EER --sort-key nonorm-dev
+
+
+.. include:: links.rst
diff --git a/doc/py_api.rst b/doc/py_api.rst
index 4e5d00cb843cc173a1b7453c0073dd3ae573f1e3..a43beead5e5c32c6ea92aefbea50a2232b2457df 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -6,12 +6,105 @@ Python API for bob.bio.base
 Generic functions
 -----------------
 
-.. automodule:: bob.bio.base
+IO-related functions
+~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.load
+   bob.bio.base.save
+   bob.bio.base.load_compressed
+   bob.bio.base.save_compressed
+   bob.bio.base.open_compressed
+   bob.bio.base.close_compressed
+   bob.bio.base.check_file
+
+
+Functions dealing with resources
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.load_resource
+   bob.bio.base.read_config_file
+   bob.bio.base.resource_keys
+   bob.bio.base.extensions
+   bob.bio.base.valid_keywords
+
+
+Miscellaneous functions
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.get_config
+   bob.bio.base.score_fusion_strategy
+   bob.bio.base.selected_elements
+   bob.bio.base.selected_indices
+
 
 Tools to run recognition experiments
 ------------------------------------
 
+Command line generation
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.tools.command_line_parser
+   bob.bio.base.tools.initialize
+   bob.bio.base.tools.command_line
+   bob.bio.base.tools.write_info
+   bob.bio.base.tools.FileSelector
+
+Controlling elements
+~~~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.tools.groups
+   bob.bio.base.tools.indices
+
+Preprocessing
+~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.tools.preprocess
+   bob.bio.base.tools.read_preprocessed_data
+
+Feature Extraction
+~~~~~~~~~~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.tools.train_extractor
+   bob.bio.base.tools.extract
+   bob.bio.base.tools.read_features
+
+Algorithm
+~~~~~~~~~
+
+.. autosummary::
+   bob.bio.base.tools.train_projector
+   bob.bio.base.tools.project
+   bob.bio.base.tools.train_enroller
+   bob.bio.base.tools.enroll
+
+Scoring
+~~~~~~~
+
+.. autosummary::
+   bob.bio.base.tools.compute_scores
+   bob.bio.base.tools.concatenate
+   bob.bio.base.tools.calibrate
+
+Details
+-------
+
+.. automodule:: bob.bio.base
+
+   .. attribute:: valid_keywords
+
+      Valid keywords for which resources are defined are ``('database', 'preprocessor', 'extractor', 'algorithm', 'grid')``
+
+.. automodule:: bob.bio.base.tools
+
+   .. autoclass:: FileSelector
+
 .. include:: links.rst