Commit e1659aed authored by Amir MOHAMMADI


Merge branch '78-the-documentation-does-not-mention-usage-of-single-configuration-files' into 'master'

Resolve "The documentation does not mention usage of single configuration files"

Closes #78

See merge request !92
parents 4f1ae620 830489b3
@@ -15,9 +15,10 @@ class Preprocessor (object):

   writes_data : bool
     Select, if the preprocessor actually writes preprocessed images, or if it is simply returning values.

   read_original_data: callable or ``None``
     This function is used to read the original data from file.
     It takes three inputs: A :py:class:`bob.bio.base.database.BioFile` (or one of its derivatives), the original directory (as ``str``) and the original extension (as ``str``).
     If ``None``, the default function :py:func:`bob.bio.base.read_original_data` is used.

   min_preprocessed_file_size: int
     The minimum file size of a saved preprocessed data in bytes. If the saved
......
#!/bin/python
# This file describes an exemplary configuration file that can be used in combination with the parameter_test.py script.
......
@@ -35,7 +35,7 @@ def command_line_config_group(parser, package_prefix='bob.bio.', exclude_resourc
                            help='A configuration file containing one or more of "database", "preprocessor", '
                                 '"extractor", "algorithm" and/or "grid"')
  config_group.add_argument('-H', '--create-configuration-file', metavar='PATH',
                            help='If selected, an empty configuration file will be created, and no further process is executed')
  config_group.add_argument('-d', '--database', metavar='x', nargs='+',
                            help='Database and the protocol; registered databases are: %s' % utils.resource_keys(
                                'database', exclude_resources_from, package_prefix=package_prefix))

@@ -198,8 +198,8 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]):
                               "database can be processed; missing scores will be NaN.")
  flag_group.add_argument('-r', '--parallel', type=int,
                          help='This flag is a shortcut for running the commands on the local machine with the given amount of '
                               'parallel processes; equivalent to --grid bob.bio.base.grid.Grid("local", '
                               'number_of_parallel_processes=X) --run-local-scheduler --stop-on-failure.')
  flag_group.add_argument('-t', '--environment', dest='env', nargs='*', default=[],
                          help='Passes specific environment variables to the job.')
......
@@ -59,12 +59,11 @@ def check_file(filename, force, expected_file_size=1):

def read_original_data(biofile, directory, extension):
    """This function reads the original data using the given ``biofile`` instance.
    It simply calls ``load(directory, extension)`` from :py:class:`bob.bio.base.database.BioFile` or one of its derivatives.

    Parameters
    ----------
    ``biofile`` : :py:class:`bob.bio.base.database.BioFile` or one of its derivatives
        The file to read the original data.

@@ -76,9 +75,10 @@ def read_original_data(biofile, directory, extension):
        The extension of the original data.
        Might be ``None`` if the ``biofile`` itself has the extension stored.

    Returns
    -------
    object:
        Whatever ``biofile.load`` returns; usually a :py:class:`numpy.ndarray`
    """
    assert isinstance(biofile, database.BioFile)
......
@@ -15,31 +15,34 @@ Now, you are ready to run your first biometric recognition experiment.

Running Experiments (part I)
----------------------------
To run an experiment, we provide a generic script ``verify.py``.
As a default, ``verify.py`` accepts one or more *configuration files* that include the parametrization of the experiment to run.
A configuration file contains one or more *variables* that define parts of the experiment.
When several configuration files are specified, the variables of the latter will overwrite the ones of the former.
For simplicity, here we discuss only a single configuration file.

As a start, we have implemented a shortcut to generate an empty configuration file that contains all possible variables, each of which is documented and commented out:

.. code-block:: sh

   $ verify.py --create-configuration-file experiment.py

.. note::
   The generated ``experiment.py`` is a regular python file, so you can include any regular python code inside this file.

Alright, let's have a look into this file.
Whoops, that's a lot of variables!
But, no worries, most of them have proper default values.
However, there are five variables, which are required and sufficient to define the complete biometric recognition experiment.
These five variables are:

* ``database``: The database to run the experiments on
* ``preprocessor``: The data preprocessor
* ``extractor``: The feature extractor
* ``algorithm``: The recognition algorithm
* ``sub_directory``: A descriptive name for your experiment, which will serve as a sub-directory

The first four variables, i.e., the ``database``, the ``preprocessor``, the ``extractor`` and the ``algorithm``, can be specified in several different ways.
For the start, we will use only the registered :ref:`Resources <bob.bio.base.resources>`.
These resources define the source code that will be used to compute the experiments, as well as all the meta-parameters of the algorithms (which we will call the *configuration*).
To get a list of registered resources, please call:
@@ -56,46 +59,89 @@ To see more details about the resources, i.e., the full constructor call for the

   $ resources.py -dt algorithm

.. note::
   You will also find some ``grid`` resources being listed.
   These types of resources will be explained :ref:`later <running_in_parallel>`.

Before going into :ref:`more details about the configurations <running_part_2>`, we will provide information about running default experiments.

One variable, which is not required, but recommended, is ``verbose``.
By default, the algorithms are set up to execute quietly, and only errors are reported (``logging.ERROR``).
To change this behavior, you can set the ``verbose`` variable to show:

1) Warning messages (``logging.WARN``)
2) Informative messages (``logging.INFO``)
3) Debug messages (``logging.DEBUG``)

When running experiments, my personal preference is verbosity level ``2``.
So, a minimal configuration file (say: ``pca_atnt.py``) would look something like:

.. code-block:: py

   database = 'atnt'
   preprocessor = 'face-detect'
   extractor = 'linearize'
   algorithm = 'pca'
   sub_directory = 'PCA_ATNT'
   verbose = 2

Running the experiment is then as simple as:

.. code-block:: sh

   $ verify.py pca_atnt.py
.. note::
   To be able to run exactly the command line from above, you need to have :ref:`bob.bio.face <bob.bio.face>` installed.

Before running an experiment, it is recommended to set the variable ``dry_run = True``, so that it will only print which steps would be executed, without actually executing them, and you can make sure that everything works as expected.

The final result of the experiment will be one (or more) score file(s).
Usually, they will be called something like ``scores-dev``.
By default, you can find them in a sub-directory of the ``result`` directory, but you can change this using the ``result_directory`` variable.

.. note::
   At Idiap_, the default result directory differs, see ``verify.py --help`` for your directory.
.. _bob.bio.base.command_line:

Command Line Options
--------------------
Each of the variables can also be specified directly as a command line option of ``verify.py``.

.. note::
   Command line options have a long version starting with ``--`` and often a short version starting with a single ``-``.
   Here, only the long names of the arguments are listed, please refer to ``verify.py --help`` (or short: ``verify.py -h``) for the abbreviations.

Usually, the (long version of the) command line parameter is identical to the variable name, where ``_`` characters are replaced by ``-``, and all options start with ``--``.
For example, the ``sub_directory`` variable can also be set by the ``--sub-directory`` command line option.
Only the ``--verbose`` option differs: you can use the ``--verbose`` option several times to increase verbosity, e.g., ``--verbose --verbose`` (or short ``-vv``) increases verbosity to ``2`` (alias ``logging.INFO``).
Generally, options defined on the command line will overwrite variables inside the configuration file(s).

.. note::
   Required options need to be specified either in the configuration file or on the command line.
   If all options are given on the command line, the configuration file can be omitted completely.

The exact same experiment as above can, hence, be executed using:

.. code-block:: sh

   $ verify.py --database atnt --preprocessor face-detect --extractor linearize --algorithm pca --sub-directory PCA_ATNT -vv

.. note::
   When running an experiment twice, you might realize that the second execution of the same experiment is much faster than the first one.
   This is due to the fact that those parts of the experiment, which have been successfully executed before (i.e., the corresponding files already exist), are skipped.
   To override this behavior, i.e., to always regenerate all parts of the experiments, you can set ``force = True``.

While we recommend using a configuration file to declare your experiment, some variables are often faster to change on the command line, such as ``--dry-run``, ``--verbose``, ``--force`` (see above), ``--parallel N``, or ``--skip-...`` (see below).
However, to be consistent, throughout this documentation we document the options as variables.
.. _bob.bio.base.evaluate:

Evaluating Experiments
----------------------
After the experiment has finished successfully, one or more text files containing all the scores are written.
To evaluate the experiment, you can use the generic ``evaluate.py`` script, which provides options for all prevalent evaluation types, such as CMC, ROC and DET plots, as well as computing recognition rates, EER/HTER, Cllr and minDCF.
@@ -116,104 +162,106 @@ With this configuration it is possible to inspect all default parameters of the

Running in Parallel
-------------------
One important property of the ``verify.py`` script is that it can run in parallel, using either several processes on the local machine, or an SGE grid.
To achieve that, ``bob.bio`` is well-integrated with our SGE grid toolkit GridTK_, which we have selected as a python package in the :ref:`Installation <bob.bio.base.installation>` section.
The ``verify.py`` script can submit jobs either to the SGE grid, or to a local scheduler, keeping track of dependencies between the jobs.

The GridTK_ keeps a list of jobs in a local database, which by default is called ``submitted.sql3``, but which can be overwritten with the ``gridtk_database_file`` variable.
Please refer to the `GridTK documentation <http://pythonhosted.org/gridtk>`_ for more details on how to use the Job Manager ``jman``.

Two different types of ``grid`` resources are defined, which can be used with the ``grid`` variable.
The first type of resources will submit jobs to an SGE grid.
They are mainly designed to run in the Idiap_ SGE grid and might need some adaptations to run on your grid.
The second type of resources will submit jobs to a local queue, which needs to be run by hand (e.g., using ``jman --local run-scheduler --parallel 4``), or by setting the variable ``run_local_scheduler = True``.
The difference between the two types of resources is that the local submission usually starts with ``local-``, while the SGE resource does not.
You can also re-nice the parallel jobs by setting the ``nice`` variable accordingly.

To run an experiment in parallel on the local machine, you can also use the simple variable ``parallel = N``, which will run the experiments in ``N`` parallel processes on your machine.
Here, ``N`` can be any positive integer -- but providing ``N`` greater than the number of processor threads of your machine will rather slow down processing.
Basically, ``parallel = N`` is a shortcut for:

.. code-block:: py

   grid = bob.bio.base.grid.Grid("local", number_of_parallel_processes=N)
   run_local_scheduler = True
   stop_on_failure = True

.. warning::
   Some of the processes require a lot of memory, which is multiplied by ``N`` when you run in ``N`` parallel processes.
   There is no check implemented to avoid that.


Variables to change Default Behavior
------------------------------------
In addition to the required variables discussed above, there are several variables to modify the behavior of the experiments.
One set of variables changes the directory structure of the output.
By default, intermediate (temporary) files are written to the ``temp`` directory, which can be overridden by the ``temp_directory`` variable, which expects relative or absolute paths.
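For illustration, the directory-related part of a configuration file might look like the following (the paths are hypothetical examples):

.. code-block:: py

   # hypothetical paths; both relative and absolute paths are accepted
   temp_directory = '/scratch/my_user/temp'
   result_directory = 'results'
   sub_directory = 'PCA_ATNT'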
Re-using Parts of Experiments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you want to re-use parts of previous experiments, you can specify the directories (which are relative to the ``temp_directory``, but you can also specify absolute paths):

* ``preprocessed_directory``
* ``extracted_directory``
* ``projected_directory``
* ``models_directories`` (one for each of the models and the ZT-norm-models, see below)

or even trained extractor, projector, or enroller (i.e., the results of the extractor, projector, or enroller training):

* ``extractor_file``
* ``projector_file``
* ``enroller_file``

For that purpose, it is also useful to skip parts of the tool chain.
To do that you can set these variables to ``True``:

* ``skip_preprocessing``
* ``skip_extractor_training``
* ``skip_extraction``
* ``skip_projector_training``
* ``skip_projection``
* ``skip_enroller_training``
* ``skip_enrollment``
* ``skip_score_computation``
* ``skip_concatenation``
* ``skip_calibration``

although by default files that already exist are not re-created.
You can use the ``force`` variable combined with the ``skip_`` variables (in which case the skip is preferred).
To (re-)run just a sub-selection of the tool chain, you can also use the ``execute_only`` variable, which takes a list of options out of: ``preprocessing``, ``extractor-training``, ``extraction``, ``projector-training``, ``projection``, ``enroller-training``, ``enrollment``, ``score-computation``, ``concatenation`` or ``calibration``.
This option is particularly useful for debugging purposes.
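For illustration, a configuration snippet that re-uses the preprocessed data and extracted features of an earlier run might look like this (the directory names are hypothetical examples):

.. code-block:: py

   # hypothetical directories of a previous experiment, relative to temp_directory
   preprocessed_directory = '../old_experiment/preprocessed'
   extracted_directory = '../old_experiment/extracted'
   # do not re-run these two steps
   skip_preprocessing = True
   skip_extraction = True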
Database-dependent Variables
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Many databases define several protocols that can be executed.
To change the protocol, you can either modify the configuration file, or simply use the ``protocol`` variable.

Some databases define several kinds of evaluation setups.
For example, often two groups of data are defined, a so-called *development set* and an *evaluation set*.
The scores of the two groups will be concatenated into two files called **scores-dev** and **scores-eval**, which are located in the score directory (see above).
In this case, by default only the development set is employed.
To use both groups, just specify ``groups = ['dev', 'eval']`` (of course, you can also only use the ``'eval'`` set by setting ``groups = ['eval']``).

One score normalization technique is the so-called ZT score normalization.
To enable this, simply use the ``zt_norm`` variable.
If the ZT-norm is enabled, two sets of scores will be computed, and they will be placed in two different sub-directories of the score directory, which are by default called **nonorm** and **ztnorm**, but which can be changed using the ``zt_score_directories`` variable.
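As a sketch, the database-dependent part of a configuration file could then contain lines like the following (the protocol name is a hypothetical example; check your database for the protocols it actually defines):

.. code-block:: py

   protocol = 'male'          # hypothetical protocol name defined by the database
   groups = ['dev', 'eval']   # write both scores-dev and scores-eval
   zt_norm = True             # additionally compute ZT-normalized scores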
Other Variables
---------------

Calibration
~~~~~~~~~~~
For some applications it is interesting to get calibrated scores.
Simply set the variable ``calibrate_scores = True`` and another set of score files will be created by training the score calibration on the scores of the ``'dev'`` group and applying it to all available groups.
The scores will be located in the same directory as the **nonorm** and **ztnorm** scores, and the file names are **calibrated-dev** (and **calibrated-eval** if applicable).
Unsuccessful Preprocessing
~~~~~~~~~~~~~~~~~~~~~~~~~~
In some cases, the preprocessor is not able to preprocess the data (e.g., for face image processing the face detector might not detect the face).
If you expect such cases to happen, you might want to use the ``allow_missing_files`` variable.
When this variable is set to ``True``, missing files will be handled correctly throughout the whole processing chain, i.e.:

* the data file is not used during training (in any step of the processing tool chain)
* preprocessed data is not written

@@ -223,6 +271,6 @@ When this option is enabled, missing files will be handled correctly throughout

If several probe files are combined into one score, missing probe files will be ignored; if all probe files are not found, the score is ``NaN``.

.. warning::
   At the moment, combining the ``allow_missing_files`` and ``zt_norm`` variables might result in unexpected behavior, as the ZT-Norm computation does not handle ``NaN`` values appropriately.

.. include:: links.rst
@@ -52,11 +52,11 @@ If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it ov

* ``write_data(data, data_file)``: Writes the given data (that has been generated using the ``__call__`` function of this class) to file.
* ``read_data(data_file)``: Reads the preprocessed data from file.

The preprocessor is also responsible for reading the original data.
How to read original data can be specified by the ``read_original_data`` parameter of the constructor.
The ``read_original_data`` function gets three parameters: the :py:class:`bob.bio.base.database.BioFile` object from the database, the base ``directory`` where to read the data from, and the ``extension`` in which the original data is stored.
By default, this function is :py:func:`bob.bio.base.read_original_data`, which simply calls ``biofile.load(directory, extension)``, so that each database implementation can define an appropriate way how its data is read.
In the rare case that this is not the way that the preprocessor expects the data, another function can be passed to the constructor, i.e., in a configuration file of an experiment.
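As an illustration, a custom reader could be defined directly in the configuration file and passed to the preprocessor; the function below is a hypothetical sketch that bypasses ``biofile.load`` and reads the raw data with :py:func:`bob.io.base.load` instead:

.. code-block:: py

   import bob.io.base

   def my_read_original_data(biofile, directory, extension):
       # hypothetical reader: read the raw data directly instead of calling biofile.load()
       return bob.io.base.load(biofile.make_path(directory, extension))

   # pass it to the preprocessor of your choice via its constructor, e.g.:
   # preprocessor = MyPreprocessor(..., read_original_data=my_read_original_data)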
.. _bob.bio.base.extractors:
@@ -222,28 +222,24 @@ For Bob_'s ZT-norm databases, we provide the :py:class:`bob.bio.base.database.ZT

Defining your own Database
~~~~~~~~~~~~~~~~~~~~~~~~~~

.. note::
   If you have your own database that you want to execute the recognition experiments on, you should first check if you could use the `File List Database` interface by defining appropriate file lists for the training set, the model set, and the probes.
   Please refer to the documentation :doc:`filelist-guide` of this database for more instructions on how to setup this database.
   For an example, you might want to have a look into the implementation of the `Timit FileList database <http://gitlab.idiap.ch/bob/bob.bio.spear/tree/master/bob/bio/spear/config/database/timit>`_, where the protocol with the name ``2`` is implemented, and the corresponding `database configuration file <https://gitlab.idiap.ch/bob/bob.bio.spear/blob/master/bob/bio/spear/config/database/timit.py>`_.

To "plug" your own (non-file-list-based) database into this framework, you have to write your own database class derived from :py:class:`bob.bio.base.database.BioDatabase`, and provide the following functions:

* ``__init__(self, <your-parameters>, **kwargs)``: Constructor of your database interface.
  Please call the base class constructor, providing all the required parameters, e.g. by ``super(<your_db>, self).__init__(**kwargs)``.
  Usually, providing ids for the group ``'dev'`` should be sufficient.

* ``objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs)``
  This function must return a list of ``bob.bio.base.database.BioFile`` objects with your data.
  The keyword arguments are filters that you should use.

* ``model_ids_with_protocol(self, groups, protocol, **kwargs)``
  This function must return a list of model ids for the given groups and protocol.
  In this context, models are basically the "templates" used for enrollment.

Additionally, you can define more lists that can be used for ZT score normalization.
If you don't know what ZT score normalization is, just forget about it and move on.
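Coming back to the three required functions above, an illustrative sketch of such a derived class is shown below (the class name, the file layout, and the ids are hypothetical; a real interface would query its own file lists instead of returning hard-coded toy values):

.. code-block:: py

   import bob.bio.base

   class MyDatabase(bob.bio.base.database.BioDatabase):

       def __init__(self, original_directory=None, **kwargs):
           # forward the required parameters to the base class constructor
           super(MyDatabase, self).__init__(name='my-database', original_directory=original_directory, **kwargs)

       def objects(self, groups=None, protocol=None, purposes=None, model_ids=None, **kwargs):
           # return the BioFile objects matching the given filters (toy content)
           return [bob.bio.base.database.BioFile(client_id=1, path='subject1/sample1', file_id=1)]

       def model_ids_with_protocol(self, groups=None, protocol=None, **kwargs):
           # one model (template) per client in this toy example
           return [1]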
@@ -258,16 +254,15 @@ If you know and want to use it, just derive your class from :py:class:`bob.bio.b

* ``tmodel_ids_with_protocol(self, protocol=None, groups=None, **kwargs)``
  The ids for the T norm models for the given group and protocol.

.. note::
   For a proper biometric recognition protocol, the identities from the models and the T-Norm models, as well as the Z-probes, should be different.

For some protocols, a single probe consists of several features, see :ref:`bob.bio.base.algorithms` about strategies on how to incorporate several probe files into one score.
If your database should provide this functionality, please overwrite:

* ``uses_probe_file_sets(self)``: Return ``True`` if the current protocol of the database provides multiple files for one probe.
* ``probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of :py:class:`bob.bio.base.database.BioFileSet` objects.
* ``z_probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of Z-probe :py:class:`bob.bio.base.database.BioFileSet` objects.


.. _bob.bio.base.configuration-files:
@@ -298,7 +293,6 @@ For example, the configuration file for a PCA algorithm, which uses 80% of varia

   algorithm = bob.bio.base.algorithm.PCA(subspace_dimension = 0.8, distance_function = scipy.spatial.distance.cosine, is_distance_function = True)

Some default configuration files can be found in the ``bob/bio/*/config`` directories of all ``bob.bio`` packages, but you can create configuration files in any directory you like.

.. _bob.bio.base.resources:
@@ -324,7 +318,7 @@ Particularly, we use a specific list of entry points, which are:

For each of the tools, several resources are defined, which you can list with the ``resources.py`` command line.
When you want to register your own resource, make sure that your configuration file is importable (usually it is sufficient to have an empty ``__init__.py`` file in the same directory as your configuration file).
Then, you can simply add a line inside the corresponding ``entry_points`` section of the ``setup.py`` file (you might need to create that section, just follow the example of the ``setup.py`` file `that you can find online on the bob.bio.base Gitlab page <https://gitlab.idiap.ch/bob/bob.bio.base/blob/master/setup.py>`__).
After re-running ``buildout``, your new resource should be listed in the output of ``resources.py``.
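As a sketch, such a registration inside ``setup.py`` (as described above) could look like the following, using the standard setuptools ``'<name> = <module>:<object>'`` syntax; the package, module, and resource names are hypothetical:

.. code-block:: py

   entry_points = {
       'bob.bio.algorithm': [
           # '<resource-name> = <importable.module.path>:<variable-name-in-that-file>'
           'my-pca = my.package.config.algorithm.my_pca:algorithm',
       ],
   },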
......
@@ -12,33 +12,49 @@ Now that we have learned the implementation details, we can have a closer look i

Running Experiments (part II)
-----------------------------
As mentioned before, running biometric recognition experiments can be achieved using configuration files for the ``verify.py`` script.
In section :ref:`running_part_1`, we have used registered resources to run an experiment.
However, the variables (and also the :ref:`bob.bio.base.command_line` of ``verify.py``) are more flexible, as you can have three different ways of defining tools:

1. Choose a resource (see ``resources.py`` or ``verify.py --help`` or the result of ``verify.py --create-configuration-file`` for a list of registered resources):

   .. code-block:: py

      algorithm = "pca"

2. Use a (pre-defined) configuration file, see: :ref:`bob.bio.base.configuration-files`.
   In case several tools are specified inside the configuration file, only the variable that matches your variable will be used.
   For example, the file "bob/bio/base/config/algorithm/pca.py" might define several variables, but only the ``algorithm`` variable is used when setting:

   .. code-block:: py

      algorithm = "bob/bio/base/config/algorithm/pca.py"

3. Instantiate a class and pass all desired parameters to its constructor:

   .. code-block:: py

      import bob.bio.base
      import scipy.spatial

      algorithm = bob.bio.base.algorithm.PCA(
          subspace_dimension = 30,
          distance_function = scipy.spatial.distance.euclidean,
          is_distance_function = True
      )

   .. note::
      When specified on the command line, usually quotes ``"..."`` are required, and the ``--imports`` need to be provided:

      .. code-block:: sh

         $ verify.py --algorithm "bob.bio.base.algorithm.PCA(subspace_dimension = 30, distance_function = scipy.spatial.distance.euclidean, is_distance_function = True)" --imports bob.bio.base scipy.spatial

All these three ways can be used for any of the five variables: ``database``, ``preprocessor``, ``extractor``, ``algorithm`` and ``grid``.
You can even mix these three types freely in a single configuration file.
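For instance, a single configuration file might mix registered resources with a directly instantiated class; the sketch below simply combines the resources and the PCA construction shown above:

.. code-block:: py

   import bob.bio.base
   import scipy.spatial

   database = 'atnt'             # a registered resource
   preprocessor = 'face-detect'  # a registered resource (requires bob.bio.face)
   extractor = 'linearize'
   algorithm = bob.bio.base.algorithm.PCA(
       subspace_dimension = 30,
       distance_function = scipy.spatial.distance.euclidean,
       is_distance_function = True
   )
   sub_directory = 'pca_experiment'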
Score Level Fusion of Different Algorithms on the same Database
@@ -57,16 +73,15 @@ Afterwards, the fusion is applied to the ``--dev-files`` and the resulting score

If ``--eval-files`` are specified, the same fusion that is trained on the development set is now applied to the evaluation set as well, and the ``--fused-eval-file`` is written.

.. note::
   When ``--eval-files`` are specified, they need to be in the same order as the ``--dev-files``, otherwise the result is undefined.

The resulting ``--fused-dev-file`` and ``--fused-eval-file`` can then be evaluated normally, e.g., using the ``evaluate.py`` script.
.. _grid-search:

Finding the Optimal Configuration
---------------------------------
Sometimes, configurations of tools (preprocessors, extractors or algorithms) are highly dependent on the database or even the employed protocol.
Additionally, configuration parameters depend on each other.
``bob.bio`` provides a relatively simple setup that allows you to test different configurations on the same task, and find out the best set of configurations.

@@ -90,7 +105,7 @@ The configuration file is a common python file, which can contain certain variab

The variables from 1. to 3. usually contain instantiations for classes of :ref:`bob.bio.base.preprocessors`, :ref:`bob.bio.base.extractors` and :ref:`bob.bio.base.algorithms`, but also registered :ref:`bob.bio.base.resources` can be used.
For any of the parameters of the classes, a *placeholder* can be put.
By default, these place holders start with a ``#`` character, followed by a digit or character.
The variables 1. to 3. can also be overridden by the command line options ``--preprocessor``, ``--extractor`` and ``--algorithm`` of the ``grid_search.py`` script.

The ``replace`` variable has to be set as a dictionary.
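A minimal sketch of the expected shape is shown below; the step names, placeholders, directory names, and values here are purely illustrative (see the complete example referenced further below):

.. code-block:: py

   replace = {
       # step of the tool chain -> {placeholder(s) -> {directory name -> replacement value}}
       'preprocess': {
           '#a': {'Dir_a1': 10, 'Dir_a2': 20},
           '#b': {'Dir_b1': 0.1, 'Dir_b2': 0.2},
       },
       'score': {
           '#c': {'Dir_c1': 'cosine', 'Dir_c2': 'euclidean'},
       },
   }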
@@ -130,7 +145,7 @@ In the above example, the results of the experiments will be placed into a direc

.. note::
   Please note that we are using a dictionary structure to define the replacements.
   Hence, the order of the directories inside the same step might not be in the same order as written in the configuration file.
   For the above example, a directory structure of ``results/[...]/Dir_b1/Dir_a1/Dir_c1/[...]`` might be possible as well.

Additionally, tuples of place holders can be defined, in which case the full tuple will always be replaced in one shot.
@@ -147,7 +162,7 @@ Continuing the above example, it is possible to add:

   }

.. warning::
   **All possible combinations** of the configuration parameters are tested, which might result in a **huge number of executed experiments**.
   Some combinations of parameters might not make any sense.
   In this case, a set of requirements on the parameters can be set, using the ``requirement`` variable.
@@ -165,6 +180,8 @@ If you, e.g., test, which ``scipy.spatial`` distance function works best for you

   imports = ['scipy', 'bob.bio.base', 'bob.bio.face']

For a complete example of the grid search configuration file, you might want to have a look into `the actual file that is used to test the grid search <https://gitlab.idiap.ch/bob/bob.bio.base/blob/master/bob/bio/base/test/dummy/grid_search.py>`__.
Further Command Line Options
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``grid_search.py`` script has a further set of command line options.

@@ -184,10 +201,8 @@ The ``grid_search.py`` script has a further set of command line options.

- With the ``--executable`` flag, you might select a different script rather than ``bob.bio.base.script.verify`` to run the experiments (such as the ``bob.bio.gmm.script.verify_gmm``).
- Finally, additional options might be sent to the ``verify.py`` script directly. These options might be put after a ``--`` separation.
Evaluation of Results
~~~~~~~~~~~~~~~~~~~~~
To evaluate a series of experiments, a special script iterates through all the results and computes EER on the development set and HTER on the evaluation set, for both the ``nonorm`` and the ``ztnorm`` directories.
Simply call:

@@ -203,6 +218,4 @@ Hence, to find the best results of your grid search experiments (with default di

   $ collect_results.py -vv --directory results/grid_search --sort --criterion EER --sort-key nonorm-dev

.. include:: links.rst
@@ -10,6 +10,7 @@ IO-related functions
~~~~~~~~~~~~~~~~~~~~

.. autosummary::

   bob.bio.base.read_original_data
   bob.bio.base.load
   bob.bio.base.save
   bob.bio.base.load_compressed
......
@@ -94,7 +94,7 @@ setup(
      'bob.bio.preprocessor': [
        'dummy = bob.bio.base.test.dummy.preprocessor:preprocessor', # for test purposes only
        'filename = bob.bio.base.config.preprocessor.filename:preprocessor',
      ],
      'bob.bio.extractor': [
......