diff --git a/bob/fusion/base/__init__.py b/bob/fusion/base/__init__.py index b36187c58512bb6deb58d64649386fc9658d828d..dabab0fbe1ae33b4075216805efa6f9bd9095771 100644 --- a/bob/fusion/base/__init__.py +++ b/bob/fusion/base/__init__.py @@ -1,6 +1,7 @@ -# from .utils import * from . import algorithm from . import tools +from . import config +from . import script def get_config(): """Returns a string containing the configuration information. diff --git a/bob/fusion/base/algorithm/Algorithm.py b/bob/fusion/base/algorithm/Algorithm.py index a769e33006843b95bb162821e57d9bd335ab5dcc..d83cb9d08ef5b188f1b2a0dddf92a39585c23dbf 100644 --- a/bob/fusion/base/algorithm/Algorithm.py +++ b/bob/fusion/base/algorithm/Algorithm.py @@ -3,7 +3,6 @@ from __future__ import division from __future__ import absolute_import -from ..tools import grouping import numpy as np import pickle @@ -12,7 +11,7 @@ logger = bob.core.log.setup("bob.fusion.base") class Algorithm(object): - """docstring for Algorithm""" + """A class to be used in score fusion""" def __init__(self, preprocessors=None, @@ -21,6 +20,13 @@ class Algorithm(object): **kwargs ): """ + preprocessors: A list of preprocessors that follow the API of + :py:meth:`sklearn.preprocessing.StandardScaler`. Especially `fit_transform` + and `transform` must be implemented. + + classifier: An instance of a class that implements `fit(X[, y])` and + `decision_function(X)` like: + :py:meth:`sklearn.linear_model.LogisticRegression` kwargs : ``key=value`` pairs A list of keyword arguments to be written in the @@ -45,6 +51,13 @@ class Algorithm(object): return scores def train(self, train, devel=None): + """If you use development data for training you need to override this + method. + train: A :py:meth:`tuple` of length 2 containing + the negatives and positives. negatives and positives should be + numpy.ndarray with the shape of (n_samples, n_systems). 
+ devel: same as train but used for development (validation) + """ (negatives, positives) = train train_scores = np.vstack((negatives, positives)) neg_len = negatives.shape[0] @@ -53,10 +66,10 @@ class Algorithm(object): self.classifier.fit(train_scores, y) def fuse(self, scores): - if hasattr(self, 'classifier'): - return self.classifier.decision_function(scores) - else: - return self.decision_function(scores) + """ + scores: A numpy.ndarray with the shape of (n_samples, n_systems). + """ + return self.classifier.decision_function(scores) def __str__(self): """__str__() -> info @@ -74,81 +87,11 @@ class Algorithm(object): self._kwargs.items() if value is not None])) def save(self, model_file): + """If your class cannot be pickled, you need to override this method.""" with open(model_file, "wb") as f: pickle.dump(self, f) def load(self, model_file): + """If your class cannot be pickled, you need to override this method.""" with open(model_file, "rb") as f: return pickle.load(f) - - def plot_boundary_decision(self, scores, score_labels, threshold, - thres_system1=None, - thres_system2=None, - do_grouping=False, - resolution=100, - x_pad=0.5, - y_pad=0.5, - alpha=0.75, - legends=None, - i1=0, - i2=1, - **kwargs - ): - ''' - Plots the boundary decision of the Algorithm - - @param score_labels numpy.array A (scores.shape[0]) array containing - the true labels of scores. - - @param threshold float threshold of the decision boundary - ''' - if legends is None: - legends = ['Impostor', 'Genuine'] - markers = ['x', 'o'] - - if scores.shape[1] > 2: - raise NotImplementedError( - "Currently plotting the decision boundary for more than two systems " - "is not supported.") - - import matplotlib.pyplot as plt - plt.gca() # this is necessary for subplots to work. 
- - X = scores[:, [i1, i2]] - Y = score_labels - x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad - y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad - xx, yy = np.meshgrid( - np.linspace(x_min, x_max, resolution), - np.linspace(y_min, y_max, resolution)) - temp = np.c_[xx.ravel(), yy.ravel()] - temp = self.preprocess(temp) - Z = (self.fuse(temp) > threshold).reshape(xx.shape) - - contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis) - - if do_grouping: - negatives, positives = X[np.logical_not(Y)], X[Y] - negatives, positives = grouping(negatives, positives, **kwargs) - X = np.concatenate((negatives, positives), axis=0) - Y = np.concatenate( - (np.zeros(negatives.shape[0], dtype=np.bool8), - np.ones(positives.shape[0], dtype=np.bool8)), - axis=0) - - negatives, positives = X[np.logical_not(Y)], X[Y] - colors = plt.cm.viridis(np.linspace(0, 1, 2)) - for i, X in enumerate((negatives, positives)): - plt.scatter( - X[:, 0], X[:, 1], marker=markers[i], alpha=alpha, - c=colors[i], label=legends[i]) - plt.legend() - - if thres_system1 is not None: - plt.axvline(thres_system1, color='red') - plt.axhline(thres_system2, color='red') - - plt.xlim([x_min, x_max]) - plt.ylim([y_min, y_max]) - - return contourf diff --git a/bob/fusion/base/algorithm/MLP.py b/bob/fusion/base/algorithm/MLP.py index 6d8e46670a52ce1fe902230919138d2f526dfa95..8de2c08cfba9233a052555439dfaeed935e44662 100644 --- a/bob/fusion/base/algorithm/MLP.py +++ b/bob/fusion/base/algorithm/MLP.py @@ -16,8 +16,8 @@ logger = bob.core.log.setup("bob.fusion.base") class MLP(Algorithm): - """This MLP is implemented using the bob tools - It may change its API and functionality in the future. + """This MLP is implemented using the bob tools. + The preprocessors used with this class should be pickleable. 
""" def __init__(self, diff --git a/bob/fusion/base/algorithm/Weighted_Sum.py b/bob/fusion/base/algorithm/Weighted_Sum.py index 68fc899e59a0b9a5b32b7c4acbadac8926c8dcc9..0a70e0e4daf422641d302ff71c6f2f6362fa6d62 100644 --- a/bob/fusion/base/algorithm/Weighted_Sum.py +++ b/bob/fusion/base/algorithm/Weighted_Sum.py @@ -12,7 +12,7 @@ logger = bob.core.log.setup("bob.fusion.base") class Weighted_Sum(Algorithm): - """docstring for Weighted_Sum weighted sum (default: mean)""" + """weighted sum (default: mean)""" def __init__(self, weights=None, *args, **kwargs): super(Weighted_Sum, self).__init__( diff --git a/bob/fusion/base/script/plot_fusion_decision_boundary.py b/bob/fusion/base/script/plot_fusion_decision_boundary.py index 11e5c1fc1ecf493a91cd9a39cb24a0494de3fab0..f5817fcea1b57dd53999b0d803221067ef739482 100644 --- a/bob/fusion/base/script/plot_fusion_decision_boundary.py +++ b/bob/fusion/base/script/plot_fusion_decision_boundary.py @@ -36,10 +36,84 @@ import bob.core from bob.measure.load import load_score, get_negatives_positives,\ get_all_scores from bob.measure import eer_threshold +from ..tools import grouping logger = bob.core.log.setup("bob.fusion.base") +def plot_boundary_decision(algorithm, scores, score_labels, threshold, + thres_system1=None, + thres_system2=None, + do_grouping=False, + resolution=100, + x_pad=0.5, + y_pad=0.5, + alpha=0.75, + legends=None, + i1=0, + i2=1, + **kwargs + ): + ''' + Plots the boundary decision of the Algorithm + + @param score_labels numpy.array A (scores.shape[0]) array containing + the true labels of scores. + + @param threshold float threshold of the decision boundary + ''' + if legends is None: + legends = ['Impostor', 'Genuine'] + markers = ['x', 'o'] + + if scores.shape[1] > 2: + raise NotImplementedError( + "Currently plotting the decision boundary for more than two systems " + "is not supported.") + + import matplotlib.pyplot as plt + plt.gca() # this is necessary for subplots to work. 
+ + X = scores[:, [i1, i2]] + Y = score_labels + x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad + y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad + xx, yy = numpy.meshgrid( + numpy.linspace(x_min, x_max, resolution), + numpy.linspace(y_min, y_max, resolution)) + temp = numpy.c_[xx.ravel(), yy.ravel()] + temp = algorithm.preprocess(temp) + Z = (algorithm.fuse(temp) > threshold).reshape(xx.shape) + + contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis) + + if do_grouping: + negatives, positives = X[numpy.logical_not(Y)], X[Y] + negatives, positives = grouping(negatives, positives, **kwargs) + X = numpy.concatenate((negatives, positives), axis=0) + Y = numpy.concatenate( + (numpy.zeros(negatives.shape[0], dtype=numpy.bool8), + numpy.ones(positives.shape[0], dtype=numpy.bool8)), + axis=0) + + negatives, positives = X[numpy.logical_not(Y)], X[Y] + colors = plt.cm.viridis(numpy.linspace(0, 1, 2)) + for i, X in enumerate((negatives, positives)): + plt.scatter( + X[:, 0], X[:, 1], marker=markers[i], alpha=alpha, + c=colors[i], label=legends[i]) + plt.legend() + + if thres_system1 is not None: + plt.axvline(thres_system1, color='red') + plt.axhline(thres_system2, color='red') + + plt.xlim([x_min, x_max]) + plt.ylim([y_min, y_max]) + + return contourf + + def main(command_line_parameters=None): args = docopt(__doc__, argv=command_line_parameters, version=bob.fusion.base.get_config()) @@ -60,8 +134,8 @@ def main(command_line_parameters=None): score_labels = score_lines['claimed_id'] == score_lines['real_id'] # plot the decision boundary - algorithm.plot_boundary_decision( - scores, score_labels, threshold, + plot_boundary_decision( + algorithm, scores, score_labels, threshold, do_grouping=True, npoints=int(args['--group']), seed=0, diff --git a/doc/conf.py b/doc/conf.py index 22958db4da9789a3eab05df5c2ec4a13255083fc..2ac5668799098965a8768b5ce262662432badb01 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -58,12 +58,12 @@ source_suffix 
= '.rst' master_doc = 'index' # General information about the project. -project = u'Bobs interface for running biometric recognition experiments' +project = u'Bobs interface for running score fusion in biometric recognition experiments' import time copyright = u'%s, Idiap Research Institute' % time.strftime('%Y') # Grab the setup entry -distribution = pkg_resources.require('bob.bio.base')[0] +distribution = pkg_resources.require('bob.fusion.base')[0] # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/doc/experiments.rst b/doc/experiments.rst deleted file mode 100644 index 77dbf262dc46867285daf38f1dcb9d79b1e36616..0000000000000000000000000000000000000000 --- a/doc/experiments.rst +++ /dev/null @@ -1,235 +0,0 @@ -.. vim: set fileencoding=utf-8 : -.. author: Manuel Günther <manuel.guenther@idiap.ch> -.. date: Thu Sep 20 11:58:57 CEST 2012 - -.. _bob.bio.base.experiments: - - -========================================= -Running Biometric Recognition Experiments -========================================= - -Now, you are almost ready to run your first biometric recognition experiment. -Just a little bit of theory, and then: off we go. - - -Structure of a Biometric Recognition Experiment ------------------------------------------------ - -Each biometric recognition experiment that is run with ``bob.bio`` is divided into several steps. -The steps are: - -1. Data preprocessing: Raw data is preprocessed, e.g., for face recognition, faces are detected, images are aligned and photometrically enhanced. -2. Feature extractor training: Feature extraction parameters are learned. -3. Feature extraction: Features are extracted from the preprocessed data. -4. Feature projector training: Parameters of a subspace-projection of the features are learned. -5. Feature projection: The extracted features are projected into a subspace. -6. 
Model enroller training: The way to enroll models from extracted or projected features is learned. -7. Model enrollment: One model is enrolled from the features of one or more images. -8. Scoring: The verification scores between various models and probe features are computed. -9. Evaluation: The computed scores are evaluated and curves are plotted. - -These 9 steps are divided into four distinct groups, which are discussed in more detail later: - -* Preprocessing (only step 1) -* Feature extraction (steps 2 and 3) -* Biometric recognition (steps 4 to 8) -* Evaluation (step 9) - -The communication between two steps is file-based, usually using a binary HDF5_ interface, which is implemented in the :py:class:`bob.io.base.HDF5File` class. -The output of one step usually serves as the input of the subsequent step(s). -Depending on the algorithm, some of the steps are not applicable/available. -E.g. most of the feature extractors do not need a special training step, or some algorithms do not require a subspace projection. -In these cases, the corresponding steps are skipped. -``bob.bio`` takes care that the correct files are always forwarded to the subsequent steps. - - -.. _running_part_1: - -Running Experiments (part I) ----------------------------- - -To run an experiment, we provide a generic script ``./bin/verify.py``, which is highly parametrizable. -To get a complete list of command line options, please run: - -.. code-block:: sh - - $ ./bin/verify.py --help - -Whoops, that's a lot of options. -But, no worries, most of them have proper default values. - -.. note:: - Sometimes, command line options have a long version starting with ``--`` and a short one starting with a single ``-``. - In this section, only the long names of the arguments are listed, please refer to ``./bin/verify.py --help`` (or short: ``./bin/verify.py -h``) for the abbreviations.
- -There are five command line options, which are required and sufficient to define the complete biometric recognition experiment. -These five options are: - -* ``--database``: The database to run the experiments on -* ``--preprocessor``: The data preprocessor -* ``--extractor``: The feature extractor -* ``--algorithm``: The recognition algorithm -* ``--sub-directory``: A descriptive name for your experiment, which will serve as a sub-directory - -The first four parameters, i.e., the ``database``, the ``preprocessor``, the ``extractor`` and the ``algorithm`` can be specified in several different ways. -For the start, we will use only the registered :ref:`Resources <bob.bio.base.resources>`. -These resources define the source code that will be used to compute the experiments, as well as all the meta-parameters of the algorithms (which we will call the *configuration*). -To get a list of registered resources, please call: - -.. code-block:: sh - - $ ./bin/resources.py - -Each package in ``bob.bio`` defines its own resources, and the printed list of registered resources differs according to the installed packages. -If only ``bob.bio.base`` is installed, no databases and no preprocessors will be listed. - -.. note:: - You will also find some ``grid`` resources being listed. - These type of resources will be explained :ref:`later <running_in_parallel>`. - -Before going into :ref:`more details about the configurations <running_part_2>`, we will provide information about running default experiments. - -One command line option, which is not required, but recommended, is the ``--verbose`` option. -By default, the algorithms are set up to execute quietly, and only errors are reported. 
-To change this behavior, you can use the ``--verbose`` option several times to increase the verbosity level to show: - -1) Warning messages -2) Informative messages -3) Debug messages - -When running experiments, my personal preference is verbose level 2, which can be enabled by ``--verbose --verbose``, or using the short version: ``-vv``. -So, a typical biometric recognition experiment (in this case, face recognition) could look something like: - -.. code-block:: sh - - $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv - -.. note:: - To be able to run exactly the command line from above, you need to have :ref:`bob.bio.face <bob.bio.face>` installed. - -Before running an experiment, it is recommended to add the ``--dry-run`` option, so that it will only print which steps would be executed, without actually executing them, and make sure that everything works as expected. - -The final result of the experiment will be one (or more) score file(s). -Usually, they will be called something like ``scores-dev``. -By default, you can find them in a sub-directory of the ``result`` directory, but you can change this option using the ``--result-directory`` command line option. - -.. note:: - At Idiap_, the default result directory differs, see ``./bin/verify.py --help`` for your directory. - - -.. _bob.bio.base.evaluate: - -Evaluating Experiments ----------------------- - -After the experiment has finished successfully, one or more text files containing all the scores are written. - -To evaluate the experiment, you can use the generic ``./bin/evaluate.py`` script, which has properties for all prevalent evaluation types, such as CMC, ROC and DET plots, as well as computing recognition rates, EER/HTER, Cllr and minDCF. -Additionally, a combination of different algorithms can be plotted into the same files.
-Just specify all the score files that you want to evaluate using the ``--dev-files`` option, and possible legends for the plots (in the same order) using the ``--legends`` option, and the according plots will be generated. -For example, to create a ROC curve for the experiment above, use: - -.. code-block:: sh - - $ ./bin/evaluate.py --dev-files results/pca-experiment/male/nonorm/scores-dev --legend MOBIO --roc MOBIO_MALE_ROC.pdf -vv - -Please note that there exists another file called ``Experiment.info`` inside the result directory. -This file is a pure text file and contains the complete configuration of the experiment. -With this configuration it is possible to inspect all default parameters of the algorithms, and even to re-run the exact same experiment. - - -.. _running_in_parallel: - -Running in Parallel -------------------- - -One important property of the ``./bin/verify.py`` script is that it can run in parallel, using either several threads on the local machine, or an SGE grid. -To achieve that, ``bob.bio`` is well-integrated with our SGE grid toolkit GridTK_, which we have selected as a python package in the :ref:`Installation <bob.bio.base.installation>` section. -The ``./bin/verify.py`` script can submit jobs either to the SGE grid, or to a local scheduler, keeping track of dependencies between the jobs. - -The GridTK_ keeps a list of jobs in a local database, which by default is called ``submitted.sql3``, but which can be overwritten with the ``--gridtk-database-file`` option. -Please refer to the `GridTK documentation <http://pythonhosted.org/gridtk>`_ for more details on how to use the Job Manager ``./bin/jman``. - -Two different types of ``grid`` resources are defined, which can be used with the ``--grid`` command line option. -The first type of resources will submit jobs to an SGE grid. -They are mainly designed to run in the Idiap_ SGE grid and might need some adaptations to run on your grid. 
-The second type of resources will submit jobs to a local queue, which needs to be run by hand (e.g., using ``./bin/jman --local run-scheduler --parallel 4``), or by using the command line option ``--run-local-scheduler``. -The difference between the two types of resources is that the local submission usually starts with ``local-``, while the SGE resource does not. - -Hence, to run the same experiment as above using four parallel threads on the local machine, re-nicing the jobs to level 10, simply call: - -.. code-block:: sh - - $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --grid local-p4 --run-local-scheduler --nice 10 - -.. note:: - You might realize that the second execution of the same experiment is much faster than the first one. - This is due to the fact that those parts of the experiment, which have been successfully executed before (i.e., the according files already exist), are skipped. - To override this behavior, i.e., to always regenerate all parts of the experiments, you can use the ``--force`` option. - - -Command Line Options to change Default Behavior ------------------------------------------------ -Additionally to the required command line arguments discussed above, there are several options to modify the behavior of the experiments. -One set of command line options change the directory structure of the output. 
-By default, intermediate (temporary) files are written to the ``temp`` directory, which can be overridden by the ``--temp-directory`` command line option, which expects relative or absolute paths: - -Re-using Parts of Experiments -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -If you want to re-use parts of previous experiments, you can specify the directories (which are relative to the ``--temp-directory``, but you can also specify absolute paths): - -* ``--preprocessed-data-directory`` -* ``--extracted-directory`` -* ``--projected-directory`` -* ``--models-directories`` (one each for the models and the ZT-norm models, see below) - -or even trained extractor, projector, or enroller (i.e., the results of the extractor, projector, or enroller training): - -* ``--extractor-file`` -* ``--projector-file`` -* ``--enroller-file`` - -For that purpose, it is also useful to skip parts of the tool chain. -To do that you can use: - -* ``--skip-preprocessing`` -* ``--skip-extractor-training`` -* ``--skip-extraction`` -* ``--skip-projector-training`` -* ``--skip-projection`` -* ``--skip-enroller-training`` -* ``--skip-enrollment`` -* ``--skip-score-computation`` -* ``--skip-concatenation`` -* ``--skip-calibration`` - -although by default files that already exist are not re-created. -You can use the ``--force`` argument combined with the ``--skip...`` arguments (in which case the skip is preferred). -To run just a sub-selection of the tool chain, you can also use the ``--execute-only`` option, which takes a list of options out of: ``preprocessing``, ``extractor-training``, ``extraction``, ``projector-training``, ``projection``, ``enroller-training``, ``enrollment``, ``score-computation``, ``concatenation`` or ``calibration``. - - -Database-dependent Arguments -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Many databases define several protocols that can be executed. -To change the protocol, you can either modify the configuration file, or simply use the ``--protocol`` option.
- -Some databases define several kinds of evaluation setups. -For example, often two groups of data are defined, a so-called *development set* and an *evaluation set*. -The scores of the two groups will be concatenated into two files called **scores-dev** and **scores-eval**, which are located in the score directory (see above). -In this case, by default only the development set is employed. -To use both groups, just specify ``--groups dev eval`` (of course, you can also only use the ``'eval'`` set by calling ``--groups eval``). - -One score normalization technique is the so-called ZT score normalization. -To enable this, simply use the ``--zt-norm`` option. -If the ZT-norm is enabled, two sets of scores will be computed, and they will be placed in two different sub-directories of the score directory, which are by default called **nonorm** and **ztnorm**, but which can be changed using the ``--zt-score-directories`` option. - - -Other Arguments ---------------- - -For some applications it is interesting to get calibrated scores. -Simply add the ``--calibrate-scores`` option and another set of score files will be created by training the score calibration on the scores of the ``'dev'`` group and execute it to all available groups. -The scores will be located at the same directory as the **nonorm** and **ztnorm** scores, and the file names are **calibrated-dev** (and **calibrated-eval** if applicable) . - -.. include:: links.rst diff --git a/doc/implementation.rst b/doc/implementation.rst deleted file mode 100644 index 356aba45ba24db91729de887f716a6d1e2bc2bdd..0000000000000000000000000000000000000000 --- a/doc/implementation.rst +++ /dev/null @@ -1,330 +0,0 @@ -.. vim: set fileencoding=utf-8 : -.. Manuel Guenther <Manuel.Guenther@idiap.ch> -.. Mon 23 04 2012 - -====================== -Implementation Details -====================== - -The ``bob.bio`` module is specifically designed to be as flexible as possible while trying to keep things simple. 
-Therefore, it uses python to implement tools such as preprocessors, feature extractors and recognition algorithms. -It is file based so any tool can implement its own way of reading and writing data, features or models. -Configurations are stored in configuration files, so it should be easy to test different parameters of your algorithms without modifying the code. - - -Base Classes ------------- - -All tools implemented in the ``bob.bio`` packages are based on some classes, which are defined in the ``bob.bio.base`` package, and which are detailed below. -Most of the functionality is provided in the base classes, but any function can be overridden in the derived class implementations. - -In the derived class constructors, the base class constructor needs to be called. -For automatically tracing the algorithms, all parameters that are passed to the derived class constructor should be passed to the base class constructor as a list of keyword arguments (which is indicated by ``...`` below). -This ensures that all parameters of the experiments are stored into the ``Experiment.info`` file. - -.. note:: - All tools are based on reading, processing and writing files. - By default, any type of file is allowed to be handled, and file names are provided to the ``read_...`` and ``write_...`` functions as strings. - However, some of the extensions -- particularly the :ref:`bob.bio.video <bob.bio.video>` extension -- require the read and write functions to handle files of type :py:class:`bob.io.base.HDF5File`. - -If you plan to write your own tools, please ensure that you follow the structure described below. - - -.. _bob.bio.base.preprocessors: - -Preprocessors -~~~~~~~~~~~~~ - -All preprocessor classes are derived from :py:class:`bob.bio.base.preprocessor.Preprocessor`. -All of them implement the following two functions: - -* ``__init__(self, <parameters>)``: Initializes the preprocessing algorithm with the parameters it needs.
- The base class constructor is called in the derived class constructor, e.g. as ``bob.bio.base.preprocessor.Preprocessor.__init__(self, ...)``. -* ``__call__(self, original_data, annotations) -> data``: preprocesses the data given the dictionary of annotations (e.g. ``{'reye' : [re_y, re_x], 'leye': [le_y, le_x]}`` for face images). - - .. note:: - When the database does not provide annotations, the ``annotations`` parameter might be ``None``. - -By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`. -In that case, the base class IO functionality can be used. -If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it overwrites further functions from :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class: - -* ``write_data(data, data_file)``: Writes the given data (that has been generated using the ``__call__`` function of this class) to file. -* ``read_data(data_file)``: Reads the preprocessed data from file. - -By default, the original data is read by :py:func:`bob.io.base.load`. -Hence, data is given as :py:class:`numpy.ndarray`\s. -When a different IO for the original data is required (for example to read videos in :py:class:`bob.bio.video.preprocessor.Video`), the following function is overridden: - -* ``read_original_data(filename)``: Reads the original data from file. - - -.. _bob.bio.base.extractors: - -Extractors -~~~~~~~~~~ - -Feature extractors should be derived from the :py:class:`bob.bio.base.extractor.Extractor` class. -All extractor classes provide at least the functions: - -* ``__init__(self, <parameters>)``: Initializes the feature extraction algorithm with the parameters it needs. - Calls the base class constructor, e.g. as ``bob.bio.base.extractor.Extractor.__init__(self, ...)`` (there are more parameters to this constructor, see below). -* ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data. 
- By default, the returned feature should be a :py:class:`numpy.ndarray`. - -If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden. -In this case, also the function to read that kind of features needs to be overridden: - -* ``write_feature(self, feature, feature_file)``: Writes the feature (as returned by the ``__call__`` function) to the given file name. -* ``read_feature(self, feature_file) -> feature``: Reads the feature (as written by the ``write_feature`` function) from the given file name. - -.. note:: - If the feature is of a class that contains and is written via a ``save(bob.io.base.HDF5File)`` method, the ``write_feature`` function does not need to be overridden. - However, the ``read_feature`` function is required in this case. - -If the feature extraction process requires reading a trained extractor model from file, the following function is overridden: - -* ``load(self, extractor_file)``: Loads the extractor from file. - This function is called at least once before the ``__call__`` function is executed. - -It is also possible to train the extractor model before it is used. -In this case, two things are done. -First, the ``train`` function is overridden: - -* ``train(self, image_list, extractor_file)``: Trains the feature extractor with the given list of images and writes the ``extractor_file``. - -Second, this behavior is registered in the ``__init__`` function by calling the base class constructor with more parameters: ``bob.bio.base.extractor.Extractor.__init__(self, requires_training=True, ...)``. -Given that the training algorithm needs to have the training data split by identity, the ``bob.bio.base.extractor.Extractor.__init__(self, requires_training=True, split_training_images_by_client = True, ...)`` is used instead. - - -.. _bob.bio.base.algorithms: - -Algorithms -~~~~~~~~~~ -The implementation of recognition algorithms is just as straightforward.
-All algorithms are derived from the :py:class:`bob.bio.base.algorithm.Algorithm` class. -The constructor of this class has the following options, which are selected according to the current algorithm: - -* ``performs_projection``: If set to ``True``, features will be projected using the ``project`` function. - With the default ``False``, the ``project`` function will not be called at all. -* ``requires_projector_training``: If ``performs_projection`` is enabled, this flag specifies if the projector needs training. - If ``True`` (the default), the ``train_projector`` function will be called. -* ``split_training_features_by_client``: If the projector training needs training images split up by client identity, this flag is enabled. - In this case, the ``train_projector`` function will receive a list of lists of features. - If set to ``False`` (the default), the training features are given in one list. -* ``use_projected_features_for_enrollment``: If features are projected, by default (``True``) models are enrolled using the projected features. - If the algorithm requires the original unprojected features to enroll the model, ``use_projected_features_for_enrollment=False`` is selected. -* ``requires_enroller_training``: Enables the enroller training. - By default (``False``), no enroller training is performed, i.e., the ``train_enroller`` function is not called. - -* ``multiple_model_scoring``: The way to handle scoring when models store several features. - Set this parameter to ``None`` when you implement your own functionality to handle models from several features (see below). -* ``multiple_probe_scoring``: The way to handle scoring when several probe features are given. - Set this parameter to ``None`` when you handle scoring with multiple probes with your own ``score_for_multiple_probes`` function (see below).
- -A recognition algorithm has to override at least three functions: - -* ``__init__(self, <parameters>)``: Initializes the face recognition algorithm with the parameters it needs. - Calls the base class constructor, e.g. as ``bob.bio.base.algorithm.Algorithm.__init__(self, ...)`` (there are more parameters to this constructor, see above). -* ``enroll(self, enroll_features) -> model``: Enrolls a model from the given vector of features (this list usually contains features from several files of one subject) and returns it. - The returned model is either a :py:class:`numpy.ndarray` or an instance of a class that defines a ``save(bob.io.base.HDF5File)`` method. - If neither of the two options are appropriate, a ``write_model`` function is defined (see below). -* ``score(self, model, probe) -> value``: Computes a similarity or probability score that the given probe feature and the given model stem from the same identity. - - .. note:: - When you use a distance measure in your scoring function, and lower distances represents higher probabilities of having the same identity, please return the negative distance. - -Additionally, an algorithm may need to project the features before they can be used for enrollment or recognition. -In this case, (some of) the function(s) are overridden: - -* ``train_projector(self, train_features, projector_file)``: Uses the given list of features and writes the ``projector_file``. - - .. warning:: - If you write this function, please assure that you use both ``performs_projection=True`` and ``requires_projector_training=True`` (for the latter, this is the default, but not for the former) during the base class constructor call in your ``__init__`` function. - If you need the training data to be sorted by clients, please use ``split_training_features_by_client=True`` as well. - Please also assure that you overload the ``project`` function. 
- -* ``load_projector(self, projector_file)``: Loads the projector from the given file, i.e., as stored by ``train_projector``. - This function is always called before the ``project``, ``enroll``, and ``score`` functions are executed. -* ``project(self, feature) -> feature``: Projects the given feature and returns the projected feature, which should either be a :py:class:`numpy.ndarray` or an instance of a class that defines a ``save(bob.io.base.HDF5File)`` method. - - .. note:: - If you write this function, please assure that you use ``performs_projection=True`` during the base class constructor call in your ``__init__`` function. - -And once more, if the projected feature is not of type ``numpy.ndarray``, the following methods are overridden: - -* ``write_feature(feature, feature_file)``: Writes the feature (as returned by the ``project`` function) to file. -* ``read_feature(feature_file) -> feature``: Reads and returns the feature (as written by the ``write_feature`` function). - -Some tools also require to train the model enrollment functionality (or shortly the ``enroller``). -In this case, these functions are overridden: - -* ``train_enroller(self, training_features, enroller_file)``: Trains the model enrollment with the list of lists of features and writes the ``enroller_file``. - - .. note:: - If you write this function, please assure that you use ``requires_enroller_training=True`` during the base class constructor call in your ``__init__`` function. - -* ``load_enroller(self, enroller_file)``: Loads the enroller from file. - This function is always called before the ``enroll`` and ``score`` functions are executed. - - -By default, it is assumed that both the models and the probe features are of type :py:class:`numpy.ndarray`. -If the ``score`` function expects models and probe features to be of a different type, these functions are overridden: - -* ``write_model(self, model, model_file)``: writes the model (as returned by the ``enroll`` function). 
-* ``read_model(self, model_file) -> model``: reads the model (as written by the ``write_model`` function) from file. -* ``read_probe(self, probe_file) -> feature``: reads the probe feature from file. - - .. note:: - In many cases, the ``read_feature`` and ``read_probe`` functions are identical (if both are present). - -Finally, the :py:class:`bob.bio.base.algorithm.Algorithm` class provides default implementations for the case that models store several features, or that several probe features should be combined into one score. -These two functions are: - -* ``score_for_multiple_models(self, models, probe)``: In case your model store several features, **call** this function to compute the average (or min, max, ...) of the scores. -* ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes are computed. **Override** this function in case you want different behavior. - - -Implemented Tools ------------------ - -In this base class, only one feature extractor and some recognition algorithms are defined. -However, implementations of the base classes can be found in all of the ``bob.bio`` packages. -Here is a list of implementations: - -* :ref:`bob.bio.base <bob.bio.base>` : :ref:`bob.bio.base.implemented` -* :ref:`bob.bio.face <bob.bio.face>` : :ref:`bob.bio.face.implemented` -* :ref:`bob.bio.video <bob.bio.video>` : :ref:`bob.bio.video.implemented` -* :ref:`bob.bio.gmm <bob.bio.gmm>` : :ref:`bob.bio.gmm.implemented` -* :ref:`bob.bio.csu <bob.bio.csu>` : :ref:`bob.bio.csu.implemented` - -.. * :ref:`bob.bio.spear <bob.bio.spear>` : :ref:`bob.bio.spear.implemented` - - -.. todo:: complete this list, once the other packages are documented as well. - - -Databases ---------- - -Databases provide information about the data sets, on which the recognition algorithm should run on. 
-Particularly, databases come with one or more evaluation protocols, which defines, which part of the data should be used for training, enrollment and probing. -Some protocols split up the data into three different groups: a training set (aka. ``world`` group), a development set (aka. ``dev`` group) and an evaluation set (``eval``, sometimes also referred as test set). -Furthermore, some of the databases split off some data from the training set, which is used to perform a ZT score normalization. -Finally, most of the databases come with specific annotation files, which define additional information about the data, e.g., hand-labeled eye locations for face images. - - -Verification Database Interface -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -For most of the data sets, we rely on the database interfaces from Bob_. -Particularly, all databases that are derived from the :py:class:`bob.db.verification.utils.Database` (click :ref:`here <verification_databases>` for a list of implemented databases) are supported by a special derivation of the databases from above. -For these databases, the special :py:class:`bob.bio.base.database.DatabaseBob` interface is provided, which takes the Bob_ database as parameter. -Several such databases are defined in the according packages, i.e., :ref:`bob.bio.spear <bob.bio.spear>`, :ref:`bob.bio.face <bob.bio.face>` and :ref:`bob.bio.video <bob.bio.video>`. -For Bob_'s ZT-norm databases, we provide the :py:class:`bob.bio.base.database.DatabaseBobZT` interface. - -Additionally, a generic database interface, which is derived from :py:class:`bob.bio.base.database.DatabaseBobZT`, is the :py:class:`bob.bio.base.database.DatabaseFileList`. -This database interfaces with the :py:class:`bob.db.verification.filelist.Database`, which is a generic database based on file lists, implementing the :py:class:`bob.db.verification.utils.Database` interface. 
- -Defining your own Database -~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you have your own database that you want to execute the recognition experiments on, you should first check if you could use the :ref:`Verifcation FileList Database <bob.db.verification.filelist>` interface by defining appropriate file lists for the training set, the model set, and the probes. -In most of the cases, the :py:class:`bob.db.verification.filelist.Database` should be sufficient to run experiments. -Please refer to the documentation :ref:`Documentation <bob.db.verification.filelist>` of this database for more instructions on how to configure this database. - -In case you want to have a more complicated interface to your database, you are welcome to write your own database wrapper class. -In this case, you have to derive your class from the :py:class:`facereclib.databases.Database`, and provide the following functions: - -* ``__init__(self, <your-parameters>, **kwargs)``: Constructor of your database interface. - Please call the base class constructor, providing all the required parameters, e.g. by ``bob.bio.base.database.Database.__init__(self, **kwargs)``. -* ``all_files(self)``: Returns a list of all :py:class:`bob.bio.base.database.File` objects of the database. - The list needs to be sorted by the file id (you can use the ``self.sort(files)`` function for sorting). -* ``training_files(self, step, arrange_by_client = False)``: A sorted list of the :py:class:`bob.bio.base.database.File` objects that is used for training. - If ``arrange_by_clients`` is enabled, you might want to use the :py:meth:`bob.bio.base.database.Database.arrange_by_client` function to perform the job. -* ``model_ids(self, group = 'dev'): The ids for the models (usually, there is only one model per client and, thus, you can simply use the client ids) for the given group. - Usually, providing ids for the group ``'dev'`` should be sufficient. 
-* ``client_id_from_model_id(self, model_id)``: Returns the client id for the given model id. -* ``enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`bob.bio.base.database.File` objects for the given model id. -* ``probe_files(self, model_id=None, group='dev')``: Returns the list of probe files, the given model_id should be compared with. - Usually, all probe files are compared with all model files. - In this case, you can just ignore the ``model_id``. - If the ``model_id`` is ``None``, this function is supposed to return *all* probe files for all models of the given group. - -Additionally, you can define more lists that can be used for ZT score normalization. -In this case, derive you class from :py:class:`bob.bio.base.database.DatabaseZT` instead, and additionally overwrite the following functions: - -* ``t_model_ids(self, group = 'dev')``: The ids for the T-Norm models for the given group. -* ``t_enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`bob.bio.base.database.File` objects for the given T-Norm model id. -* ``z_probe_files(self, group='dev')``: Returns the list of Z-probe :py:class:`bob.bio.base.database.File` objects, with which all the models and T-Norm models are compared. - -.. note: - For a proper biometric recognition protocol, the identities from the models and the T-Norm models, as well as the Z-probes should be different. - -For some protocols, a single probe consists of several features, see :ref:`bob.bio.base.algorithms` about strategies how to incorporate several probe files into one score. -If your database should provide this functionality, please overwrite: - -* ``uses_probe_file_sets(self)``: Return ``True`` if the current protocol of the database provides multiple files for one probe. -* ``probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of :py:class:`bob.bio.base.database.FileSet` objects. 
-* ``z_probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of Z-probe :py:class:`bob.bio.base.database.FileSet` objects (only needed if the base class is :py:class:`bob.bio.base.database.DatabaseZT`). - - - -.. _bob.bio.base.configuration-files: - -Configuration Files -------------------- - -One important aspect of the ``bob.bio`` packages is reproducibility. -To be able to reproduce an experiment, it is required that all parameters of all tools are present. - -In ``bob.bio`` this is achieved by providing these parameters in configuration files. -In these files, an *instance* of one of the tools is generated, and assigned to a variable with a specific name. -These variable names are: - -* ``database`` for an instance of a (derivation of a) :py:class:`bob.bio.base.database.Database` -* ``preprocessor`` for an instance of a (derivation of a) :py:class:`bob.bio.base.preprocessor.Preprocessor` -* ``extractor`` for an instance of a (derivation of a) :py:class:`bob.bio.base.extractor.Extractor` -* ``algorithm`` for an instance of a (derivation of a) :py:class:`bob.bio.base.algorithm.Algorithm` -* ``grid`` for an instance of the :py:class:`bob.bio.base.grid.Grid` - -For example, the configuration file for a PCA algorithm, which uses 80% of variance and a cosine distance function, could look somewhat like: - -.. code-block:: py - - import bob.bio.base - import scipy.spatial - - algorithm = bob.bio.base.algorithm.PCA(subspace_dimension = 0.8, distance_function = scipy.spatial.distance.cosine, is_distance_function = True) - -Some default configuration files can be found in the ``bob/bio/*/config`` directories of all ``bob.bio`` packages, but you can create configuration files in any directory you like. -In fact, since all tools have a different keyword, you can define a complete experiment in a single configuration file. - - -.. 
_bob.bio.base.resources: - -Resources ---------- - -Finally, some of the configuration files, which sit in the ``bob/bio/*/config`` directories, are registered as *resources*. -This means that a resource is nothing else than a short name for a registered instance of one of the tools (database, preprocessor, extractor, algorithm or grid configuration) of ``bob.bio``, which has a pre-defined set of parameters. - -The process of registering a resource is relatively easy. -We use the SetupTools_ mechanism of registering so-called entry points in the ``setup.py`` file of the according ``bob.bio`` package. -Particularly, we use a specific list of entry points, which are: - -* ``bob.bio.database`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.database.Database` -* ``bob.bio.preprocessor`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.preprocessor.Preprocessor` -* ``bob.bio.extractor`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.extractor.Extractor` -* ``bob.bio.algorithm`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.algorithm.Algorithm` -* ``bob.bio.grid`` to register an instance of the :py:class:`bob.bio.base.grid.Grid` - -For each of the tools, several resources are defined, which you can list with the ``./bin/resources.py`` command line. - -When you want to register your own resource, make sure that your configuration file is importable (usually it is sufficient to have an empty ``__init__.py`` file in the same directory as your configuration file). -Then, you can simply add a line inside the according ``entry_points`` section of the ``setup.py`` file (you might need to create that section, just follow the example of the ``setup.py`` file that you can find online in the base directory of our `bob.bio.base GitHub page <http://github.com/bioidiap/bob.bio.base>`__). 
- -After re-running ``./bin/buildout``, your new resource should be listed in the output of ``./bin/resources.py``. - - -.. include:: links.rst diff --git a/doc/implemented.rst b/doc/implemented.rst deleted file mode 100644 index 6737d58008d04e68f8af7be834b4c355a02bf9b4..0000000000000000000000000000000000000000 --- a/doc/implemented.rst +++ /dev/null @@ -1,78 +0,0 @@ -.. _bob.bio.base.implemented: - -================================= -Tools implemented in bob.bio.base -================================= - -Summary -------- - -Base Classes -~~~~~~~~~~~~ - -.. autosummary:: - bob.bio.base.preprocessor.Preprocessor - bob.bio.base.extractor.Extractor - bob.bio.base.algorithm.Algorithm - bob.bio.base.database.Database - bob.bio.base.database.DatabaseZT - bob.bio.base.grid.Grid - - -Implementations -~~~~~~~~~~~~~~~ - -.. autosummary:: - bob.bio.base.extractor.Linearize - bob.bio.base.algorithm.Distance - bob.bio.base.algorithm.PCA - bob.bio.base.algorithm.LDA - bob.bio.base.algorithm.PLDA - bob.bio.base.algorithm.BIC - bob.bio.base.database.DatabaseBob - bob.bio.base.database.DatabaseBobZT - bob.bio.base.database.DatabaseFileList - - -Preprocessors -------------- - -.. automodule:: bob.bio.base.preprocessor - -Extractors ----------- - -.. automodule:: bob.bio.base.extractor - -Algorithms ----------- - -.. automodule:: bob.bio.base.algorithm - -Databases ---------- - -.. automodule:: bob.bio.base.database - -Grid Configuration ------------------- - -.. automodule:: bob.bio.base.grid - - -.. data:: PREDEFINED_QUEUES - - A dictionary of predefined queue keywords, which are adapted to the Idiap_ SGE. - - - .. adapted from http://stackoverflow.com/a/29789910/3301902 to ge a nice dictionary content view - - .. exec:: - import json - from bob.bio.base.grid import PREDEFINED_QUEUES - json_obj = json.dumps(PREDEFINED_QUEUES, sort_keys=True, indent=2) - json_obj = json_obj.replace("\n", "\n ") - print ('.. code-block:: JavaScript\n\n PREDEFINED_QUEUES = %s\n\n' % json_obj) - - -.. 
include:: links.rst diff --git a/doc/index.rst b/doc/index.rst index 3b1c0b3d3ba0d747b3fe1d36dac151e4ed9ef52c..2d224716440c0e6e9ab7090810d6b752a2beafbc 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,45 +1,15 @@ .. vim: set fileencoding=utf-8 : -.. author: Manuel Günther <manuel.guenther@idiap.ch> -.. date: Thu Sep 20 11:58:57 CEST 2012 +.. author: Amir Mohammadi <amir.mohammadi@idiap.ch> -.. _bob.bio.base: +.. _bob.fusion.base: -=========================================== - Running Biometric Recognition Experiments -=========================================== +=================================================== + Score Fusion in Biometric Recognition Experiments +=================================================== -The ``bob.bio`` packages provide open source tools to run comparable and reproducible biometric recognition experiments. -To design a biometric recognition experiment, one has to choose: +The ``bob.fusion.base`` package provides open source tools to run comparable and reproducible score fusion in biometric recognition experiments. -* a databases containing the original data, and a protocol that defines how to use the data, -* a data preprocessing algorithm, i.e., face detection for face recognition experiments or voice activity detection for speaker recognition, -* the type of features to extract from the preprocessed data, -* the biometric recognition algorithm to employ, -* the score fusion to combine outputs from different systems, and -* the way to evaluate the results - -For any of these parts, several different types are implemented in the ``bob.bio`` packages, and basically any combination of the five parts can be executed. -For each type, several meta-parameters can be tested. -This results in a nearly infinite amount of possible experiments that can be run using the current setup. 
-But it is also possible to use your own database, preprocessor, feature extractor, or biometric recognition algorithm and test this against the baseline algorithms implemented in the our packages. - -.. note:: - The ``bob.bio`` packages are derived from the former `FaceRecLib <http://pypi.python.org/pypi/facereclib>`__, which is herewith outdated. - -This package :py:mod:`bob.bio.base` includes the basic definition of a biometric recognition experiment, as well as a generic script, which can execute the full biometric experiment in a single command line. -Changing the employed tolls such as the database, protocol, preprocessor, feature extractor or recognition algorithm is as simple as changing a command line parameter. - -The implementation of (most of) the tools is separated into other packages in the ``bob.bio`` namespace. -All these packages can be easily combined. -Here is a growing list of derived packages: - -* :ref:`bob.bio.spear <bob.bio.spear>` Tools to run speaker recognition experiments, including voice activity detection, Cepstral feature extraction, and speaker databases -* :ref:`bob.bio.face <bob.bio.face>` Tools to run face recognition experiments, such as face detection, facial feature extraction and comparison, and face image databases -* :ref:`bob.bio.video <bob.bio.video>` An extension of face recognition algorithms to run on video data, and the according video databases -* :ref:`bob.bio.gmm <bob.bio.gmm>` Algorithms based on Gaussian Mixture Modeling (GMM) such as Inter-Session Variability modeling (ISV) or Total Variability modeling (TV, aka. I-Vector) -* `bob.bio.csu <http://pypi.python.org/pypi/bob.bio.csu>`__ for wrapper classes of the `CSU Face Recognition Resources <http://www.cs.colostate.edu/facerec>`__ (see `Installation Instructions <http://pythonhosted.org/bob.bio.csu/installation.html>`__ of ``bob.bio.csu``). 
- -If you are interested, please continue reading: +It is written to be tightly integrated with ``scikit-learn``; however, you do not necessarily need to use it. =========== @@ -49,10 +19,7 @@ Users Guide .. toctree:: :maxdepth: 2 - installation - experiments - implementation - more + fusion ================ Reference Manual @@ -61,7 +28,6 @@ Reference Manual .. toctree:: :maxdepth: 2 - implemented py_api @@ -69,13 +35,6 @@ Reference Manual References ========== -.. [TP91] *M. Turk and A. Pentland*. **Eigenfaces for recognition**. Journal of Cognitive Neuroscience, 3(1):71-86, 1991. -.. [ZKC+98] *W. Zhao, A. Krishnaswamy, R. Chellappa, D. Swets and J. Weng*. **Discriminant analysis of principal components for face recognition**, pages 73-85. Springer Verlag Berlin, 1998. -.. [Pri07] *S. J. D. Prince*. **Probabilistic linear discriminant analysis for inferences about identity**. Proceedings of the International Conference on Computer Vision. 2007. -.. [ESM+13] *L. El Shafey, Chris McCool, Roy Wallace and Sébastien Marcel*. **A scalable formulation of probabilistic linear discriminant analysis: applied to face recognition**. IEEE Transactions on Pattern Analysis and Machine Intelligence, 35(7):1788-1794, 7/2013. -.. [MWP98] *B. Moghaddam, W. Wahid and A. Pentland*. **Beyond eigenfaces: probabilistic matching for face recognition**. IEEE International Conference on Automatic Face and Gesture Recognition, pages 30-35. 1998. -.. [GW09] *M. Günther and R.P. Würtz*. **Face detection and recognition using maximum likelihood classifiers on Gabor graphs**. International Journal of Pattern Recognition and Artificial Intelligence, 23(3):433-461, 2009. - ========= ToDo-List @@ -85,7 +44,7 @@ This documentation is still under development. Here is a list of things that needs to be done: ..
todolist:: - + testing ================== Indices and tables diff --git a/doc/installation.rst b/doc/installation.rst deleted file mode 100644 index f3811adc0a20cb8b205573663dc3be59b3f11bb6..0000000000000000000000000000000000000000 --- a/doc/installation.rst +++ /dev/null @@ -1,126 +0,0 @@ -.. vim: set fileencoding=utf-8 : -.. author: Manuel Günther <manuel.guenther@idiap.ch> -.. date: Thu Sep 20 11:58:57 CEST 2012 - -.. _bob.bio.base.installation: - -========================= -Installation Instructions -========================= - -As noted before, this package is part of the ``bob.bio`` packages, which in turn are part of the signal-processing and machine learning toolbox Bob_. -To install `Packages of Bob <https://github.com/idiap/bob/wiki/Packages>`_, please read the `Installation Instructions <https://github.com/idiap/bob/wiki/Installation>`_. -For Bob_ to be able to work properly, some dependent packages are required to be installed. -Please make sure that you have read the `Dependencies <https://github.com/idiap/bob/wiki/Dependencies>`_ for your operating system. - -.. note:: - Currently, running Bob_ under MS Windows in not yet supported. - However, we found that running Bob_ in a virtual Unix environment such as the one provided by VirtualBox_ is a good alternative. - -The most simple and most convenient way to use the ``bob.bio`` tools is to use a ``zc.buildout`` package, as explained in more detail `here <https://github.com/idiap/bob/wiki/Installation#using-zcbuildout-for-production>`__. -There, in the ``eggs`` section of the ``buildout.cfg`` file, simply list the ``bob.bio`` packages that you want, like: - -.. code-block:: python - - eggs = bob.bio.base - bob.bio.face - bob.bio.gmm - bob.bio.video - bob.db.youtube - gridtk - -in order to download and install all packages that are required for your experiments. 
-In the example above, you might want to run a video face recognition experiments using the :py:class:`bob.bio.face.preprocessor.FaceDetector` and the :py:class:`bob.bio.face.extractor.DCTBlocks` feature extractor defined in :ref:`bob.bio.face <bob.bio.face>`, the :py:class:`bob.bio.gmm.algorithm.IVector` algorithm defined in :ref:`bob.bio.gmm <bob.bio.gmm>` and the video extensions defined in :ref:`bob.bio.video <bob.bio.video>`, using the YouTube faces database interface defined in :ref:`bob.db.youtube <bob.db.youtube>`. -Running the simple command line: - -.. code-block:: sh - - $ python bootstrap-buildout.py - $ ./bin/buildout - -will the download and install all dependent packages locally (relative to your current working directory), and create a ``./bin`` directory containing all the necessary scripts to run the experiments. - - -Databases -~~~~~~~~~ - -With ``bob.bio`` you will run biometric recognition experiments using some default biometric recognition databases. -Though the verification protocols are implemented in ``bob.bio``, the original data are **not included**. -To download the original data of the databases, please refer to the according Web-pages. -For a list of supported databases including their download URLs, please refer to the :ref:`verification_databases`. - -After downloading the original data for the databases, you will need to tell ``bob.bio``, where these databases can be found. -For this purpose, we have decided to implement a special file, where you can set your directories. -By default, this file is located in ``~/.bob_bio_databases.txt``, and it contains several lines, each line looking somewhat like: - -.. code-block:: text - - [YOUR_ATNT_DIRECTORY] = /path/to/your/directory - -.. note:: - If this file does not exist, feel free to create and populate it yourself. 
- - -Please use ``./bin/databases.py`` for a list of known databases, where you can see the raw ``[YOUR_DATABASE_PATH]`` entries for all databases that you haven't updated, and the corrected paths for those you have. - - -.. note:: - If you have installed only ``bob.bio.base``, there is no database listed -- as all databases are included in other packages, such as :ref:`bob.bio.face <bob.bio.face>` or :ref:`bob.bio.spear <bob.bio.spear>`. - - -Test your Installation -~~~~~~~~~~~~~~~~~~~~~~ - -One of the scripts that were generated during the bootstrap/buildout step is a test script. -To verify your installation, you should run the script running the nose tests for each of the ``bob.bio`` packages: - -.. code-block:: sh - - $ ./bin/nosetests -vs bob.bio.base - $ ./bin/nosetests -vs bob.bio.gmm - ... - -Some of the tests that are run require the images of the `AT&T database`_ database. -If the database is not found on your system, it will automatically download and extract the `AT&T database`_ a temporary directory, **which will not be erased**. - -To avoid the download to happen each time you call the nose tests, please: - -1. Download the `AT&T database`_ database and extract it to the directory of your choice. -2. Set an environment variable ``ATNT_DATABASE_DIRECTORY`` to the directory, where you extracted the database to. - For example, in a ``bash`` you can call: - -.. code-block:: sh - - $ export ATNT_DATABASE_DIRECTORY=/path/to/your/copy/of/atnt - -.. note:: - To set the directory permanently, you can also change the ``atnt_default_directory`` in the file `bob/bio/base/test/utils.py <file:../bob/bio/base/test/utils.py>`_. - In this case, there is no need to set the environment variable any more. - -In case any of the tests fail for unexplainable reasons, please file a bug report through the `GitHub bug reporting system`_. - -.. note:: - Usually, all tests should pass with the latest stable versions of the Bob_ packages. 
- In other versions, some of the tests may fail. - - -Generate this documentation -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Generally, the documentation of this package is `available online <http://pythonhosted.org/bob.bio.base>`__, and this should be your preferred resource. -However, to generate this documentation locally, you call: - -.. code-block:: sh - - $ ./bin/sphinx-build doc sphinx - -Afterward, the documentation is available and you can read it, e.g., by using: - -.. code-block:: sh - - $ firefox sphinx/index.html - - -.. _buildout.cfg: file:../buildout.cfg - -.. include:: links.rst diff --git a/doc/more.rst b/doc/more.rst deleted file mode 100644 index c14172433cd6dd788966b8b2cf77197ff13ebb12..0000000000000000000000000000000000000000 --- a/doc/more.rst +++ /dev/null @@ -1,208 +0,0 @@ -.. vim: set fileencoding=utf-8 : -.. author: Manuel Günther <manuel.guenther@idiap.ch> -.. date: Thu Sep 20 11:58:57 CEST 2012 - -============================== -More about Running Experiments -============================== - -Now that we have learned the implementation details, we can have a closer look into how experiments can be parametrized. - -.. _running_part_2: - -Running Experiments (part II) ------------------------------ - -As mentioned before, running biometric recognition experiments can be achieved using the ``./bin/verify.py`` command line. -In section :ref:`running_part_1`, we have used registered resources to run an experiment. -However, the command line options of ``./bin/verify.py`` is more flexible, as you can have three different ways of defining tools: - -1. Choose a resource (see ``./bin/resources.py`` or ``./bin/verify.py --help`` for a list of registered resources): - - .. code-block:: sh - - $ ./bin/verify.py --algorithm pca - - -2. Use a configuration file. Make sure that your configuration file has the correct variable name: - - .. code-block:: sh - - $ ./bin/verify.py --algorithm bob/bio/base/config/algorithm/pca.py - - -3. 
Instantiate a class on the command line. Usually, quotes ``"..."`` are required, and the ``--imports`` need to be specified: - - .. code-block:: sh - - $ ./bin/verify.py --algorithm "bob.bio.base.algorithm.PCA(subspace_dimension = 30, distance_function = scipy.spatial.distance.euclidean, is_distance_function = True)" --imports bob.bio.base scipy.spatial - -All these three ways can be used for any of the five command line options: ``--database``, ``--preprocessor``, ``--extractor``, ``--algorithm`` and ``--grid``. -You can even mix these three types freely in a single command line. - - -Score Level Fusion of Different Algorithms on the same Database ---------------------------------------------------------------- - -In several of our publications, we have shown that the combination of several biometric recognition algorithms is able to outperform each single algorithm. -This is particularly true, when the algorithms rely on different kind of data, e.g., we have `fused face and speaker recognition system on the MOBIO database <http://publications.idiap.ch/index.php/publications/show/2688>`__. -As long as several algorithms are executed on the same database, we can simply generate a fusion system by using the ``./bin/fuse_scores.py`` script, generating a new score file: - -.. code-block:: sh - - $ ./bin/fuse_scores.py --dev - -This computation is based on the :py:class:`bob.learn.linear.CGLogRegTrainer`, which is trained on the scores of the development set files (``--dev-files``) for the given systems. -Afterwards, the fusion is applied to the ``--dev-files`` and the resulting score file is written to the file specified by ``--fused-dev-file``. -If ``--eval-files`` are specified, the same fusion that is trained on the development set is now applied to the evaluation set as well, and the ``--fused-eval-file`` is written. - -.. note:: - When ``--eval-files`` are specified, they need to be in the same order as the ``dev-files``, otherwise the result is undefined. 
- -The resulting ``--fused-dev-file`` and ``--fused-eval-file`` can then be evaluated normally, e.g., using the ``./bin/evaluate.py`` script. - - -.. _grid-search: - -Finding the Optimal Configuration ---------------------------------- - -Sometimes, configurations of tools (preprocessors, extractors or algorithms) are highly dependent on the database or even the employed protocol. -Additionally, configuration parameters depend on each other. -``bob.bio`` provides a relatively simple setup that allows you to test different configurations in the same task and find the best set of configurations. -For this, the ``./bin/grid_search.py`` script can be employed. -This script executes a configurable series of experiments, which reuse data as far as possible. -Please check out ``./bin/grid_search.py --help`` for a list of command line options. - -The Configuration File -~~~~~~~~~~~~~~~~~~~~~~ -The most important parameter to the ``./bin/grid_search.py`` is the ``--configuration-file``. -This configuration file specifies which parameters of which part of the algorithms will be tested. -An example for a configuration file can be found in the test scripts: ``bob/bio/base/test/dummy/grid_search.py``. -The configuration file is a regular Python file, which can contain certain variables: - -1. ``preprocessor =`` -2. ``extractor =`` -3. ``algorithm =`` -4. ``replace =`` -5. ``requirement =`` -6. ``imports =`` - -The variables from 1. to 3. usually contain instantiations for classes of :ref:`bob.bio.base.preprocessors`, :ref:`bob.bio.base.extractors` and :ref:`bob.bio.base.algorithms`, but also registered :ref:`bob.bio.base.resources` can be used. -For any of the parameters of the classes, a *placeholder* can be put. -By default, these placeholders start with a # character, followed by a digit or character. -The variables 1. to 3. 
can also be overridden by the command line options ``--preprocessor``, ``--extractor`` and ``--algorithm`` of the ``./bin/grid_search.py`` script. - -The ``replace`` variable has to be set as a dictionary. -In it, you can define with which values your placeholder key should be filled, and in which step of the tool chain execution this should happen. -The steps are ``'preprocess'``, ``'extract'``, ``'project'``, ``'enroll'`` and ``'score'``. -For each of the steps, you can define which placeholder should be replaced by which values. -To be able to differentiate the results later on, each of the replacement values is bound to a directory name. -The final structure looks somewhat like this: - -.. code-block:: python - - replace = { - step1 : { - '#a' : { - 'Dir_a1' : 'Value_a1', - 'Dir_a2' : 'Value_a2' - }, - - '#b' : { - 'Dir_b1' : 'Value_b1', - 'Dir_b2' : 'Value_b2' - } - }, - - step2 : { - '#c' : { - 'Dir_c1' : 'Value_c1', - 'Dir_c2' : 'Value_c2' - } - } - } - - -Of course, more than two values can be selected. -In the above example, the results of the experiments will be placed into a directory structure as ``results/[...]/Dir_a1/Dir_b1/Dir_c1/[...]``. - -.. note:: - Please note that we are using a dictionary structure to define the replacements. - Hence, the order of the directories inside the same step might not be in the same order as written in the configuration file. - For the above example, a directory structure of ``results/[...]/Dir_b1/Dir_a1/Dir_c1/[...]`` might be possible as well. - - -Additionally, tuples of placeholders can be defined, in which case the full tuple is always replaced in one shot. -Continuing the above example, it is possible to add: - -.. code-block:: python - - ... - step3 : { - '(#d,#e)' : { - 'Dir_de1' : ('Value_d1', 'Value_e1'), - 'Dir_de2' : ('Value_d2', 'Value_e2') - } - } - -.. 
warning:: - *All possible combinations* of the configuration parameters are tested, which might result in a *huge number of executed experiments*. - -Some combinations of parameters might not make any sense. -In this case, a set of requirements on the parameters can be set, using the ``requirement`` variable. -A requirement can be any string, including placeholders, that can be evaluated using Python's ``eval`` function: - -.. code-block:: python - - requirement = ['#a > #b', '2*#c != #a', ...] - -Finally, when any of the classes or variables need to import a certain Python module, it needs to be declared in the ``imports`` variable. -If you, e.g., test which ``scipy.spatial`` distance function works best for your features, please add the imports (and don't forget the ``bob.bio.base`` and other ``bob.bio`` packages in case you use their tools): - -.. code-block:: python - - imports = ['scipy', 'bob.bio.base', 'bob.bio.face'] - - -Further Command Line Options -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``./bin/grid_search.py`` script has a further set of command line options. - -- The ``--database`` and the ``--protocol`` define which database and (optionally) which protocol should be used. -- The ``--sub-directory`` is similar to the one in the ``./bin/verify.py``. -- ``--result-directory`` and ``--temp-directory`` specify directories to write results and temporary files into. Defaults are ``./results/grid_search`` and ``./temp/grid_search`` in the current directory. Make sure that the ``--temp-directory`` can store a sufficient amount of data. -- The ``--preprocessor``, ``--extractor`` and ``--algorithm`` can be used to override the ``preprocessor``, ``extractor`` and ``algorithm`` fields in the configuration file (in which case the configuration file does not need to contain these variables). -- The ``--grid`` option can select the SGE_ configuration. -- The ``--parallel`` option can run on the local machine using the given number of parallel threads. 
-- The ``--preprocessed-directory`` can be used to select a directory of previously preprocessed data. This should not be used in combination with testing different preprocessor parameters. -- The ``--gridtk-database-directory`` can be used to select another directory, where the ``submitted.sql3`` files will be stored. -- Sometimes, the gridtk databases grow, and are too large for holding all experiments. Using the ``--gridtk-database-split-level``, databases can be split at the desired level. -- The ``--write-commands`` directory can be selected to write the executed commands into (this is useful in case some experiments fail and need to be rerun). -- The ``--dry-run`` flag should always be used before the final execution to see if the experiment definition works as expected. -- The ``--skip-when-existent`` flag will only execute the experiments that have not yet finished (i.e., where the resulting score files are not produced yet). -- With the ``--executable`` flag, you might select a different script rather than ``bob.bio.base.script.verify`` to run the experiments (such as the ``bob.bio.gmm.script.verify_gmm``). -- Finally, additional options might be sent to the ``./bin/verify.py`` script directly. These options might be put after a ``--`` separator. - - -Evaluation of Results -~~~~~~~~~~~~~~~~~~~~~ - -To evaluate a series of experiments, a special script iterates through all the results and computes EER on the development set and HTER on the evaluation set, for both the ``nonorm`` and the ``ztnorm`` directories. -Simply call: - -.. code-block:: sh - - $ ./bin/collect_results.py -vv --directory [result-base-directory] --sort - -This will iterate through all result files found in ``[result-base-directory]`` and sort the results according to the EER on the development set (the sorting criterion can be modified using the ``--criterion`` and the ``--sort-key`` command line options). 
-Hence, to find the best results of your grid search experiments (with default directories), simply run: - -.. code-block:: sh - - $ ./bin/collect_results.py -vv --directory results/grid_search --sort --criterion EER --sort-key nonorm-dev - - - - -.. include:: links.rst diff --git a/doc/py_api.rst b/doc/py_api.rst index a43beead5e5c32c6ea92aefbea50a2232b2457df..951e069eec086c5b01f335ebda2652f58e7ce2c8 100644 --- a/doc/py_api.rst +++ b/doc/py_api.rst @@ -1,110 +1,10 @@ =========================== -Python API for bob.bio.base +Python API for bob.fusion.base =========================== -Generic functions ------------------ - -IO-related functions -~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - bob.bio.base.load - bob.bio.base.save - bob.bio.base.load_compressed - bob.bio.base.save_compressed - bob.bio.base.open_compressed - bob.bio.base.close_compressed - bob.bio.base.check_file - - -Functions dealing with resources -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - bob.bio.base.load_resource - bob.bio.base.read_config_file - bob.bio.base.resource_keys - bob.bio.base.extensions - bob.bio.base.valid_keywords - - -Miscellaneous functions -~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - bob.bio.base.get_config - bob.bio.base.score_fusion_strategy - bob.bio.base.selected_elements - bob.bio.base.selected_indices - - -Tools to run recognition experiments ------------------------------------- - -Command line generation -~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - bob.bio.base.tools.command_line_parser - bob.bio.base.tools.initialize - bob.bio.base.tools.command_line - bob.bio.base.tools.write_info - bob.bio.base.tools.FileSelector - -Controlling of elements -~~~~~~~~~~~~~~~~~~~~~~~ - -.. autosummary:: - bob.bio.base.tools.groups - bob.bio.base.tools.indices - -Preprocessing -~~~~~~~~~~~~~ - -.. autosummary:: - bob.bio.base.tools.preprocess - bob.bio.base.tools.read_preprocessed_data - -Feature Extraction -~~~~~~~~~~~~~~~~~~ - -.. 
autosummary:: - bob.bio.base.tools.train_extractor - bob.bio.base.tools.extract - bob.bio.base.tools.read_features - -Algorithm -~~~~~~~~~ - -.. autosummary:: - bob.bio.base.tools.train_projector - bob.bio.base.tools.project - bob.bio.base.tools.train_enroller - bob.bio.base.tools.enroll - -Scoring -~~~~~~~ - -.. autosummary:: - bob.bio.base.tools.compute_scores - bob.bio.base.tools.concatenate - bob.bio.base.tools.calibrate Details ------- -.. automodule:: bob.bio.base - - .. attribute:: valid_keywords - - Valid keywords, for which resources are defined, are ``('database', 'preprocessor', 'extractor', 'algorithm', 'grid')`` - - -.. automodule:: bob.bio.base.tools - - .. autoclass:: FileSelector - - .. include:: links.rst diff --git a/requirements.txt b/requirements.txt index 09801548506cc5688ade4b3f728ec5806bd16217..eb97d3f8cfdaa2f41157f6b5b65bc5410372653b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,6 @@ bob.measure bob.learn.activation bob.learn.mlp bob.bio.base -scikit-learn +scikit-learn # Used for classifiers and pre-processors. matplotlib # for plotting docopt # for plotting script \ No newline at end of file