From a0d21253a42c177e8ff0a024ed86b6ba118e9010 Mon Sep 17 00:00:00 2001
From: Manuel Guenther <manuel.guenther@idiap.ch>
Date: Mon, 22 Jun 2015 18:44:54 +0200
Subject: [PATCH] Added tons of documentation

---
 bob/bio/base/algorithm/__init__.py    |   3 +
 bob/bio/base/extractor/__init__.py    |   9 +-
 bob/bio/base/grid.py                  |   3 +
 bob/bio/base/preprocessor/__init__.py |   3 +
 bob/bio/base/script/resources.py      |   2 +-
 bob/bio/base/tools/__init__.py        |   3 +
 bob/bio/base/tools/command_line.py    |  26 +--
 bob/bio/base/tools/grid.py            |   4 +-
 bob/bio/base/utils/resources.py       |   3 +-
 doc/conf.py                           |   2 +-
 doc/experiments.rst                   | 227 ++++++++++++++++++++
 doc/implementation.rst                | 291 ++++++++++++++++++++++++++
 doc/implemented.rst                   |  35 ++++
 doc/index.rst                         |  91 +++++++-
 doc/installation.rst                  | 126 +++++++++++
 doc/links.rst                         |  22 ++
 doc/py_api.rst                        |  17 ++
 17 files changed, 831 insertions(+), 36 deletions(-)
 create mode 100644 doc/experiments.rst
 create mode 100644 doc/implementation.rst
 create mode 100644 doc/implemented.rst
 create mode 100644 doc/installation.rst
 create mode 100644 doc/links.rst
 create mode 100644 doc/py_api.rst

diff --git a/bob/bio/base/algorithm/__init__.py b/bob/bio/base/algorithm/__init__.py
index 6baf97fd..e1bc1ad9 100644
--- a/bob/bio/base/algorithm/__init__.py
+++ b/bob/bio/base/algorithm/__init__.py
@@ -3,3 +3,6 @@ from .PCA import PCA
 from .LDA import LDA
 from .PLDA import PLDA
 from .BIC import BIC
+
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/bio/base/extractor/__init__.py b/bob/bio/base/extractor/__init__.py
index 92e638c9..bb58f93f 100644
--- a/bob/bio/base/extractor/__init__.py
+++ b/bob/bio/base/extractor/__init__.py
@@ -1,8 +1,5 @@
-#!/usr/bin/env python
-# vim: set fileencoding=utf-8 :
-# Laurent El Shafey <Laurent.El-Shafey@idiap.ch>
-
-"""Basic features for biometric recognition"""
-
 from .Extractor import Extractor
 from .Linearize import Linearize
+
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/bio/base/grid.py b/bob/bio/base/grid.py
index aba9ca30..424d3925 100644
--- a/bob/bio/base/grid.py
+++ b/bob/bio/base/grid.py
@@ -106,3 +106,6 @@ class Grid:
   def is_local(self):
     """Returns whether this grid setup should use the local submission or the SGE grid."""
     return self.grid_type == 'local'
+
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/bio/base/preprocessor/__init__.py b/bob/bio/base/preprocessor/__init__.py
index b2eb5b57..11d00588 100644
--- a/bob/bio/base/preprocessor/__init__.py
+++ b/bob/bio/base/preprocessor/__init__.py
@@ -1 +1,4 @@
 from .Preprocessor import Preprocessor
+
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/bio/base/script/resources.py b/bob/bio/base/script/resources.py
index 4ba3887c..8d4ea581 100644
--- a/bob/bio/base/script/resources.py
+++ b/bob/bio/base/script/resources.py
@@ -39,7 +39,7 @@ def resources():
 
 def databases():
   import argparse
-  database_replacement = "/idiap/home/%s/.bob_bio_databases.txt" % os.environ["USER"] if os.path.isdir("/idiap") else "/home/%s/.bob_bio_databases.txt" % os.environ["USER"]
+  database_replacement = "%s/.bob_bio_databases.txt" % os.environ["HOME"]
 
   parser = argparse.ArgumentParser(description="Prints a list of directories for registered databases", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   parser.add_argument('-D', '--database-directories-file', metavar = 'FILE', default = database_replacement, help = 'The file, where database directories are stored (to avoid changing the database configurations)')
diff --git a/bob/bio/base/tools/__init__.py b/bob/bio/base/tools/__init__.py
index 5147977f..6a1e9b9a 100644
--- a/bob/bio/base/tools/__init__.py
+++ b/bob/bio/base/tools/__init__.py
@@ -5,3 +5,6 @@ from .algorithm import *
 from .scoring import *
 from .command_line import *
 from .grid import *
+
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/bio/base/tools/command_line.py b/bob/bio/base/tools/command_line.py
index ebb6e9f7..cfe4ff27 100644
--- a/bob/bio/base/tools/command_line.py
+++ b/bob/bio/base/tools/command_line.py
@@ -5,7 +5,7 @@ import sys
 import bob.core
 logger = bob.core.log.setup("bob.bio.base")
 
-from ..utils import load_resource, resource_keys
+from .. import utils
 from . import FileSelector
 
 """Execute biometric recognition algorithms on a certain biometric database.
@@ -23,13 +23,13 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]):
   ############## options that are required to be specified #######################
   config_group = parser.add_argument_group('\nParameters defining the experiment. Most of these parameters can be a registered resource, a configuration file, or even a string that defines a newly created object')
   config_group.add_argument('-d', '--database', metavar = 'x', nargs = '+', required = True,
-      help = 'Database and the protocol; registered databases are: %s' % resource_keys('database', exclude_resources_from))
+      help = 'Database and the protocol; registered databases are: %s' % utils.resource_keys('database', exclude_resources_from))
   config_group.add_argument('-p', '--preprocessor', metavar = 'x', nargs = '+', required = True,
-      help = 'Data preprocessing; registered preprocessors are: %s' % resource_keys('preprocessor', exclude_resources_from))
+      help = 'Data preprocessing; registered preprocessors are: %s' % utils.resource_keys('preprocessor', exclude_resources_from))
   config_group.add_argument('-e', '--extractor', metavar = 'x', nargs = '+', required = True,
-      help = 'Feature extraction; registered feature extractors are: %s' % resource_keys('extractor', exclude_resources_from))
+      help = 'Feature extraction; registered feature extractors are: %s' % utils.resource_keys('extractor', exclude_resources_from))
   config_group.add_argument('-a', '--algorithm', metavar = 'x', nargs = '+', required = True,
-      help = 'Biometric recognition; registered algorithms are: %s' % resource_keys('algorithm', exclude_resources_from))
+      help = 'Biometric recognition; registered algorithms are: %s' % utils.resource_keys('algorithm', exclude_resources_from))
   config_group.add_argument('-g', '--grid', metavar = 'x', nargs = '+',
       help = 'Configuration for the grid setup; if not specified, the commands are executed sequentially on the local machine.')
   config_group.add_argument('--imports', metavar = 'LIB', nargs = '+', default = ['bob.bio.base'],
@@ -48,7 +48,7 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]):
   is_idiap = os.path.isdir("/idiap")
   temp = "/idiap/temp/%s/database-name/sub-directory" % os.environ["USER"] if is_idiap else "temp"
   results = "/idiap/user/%s/database-name/sub-directory" % os.environ["USER"] if is_idiap else "results"
-  database_replacement = "/idiap/home/%s/.bob_bio_databases.txt" % os.environ["USER"] if is_idiap else "/home/%s/.bob_bio_databases.txt" % os.environ["USER"]
+  database_replacement = "%s/.bob_bio_databases.txt" % os.environ["HOME"]
 
   dir_group = parser.add_argument_group('\nDirectories that can be changed according to your requirements')
   dir_group.add_argument('-T', '--temp-directory', metavar = 'DIR',
@@ -150,12 +150,12 @@ def initialize(parsers, command_line_parameters = None, skips = []):
     args.timer = ('real', 'system', 'user')
 
   # load configuration resources
-  args.database = load_resource(' '.join(args.database), 'database', imports = args.imports)
-  args.preprocessor = load_resource(' '.join(args.preprocessor), 'preprocessor', imports = args.imports)
-  args.extractor = load_resource(' '.join(args.extractor), 'extractor', imports = args.imports)
-  args.algorithm = load_resource(' '.join(args.algorithm), 'algorithm', imports = args.imports)
+  args.database = utils.load_resource(' '.join(args.database), 'database', imports = args.imports)
+  args.preprocessor = utils.load_resource(' '.join(args.preprocessor), 'preprocessor', imports = args.imports)
+  args.extractor = utils.load_resource(' '.join(args.extractor), 'extractor', imports = args.imports)
+  args.algorithm = utils.load_resource(' '.join(args.algorithm), 'algorithm', imports = args.imports)
   if args.grid is not None:
-    args.grid = load_resource(' '.join(args.grid), 'grid', imports = args.imports)
+    args.grid = utils.load_resource(' '.join(args.grid), 'grid', imports = args.imports)
 
   # set base directories
   is_idiap = os.path.isdir("/idiap")
@@ -237,8 +237,8 @@ def write_info(args, command_line_parameters, executable):
     f.write(command_line([executable] + command_line_parameters) + "\n\n")
     f.write("Configuration:\n")
     f.write("Database:\n%s\n\n" % args.database)
-    f.write("Preprocessing:\n%s\n\n" % args.preprocessor)
-    f.write("Feature Extraction:\n%s\n\n" % args.extractor)
+    f.write("Preprocessor:\n%s\n\n" % args.preprocessor)
+    f.write("Extractor:\n%s\n\n" % args.extractor)
     f.write("Algorithm:\n%s\n\n" % args.algorithm)
   except IOError:
     logger.error("Could not write the experimental setup into file '%s'", args.info_file)
diff --git a/bob/bio/base/tools/grid.py b/bob/bio/base/tools/grid.py
index 7c79716b..1cd5cc57 100644
--- a/bob/bio/base/tools/grid.py
+++ b/bob/bio/base/tools/grid.py
@@ -3,7 +3,7 @@ from __future__ import print_function
 import sys
 import os
 import math
-from ..grid import Grid
+from .. import grid
 from .command_line import command_line
 
 import bob.core
@@ -48,7 +48,7 @@ class GridSubmission:
     self.executable = executable
 
     if args.grid is not None:
-      assert isinstance(args.grid, Grid)
+      assert isinstance(args.grid, grid.Grid)
 
       # find, where jman is installed
       jmans = bob.extension.find_executable('jman', prefixes = ['bin'])
diff --git a/bob/bio/base/utils/resources.py b/bob/bio/base/utils/resources.py
index 57af5f21..29156ed1 100644
--- a/bob/bio/base/utils/resources.py
+++ b/bob/bio/base/utils/resources.py
@@ -174,7 +174,7 @@ def list_resources(keyword, strip=['dummy']):
   entry_points = _get_entry_points(keyword, strip)
   last_dist = None
   retval = ""
-  length = max(len(entry_point.name) for entry_point in entry_points)
+  length = max(len(entry_point.name) for entry_point in entry_points) if entry_points else 1
 
   for entry_point in sorted(entry_points):
     if last_dist != str(entry_point.dist):
@@ -198,7 +198,6 @@ def database_directories(strip=['dummy'], replacements = None):
       db = load_resource(entry_point.name, 'database')
       db.replace_directories(replacements)
       dirs[entry_point.name] = [db.original_directory]
-#      import ipdb; ipdb.set_trace()
       if db.annotation_directory is not None:
         dirs[entry_point.name].append(db.annotation_directory)
     except (AttributeError, ValueError):
diff --git a/doc/conf.py b/doc/conf.py
index d102e79c..63ed152c 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -86,7 +86,7 @@ release = distribution.version
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-#exclude_patterns = ['**/links.rst']
+exclude_patterns = ['links.rst']
 
 # The reST default role (used for this markup: `text`) to use for all documents.
 #default_role = None
diff --git a/doc/experiments.rst b/doc/experiments.rst
new file mode 100644
index 00000000..aa4bf47c
--- /dev/null
+++ b/doc/experiments.rst
@@ -0,0 +1,227 @@
+.. vim: set fileencoding=utf-8 :
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
+
+.. _experiments:
+
+
+=========================================
+Running Biometric Recognition Experiments
+=========================================
+
+Now, you are almost ready to run your first biometric recognition experiment.
+Just a little bit of theory, and then: off we go.
+
+
+Structure of a Biometric Recognition Experiment
+-----------------------------------------------
+
+Each biometric recognition experiment that is run with ``bob.bio`` is divided into several steps.
+The steps are:
+
+1. Data preprocessing: Raw data is preprocessed, e.g., for face recognition, faces are detected, images are aligned and photometrically enhanced.
+2. Feature extractor training: Feature extraction parameters are learned.
+3. Feature extraction: Features are extracted from the preprocessed data.
+4. Feature projector training: Parameters of a subspace-projection of the features are learned.
+5. Feature projection: The extracted features are projected into a subspace.
+6. Model enroller training: How to enroll models from extracted or projected features is learned.
+7. Model enrollment: One model is enrolled from the features of one or more images.
+8. Scoring: The verification scores between various models and probe features are computed.
+9. Evaluation: The computed scores are evaluated and curves are plotted.
+
+These 9 steps are divided into four distinct groups, which are discussed in more detail later:
+
+* Preprocessing (only step 1)
+* Feature extraction (steps 2 and 3)
+* Biometric recognition (steps 4 to 8)
+* Evaluation (step 9)
+
+The communication between two steps is file-based, usually using a binary HDF5_ interface, which is implemented in the :py:class:`bob.io.base.HDF5File` class.
+The output of one step usually serves as the input of the subsequent step(s).
+Depending on the algorithm, some of the steps are not applicable/available.
+E.g., most feature extractors do not need a special training step, and some algorithms do not require a subspace projection.
+In these cases, the according steps are skipped.
+``bob.bio`` takes care that the correct files are always forwarded to the subsequent steps.
+
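+For illustration, such an intermediate file can also be inspected manually with the same interface (the file path and the ``array`` key below are assumptions based on the default IO functions):
+
+.. code-block:: python
+
+   import bob.io.base
+
+   # open an intermediate file written by one of the steps (hypothetical path)
+   f = bob.io.base.HDF5File('temp/preprocessed/sample.hdf5')
+   data = f.read('array')  # by default, data is stored as a numpy array under this key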
+
+Running Experiments
+-------------------
+
+To run an experiment, we provide a generic script ``./bin/verify.py``, which is highly parametrizable.
+To get a complete list of command line options, please run:
+
+.. code-block:: sh
+
+   $ ./bin/verify.py --help
+
+Whoops, that's a lot of options.
+But, no worries, most of them have proper default values.
+
+.. note::
+   Sometimes, command line options have a long version starting with ``--`` and a short one starting with a single ``-``.
+   In this section, only the long names of the arguments are listed; please refer to ``./bin/verify.py --help`` (or short: ``./bin/verify.py -h``) for the abbreviations.
+
+There are five command line options, which are required and sufficient to define the complete biometric recognition experiment.
+These five options are:
+
+* ``--database``: The database to run the experiments on
+* ``--preprocessor``: The data preprocessor
+* ``--extractor``: The feature extractor
+* ``--algorithm``: The recognition algorithm
+* ``--sub-directory``: A descriptive name for your experiment, which will serve as a sub-directory
+
+The first four parameters, i.e., the ``database``, the ``preprocessor``, the ``extractor`` and the ``algorithm`` can be specified in several different ways.
+For the start, we will use only the registered :ref:`Resources <resources>`.
+These resources define the source code that will be used to compute the experiments, as well as all the meta-parameters of the algorithms (which we will call the **configuration**).
+To get a list of registered resources, please call:
+
+.. code-block:: sh
+
+   $ ./bin/resources.py
+
+Each package in ``bob.bio`` defines its own resources, and the printed list of registered resources differs according to the installed packages.
+If only ``bob.bio.base`` is installed, no databases and no preprocessors will be listed.
+
+.. note::
+   You will also find some ``grid`` resources being listed.
+   These types of resources will be explained :ref:`later <grid>`.
+
+One command line option, which is not required, but recommended, is the ``--verbose`` option.
+By default, the algorithms are set up to execute quietly, and only errors are reported.
+To change this behavior, you can use the ``--verbose`` option several times to increase the verbosity level to show:
+
+1) Warning messages
+2) Informative messages
+3) Debug messages
+
+When running experiments, my personal preference is verbose level 2, which can be enabled by ``--verbose --verbose``, or using the short version: ``-vv``.
+So, a typical biometric recognition experiment (in this case, face recognition) could look something like:
+
+.. code-block:: sh
+
+   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv
+
+.. note::
+   To be able to run exactly the command line from above, you need to have :ref:`bob.bio.face <bob.bio.face>` installed.
+
+Before running an experiment, it is recommended to add the ``--dry-run`` option, so that the script only prints which steps would be executed, without actually executing them; this way you can make sure that everything works as expected.
+
+The final result of the experiment will be one (or more) score file(s).
+Usually, they will be called something like ``scores-dev``.
+By default, you can find them in a sub-directory of the ``result`` directory, but you can change this directory using the ``--result-directory`` command line option.
+
+.. note::
+   At Idiap_, the default result directory differs, see ``./bin/verify.py --help`` for your directory.
+
+
+Evaluating Experiments
+----------------------
+
+After the experiment has finished successfully, one or more text files containing all the scores are written.
+
+To evaluate the experiment, you can use the generic ``./bin/evaluate.py`` script, which provides options for all prevalent evaluation types, such as CMC, ROC and DET plots, as well as for computing recognition rates, EER/HTER, Cllr and minDCF.
+Additionally, the results of different algorithms can be plotted into the same files.
+Just specify all the score files that you want to evaluate using the ``--dev-files`` option, and the legends for the plots (in the same order) using the ``--legends`` option, and the according plots will be generated.
+For example, to create a ROC curve for the experiment above, use:
+
+.. code-block:: sh
+
+   $ ./bin/evaluate.py --dev-files results/pca-experiment/male/nonorm/scores-dev --legend MOBIO --roc MOBIO_MALE_ROC.pdf -vv
+
+Please note that there exists another file called ``Experiment.info`` inside the result directory.
+This file is a pure text file and contains the complete configuration of the experiment.
+With this configuration it is possible to inspect all default parameters of the algorithms, and even to re-run the exact same experiment.
+
+
+Running in Parallel
+-------------------
+
+One important property of the ``./bin/verify.py`` script is that it can run in parallel, using either several threads on the local machine, or an SGE grid.
+To achieve that, ``bob.bio`` is well-integrated with our SGE grid toolkit GridTK_, which we have included as a Python package in the :ref:`Installation <installation>` section.
+The ``./bin/verify.py`` script can submit jobs either to the SGE grid, or to a local scheduler, keeping track of dependencies between the jobs.
+
+The GridTK_ keeps a list of jobs in a local database, which by default is called ``submitted.sql3``, but which can be overwritten with the ``--gridtk-database-file`` option.
+Please refer to the `GridTK documentation <http://pythonhosted.org/gridtk>`_ for more details on how to use the Job Manager ``./bin/jman``.
+
+Two different types of ``grid`` resources are defined, which can be used with the ``--grid`` command line option.
+The first type of resources will submit jobs to an SGE grid.
+They are mainly designed to run in the Idiap_ SGE grid and might need some adaptations to run on your grid.
+The second type of resources will submit jobs to a local queue, which needs to be run by hand (e.g., using ``./bin/jman --local run-scheduler --parallel 4``), or by using the command line option ``--run-local-scheduler``.
+You can tell the two types apart by their names: the resources for local submission usually start with ``local-``, while the SGE resources do not.
+
+Hence, to run the same experiment as above using four parallel threads on the local machine, re-nicing the jobs to level 10, simply call:
+
+.. code-block:: sh
+
+   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --grid local-p4 --run-local-scheduler --nice 10
+
+.. note::
+   You might notice that the second execution of the same experiment is much faster than the first one.
+   This is due to the fact that those parts of the experiment, which have been successfully executed before (i.e., the according files already exist), are skipped.
+   To override this behavior, i.e., to always regenerate all parts of the experiments, you can use the ``--force`` option.
+
+
+Command Line Options to change Default Behavior
+-----------------------------------------------
+In addition to the required command line arguments discussed above, there are several options to modify the behavior of the experiments.
+One set of command line options changes the directory structure of the output.
+By default, intermediate (temporary) files are written to the ``temp`` directory, which can be overridden by the ``--temp-directory`` command line option, which expects relative or absolute paths.
+
+Re-using Parts of Experiments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+If you want to re-use parts of previous experiments, you can specify the according directories (which are relative to the ``--temp-directory``, but you can also specify absolute paths):
+
+* ``--preprocessed-data-directory``
+* ``--extracted-directory``
+* ``--projected-directory``
+* ``--models-directories`` (one for the models and one for the ZT-norm models, see below)
+
+or even the trained extractor, projector, or enroller files (i.e., the results of the extractor, projector, or enroller training):
+
+* ``--extractor-file``
+* ``--projector-file``
+* ``--enroller-file``
+
+For that purpose, it is also useful to skip parts of the tool chain.
+To do that you can use:
+
+* ``--skip-preprocessing``
+* ``--skip-extractor-training``
+* ``--skip-extraction``
+* ``--skip-projector-training``
+* ``--skip-projection``
+* ``--skip-enroller-training``
+* ``--skip-enrollment``
+* ``--skip-score-computation``
+* ``--skip-concatenation``
+* ``--skip-calibration``
+
+although by default files that already exist are not re-created anyway.
+You can combine the ``--force`` argument with the ``--skip...`` arguments, in which case the skip is preferred.
+To run just a sub-selection of the tool chain, you can also use the ``--execute-only`` option, which takes a list of options out of: ``preprocessing``, ``extractor-training``, ``extraction``, ``projector-training``, ``projection``, ``enroller-training``, ``enrollment``, ``score-computation``, ``concatenation`` or ``calibration``.
+
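+For example, to re-run only the final scoring steps of the experiment from above, while re-using all previously generated files, a (hypothetical) invocation could be:
+
+.. code-block:: sh
+
+   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --execute-only score-computation concatenation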
+
+Database-dependent Arguments
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Many databases define several protocols that can be executed.
+To change the protocol, you can either modify the configuration file, or simply use the ``--protocol`` option.
+
+Some databases define several kinds of evaluation setups.
+For example, often two groups of data are defined, a so-called *development set* and an *evaluation set*.
+The scores of the two groups will be concatenated into two files called **scores-dev** and **scores-eval**, which are located in the score directory (see above).
+In this case, by default only the development set is employed.
+To use both groups, just specify ``--groups dev eval`` (of course, you can also only use the ``'eval'`` set by calling ``--groups eval``).
+
+One score normalization technique is the so-called ZT score normalization.
+To enable this, simply use the ``--zt-norm`` option.
+If the ZT-norm is enabled, two sets of scores will be computed, and they will be placed in two different sub-directories of the score directory, which are by default called **nonorm** and **ztnorm**, but which can be changed using the ``--zt-score-directories`` option.
+
+
+Other Arguments
+---------------
+
+For some applications it is interesting to get calibrated scores.
+Simply add the ``--calibrate-scores`` option, and another set of score files will be created by training the score calibration on the scores of the ``'dev'`` group and applying it to all available groups.
+The calibrated scores will be located in the same directories as the **nonorm** and **ztnorm** scores, and the file names are **calibrated-dev** (and **calibrated-eval**, if applicable).
+
+.. include:: links.rst
diff --git a/doc/implementation.rst b/doc/implementation.rst
new file mode 100644
index 00000000..d0dcd02d
--- /dev/null
+++ b/doc/implementation.rst
@@ -0,0 +1,291 @@
+.. vim: set fileencoding=utf-8 :
+.. Manuel Guenther <Manuel.Guenther@idiap.ch>
+.. Mon 23 04 2012
+
+======================
+Implementation Details
+======================
+
+The ``bob.bio`` module is specifically designed to be as flexible as possible while trying to keep things simple.
+Therefore, it uses Python to implement tools such as preprocessors, feature extractors and recognition algorithms.
+It is file-based, so any tool can implement its own way of reading and writing data, features or models.
+Configurations are stored in configuration files, so it should be easy to test different parameters of your algorithms without modifying the code.
+
+
+Base Classes
+------------
+
+All tools implemented in the ``bob.bio`` packages are based on a small set of base classes, which are defined in the ``bob.bio.base`` package and detailed below.
+Most of the functionality is provided in the base classes, but any function can be overridden in the derived class implementations.
+
+In the derived class constructors, the base class constructor needs to be called.
+For automatically tracing the algorithms, all parameters that are passed to the derived class constructor should be passed to the base class constructor as a list of keyword arguments (which is indicated by ``...`` below).
+This will assure that all parameters of the experiments are stored into the ``Experiment.info`` file.
+
+.. note::
+   All tools are based on reading, processing and writing files.
+   By default, any type of file is allowed to be handled, and file names are provided to the ``read_...`` and ``write_...`` functions as strings.
+   However, some of the extensions -- particularly the :ref:`bob.bio.video <bob.bio.video>` extension -- require the read and write functions to handle HDF5 files.
+
+If you plan to write your own tools, please make sure that you follow the structure below.
+
+
+Preprocessors
+~~~~~~~~~~~~~
+
+All preprocessor classes are derived from :py:class:`bob.bio.base.preprocessor.Preprocessor`.
+All of them implement the following two functions:
+
+* ``__init__(self, <parameters>)``: Initializes the preprocessing algorithm with the parameters it needs.
+  The base class constructor is called in the derived class constructor, e.g. as ``bob.bio.base.preprocessor.Preprocessor.__init__(self, ...)``.
+* ``__call__(self, original_data, annotations) -> data``: Preprocesses the data, given the dictionary of annotations (e.g. ``{'reye' : [re_y, re_x], 'leye': [le_y, le_x]}`` for face images).
+
+  .. note::
+     When the database does not provide annotations, the ``annotations`` parameter might be ``None``.
+
+By default, the data returned by the preprocessor is of type :py:class:`numpy.ndarray`.
+In that case, the base class IO functionality can be used.
+If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it must override further functions of :py:class:`bob.bio.base.preprocessor.Preprocessor` that define the IO of your class:
+
+* ``write_data(data, data_file)``: Writes the given data (that has been generated using the ``__call__`` function of this class) to file.
+* ``read_data(data_file)``: Reads the preprocessed data from file.
+
+By default, the original data is read by :py:func:`bob.io.base.load`.
+Hence, data is given as :py:class:`numpy.ndarray`\s.
+When a different IO for the original data is required (for example to read videos in :py:class:`bob.bio.video.preprocessor.Video`), the following function is overridden:
+
+* ``read_original_data(filename)``: Reads the original data from file.
+
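+Putting these pieces together, a derived preprocessor could look like the following minimal sketch (the class name, its parameter and the scaling operation are purely hypothetical):
+
+.. code-block:: python
+
+   import numpy
+   import bob.bio.base.preprocessor
+
+   class ScaledRaw (bob.bio.base.preprocessor.Preprocessor):
+     """Hypothetical preprocessor that simply re-scales the raw data."""
+
+     def __init__(self, scale = 1.):
+       # forward all parameters to the base class, so that they end up in Experiment.info
+       bob.bio.base.preprocessor.Preprocessor.__init__(self, scale = scale)
+       self.scale = scale
+
+     def __call__(self, original_data, annotations = None):
+       # annotations might be None if the database does not provide any
+       return numpy.asarray(original_data, dtype = numpy.float64) * self.scale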
+
+Extractors
+~~~~~~~~~~
+
+Feature extractors should be derived from the :py:class:`bob.bio.base.extractor.Extractor` class.
+All extractor classes provide at least the functions:
+
+* ``__init__(self, <parameters>)``: Initializes the feature extraction algorithm with the parameters it needs.
+  Calls the base class constructor, e.g. as ``bob.bio.base.extractor.Extractor.__init__(self, ...)`` (there are more parameters to this constructor, see below).
+* ``__call__(self, data) -> feature``: Extracts the feature from the given preprocessed data.
+  By default, the returned feature should be a :py:class:`numpy.ndarray`.
+
+If features are not of type :py:class:`numpy.ndarray`, the ``write_feature`` function is overridden.
+In this case, the function to read that kind of feature needs to be overridden as well:
+
+* ``write_feature(self, feature, feature_file)``: Writes the feature (as returned by the ``__call__`` function) to the given file name.
+* ``read_feature(self, feature_file) -> feature``: Reads the feature (as written by the ``write_feature`` function) from the given file name.
+
+.. note::
+   If the feature is an instance of a class that can be written via a ``save(bob.io.base.HDF5File)`` method, the ``write_feature`` function does not need to be overridden.
+   However, the ``read_feature`` function is required in this case.
+
+If the feature extraction process requires reading a trained extractor model from file, the following function is overridden:
+
+* ``load(self, extractor_file)``: Loads the extractor from file.
+  This function is called at least once before the ``__call__`` function is executed.
+
+It is also possible to train the extractor model before it is used.
+In this case, two things are done.
+First, the ``train`` function is overridden:
+
+* ``train(self, image_list, extractor_file)``: Trains the feature extractor with the given list of images and writes the ``extractor_file``.
+
+Second, this behavior is registered in the ``__init__`` function by calling the base class constructor with more parameters: ``bob.bio.base.extractor.Extractor.__init__(self, requires_training=True, ...)``.
+If the training algorithm needs the training data to be split by identity, ``bob.bio.base.extractor.Extractor.__init__(self, requires_training=True, split_training_images_by_client=True, ...)`` is used instead.
+
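+Putting it all together, a trainable extractor could look like the following minimal sketch (the class name and the mean-subtraction "feature" are purely hypothetical):
+
+.. code-block:: python
+
+   import numpy
+   import bob.io.base
+   import bob.bio.base.extractor
+
+   class MeanOffset (bob.bio.base.extractor.Extractor):
+     """Hypothetical extractor that subtracts the mean of the training data."""
+
+     def __init__(self):
+       # register the fact that this extractor needs training
+       bob.bio.base.extractor.Extractor.__init__(self, requires_training = True)
+       self.mean = None
+
+     def train(self, training_data, extractor_file):
+       # estimate the mean over all (flattened) training data and write it to file
+       self.mean = numpy.mean([numpy.asarray(d).flatten() for d in training_data], axis = 0)
+       bob.io.base.save(self.mean, extractor_file)
+
+     def load(self, extractor_file):
+       # called before the first call of __call__
+       self.mean = bob.io.base.load(extractor_file)
+
+     def __call__(self, data):
+       # the returned feature is a numpy.ndarray, so the default IO applies
+       return numpy.asarray(data).flatten() - self.mean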
+
+Algorithms
+~~~~~~~~~~
+The implementation of a recognition algorithm is just as straightforward.
+All algorithms are derived from the :py:class:`bob.bio.base.algorithm.Algorithm` class.
+The constructor of this class has the following options, which are selected according to the current algorithm:
+
+* ``performs_projection``: If set to ``True``, features will be projected using the ``project`` function.
+  With the default ``False``, the ``project`` function will not be called at all.
+* ``requires_projector_training``: If ``performs_projection`` is enabled, this flag specifies if the projector needs training.
+  If ``True`` (the default), the ``train_projector`` function will be called.
+* ``split_training_features_by_client``: If the projector training needs training images split up by client identity, this flag is enabled.
+  In this case, the ``train_projector`` function will receive a list of lists of features.
+  If set to ``False`` (the default), the training features are given in one list.
+* ``use_projected_features_for_enrollment``: If features are projected, by default (``True``) models are enrolled using the projected features.
+  If the algorithm requires the original unprojected features to enroll the model, ``use_projected_features_for_enrollment=False`` is selected.
+* ``requires_enroller_training``: Enables the enroller training.
+  By default (``False``), no enroller training is performed, i.e., the ``train_enroller`` function is not called.
+
+* ``multiple_model_scoring``: The way to handle scoring when models store several features.
+  Set this parameter to ``None`` when you implement your own functionality to handle models from several features (see below).
+* ``multiple_probe_scoring``: The way to handle scoring when several probe features are given.
+  Set this parameter to ``None`` when you handle scoring with multiple probes with your own ``score_for_multiple_probes`` function (see below).
+
+A recognition algorithm has to override at least three functions:
+
+* ``__init__(self, <parameters>)``: Initializes the recognition algorithm with the parameters it needs.
+  Calls the base class constructor, e.g. as ``bob.bio.base.algorithm.Algorithm.__init__(self, ...)`` (there are more parameters to this constructor, see above).
+* ``enroll(self, enroll_features) -> model``: Enrolls a model from the given list of features (which usually contains features from several files of one subject) and returns it.
+  The returned model is either a :py:class:`numpy.ndarray` or an instance of a class that defines a ``save(bob.io.base.HDF5File)`` method.
+  If neither of the two options is appropriate, a ``write_model`` function is defined (see below).
+* ``score(self, model, probe) -> value``: Computes a similarity or probability score that the given probe feature and the given model stem from the same identity.
+
+  .. note::
+     When you use a distance measure in your scoring function, where lower distances represent a higher probability of the same identity, please return the negative distance.
+
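+The following minimal sketch combines these three functions into a simple distance-based algorithm (the class name and the averaging strategy for enrollment are purely hypothetical):
+
+.. code-block:: python
+
+   import numpy
+   import bob.bio.base.algorithm
+
+   class MeanDistance (bob.bio.base.algorithm.Algorithm):
+     """Hypothetical algorithm: the model is the average of the enrollment features."""
+
+     def __init__(self):
+       # this algorithm neither performs projection nor requires any training
+       bob.bio.base.algorithm.Algorithm.__init__(self, performs_projection = False)
+
+     def enroll(self, enroll_features):
+       # the model is a numpy.ndarray, so the default model IO applies
+       return numpy.mean(numpy.vstack(enroll_features), axis = 0)
+
+     def score(self, model, probe):
+       # negative Euclidean distance: higher scores mean higher similarity
+       return -numpy.linalg.norm(model - probe)
+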
+Additionally, an algorithm may need to project the features before they can be used for enrollment or recognition.
+In this case, (some of) the function(s) are overridden:
+
+* ``train_projector(self, train_features, projector_file)``: Trains the projector with the given list of features and writes the ``projector_file``.
+
+  .. warning::
+     If you write this function, please make sure that you use both ``performs_projection=True`` and ``requires_projector_training=True`` (the latter is the default, the former is not) during the base class constructor call in your ``__init__`` function.
+     If you need the training data to be sorted by clients, please use ``split_training_features_by_client=True`` as well.
+     Please also make sure that you override the ``project`` function.
+
+* ``load_projector(self, projector_file)``: Loads the projector from the given file, i.e., as stored by ``train_projector``.
+  This function is always called before the ``project``, ``enroll``, and ``score`` functions are executed.
+* ``project(self, feature) -> feature``: Projects the given feature and returns the projected feature, which should either be a :py:class:`numpy.ndarray` or an instance of a class that defines a ``save(bob.io.base.HDF5File)`` method.
+
+  .. note::
+     If you write this function, please make sure that you use ``performs_projection=True`` during the base class constructor call in your ``__init__`` function.
+
+And once more, if the projected feature is not of type ``numpy.ndarray``, the following methods are overridden:
+
+* ``write_feature(feature, feature_file)``: Writes the feature (as returned by the ``project`` function) to file.
+* ``read_feature(feature_file) -> feature``: Reads and returns the feature (as written by the ``write_feature`` function).
+
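+To illustrate how these functions interact, here is a minimal sketch of an algorithm with a trained projector (the class name and the mean-centering "projection" are purely hypothetical):
+
+.. code-block:: python
+
+   import numpy
+   import bob.io.base
+   import bob.bio.base.algorithm
+
+   class Centering (bob.bio.base.algorithm.Algorithm):
+     """Hypothetical algorithm that centers all features before comparison."""
+
+     def __init__(self):
+       bob.bio.base.algorithm.Algorithm.__init__(self, performs_projection = True, requires_projector_training = True)
+       self.mean = None
+
+     def train_projector(self, train_features, projector_file):
+       # estimate the mean of the training features and write it to file
+       self.mean = numpy.mean(numpy.vstack(train_features), axis = 0)
+       bob.io.base.save(self.mean, projector_file)
+
+     def load_projector(self, projector_file):
+       # called before the project, enroll and score functions
+       self.mean = bob.io.base.load(projector_file)
+
+     def project(self, feature):
+       # the projected feature is a numpy.ndarray, so the default IO applies
+       return feature - self.mean
+
+     def enroll(self, enroll_features):
+       return numpy.mean(numpy.vstack(enroll_features), axis = 0)
+
+     def score(self, model, probe):
+       return -numpy.linalg.norm(model - probe)
+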
+Some tools also require training the model enrollment functionality (the ``enroller``, for short).
+In this case, these functions are overridden:
+
+* ``train_enroller(self, training_features, enroller_file)``: Trains the model enrollment with the list of lists of features and writes the ``enroller_file``.
+
+  .. note::
+     If you write this function, please make sure that you use ``requires_enroller_training=True`` during the base class constructor call in your ``__init__`` function.
+
+* ``load_enroller(self, enroller_file)``: Loads the enroller from file.
+  This function is always called before the ``enroll`` and ``score`` functions are executed.
+
+
+By default, it is assumed that both the models and the probe features are of type :py:class:`numpy.ndarray`.
+If the ``score`` function expects models and probe features to be of a different type, these functions are overridden:
+
+* ``write_model(self, model, model_file)``: writes the model (as returned by the ``enroll`` function).
+* ``read_model(self, model_file) -> model``: reads the model (as written by the ``write_model`` function) from file.
+* ``read_probe(self, probe_file) -> feature``: reads the probe feature from file.
+
+  .. note::
+     In many cases, the ``read_feature`` and ``read_probe`` functions are identical (if both are present).
+
+Finally, the :py:class:`bob.bio.base.algorithm.Algorithm` class provides default implementations for the case that models store several features, or that several probe features should be combined into one score.
+These two functions are:
+
+* ``score_for_multiple_models(self, models, probe)``: In case your models store several features, **call** this function to compute the average (or min, max, ...) of the scores.
+* ``score_for_multiple_probes(self, model, probes)``: By default, the average (or min, max, ...) of the scores for all probes are computed. **Override** this function in case you want different behavior.
+
+
+
+Databases
+---------
+
+Databases provide information about the data sets on which the recognition algorithms should run.
+Particularly, databases come with one or more evaluation protocols, which define which parts of the data are used for training, enrollment and probing.
+Some protocols split up the data into three different groups: a training set (aka. ``world`` group), a development set (aka. ``dev`` group) and an evaluation set (``eval``, sometimes also referred to as test set).
+Furthermore, some of the databases split off some data from the training set, which is used to perform a ZT score normalization.
+Finally, most of the databases come with specific annotation files, which define additional information about the data, e.g., hand-labeled eye locations for face images.
+
+
+Generic Databases
+~~~~~~~~~~~~~~~~~
+
+All these different interfaces are concentrated into the :py:class:`bob.bio.base.database.Database` class.
+This database provides a minimum common interface for all databases that can be used by ``bob.bio``.
+
+.. todo::
+   Provide more details about the Database.
+
+If the database provides an interface for ZT score normalization, the :py:class:`bob.bio.base.database.DatabaseZT` is used, which is derived from :py:class:`bob.bio.base.database.Database`.
+
+.. todo::
+   Provide more details about the DatabaseZT.
+
+
+Verification Database Interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+For most of the data sets, we rely on the database interfaces from Bob_.
+Particularly, all databases that are derived from :py:class:`bob.db.verification.utils.Database` (see :ref:`here <verification_databases>` for a list of implemented databases) are supported by a special derivation of the databases from above.
+
+
+If you have your own database that you want to execute the recognition experiments on, you should first check if you could use the :ref:`Verification FileList Database <bob.db.verification.filelist>` interface by defining appropriate file lists for the training set, the model set, and the probes.
+If you can do this, just write your own configuration file that uses the :py:class:`bob.db.verification.filelist.Database` interface.
+In most of the cases, this database should be sufficient to run experiments.
+Please refer to the :ref:`documentation <bob.db.verification.filelist>` of this database for more instructions on how to configure it.
+
+In case you want to have a more complicated interface to your database, you are welcome to write your own database wrapper class.
+In this case, you have to derive your class from :py:class:`bob.bio.base.database.Database`, and provide the following functions:
+
+* ``__init__(self, <your-parameters>, **kwargs)``: Constructor of your database interface.
+  Please call the base class constructor, providing all the required parameters (see :ref:`databases`), e.g. by ``bob.bio.base.database.Database.__init__(self, **kwargs)``.
+* ``all_files(self)``: Returns a list of all :py:class:`bob.bio.base.database.File` objects of the database.
+  The list needs to be sorted by the file id (you can use the ``self.sort(files)`` function for sorting).
+* ``training_files(self, step, arrange_by_client = False)``: Returns a sorted list of the :py:class:`bob.bio.base.database.File` objects that are used for training.
+  If ``arrange_by_client`` is enabled, you might want to use the ``self.arrange_by_client(files)`` function to perform the job.
+* ``model_ids(self, group = 'dev')``: The ids for the models (usually, there is only one model per client and, thus, you can simply use the client ids) for the given group.
+  Usually, providing ids for the group ``'dev'`` should be sufficient.
+* ``client_id_from_model_id(self, model_id)``: Returns the client id for the given model id.
+* ``enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`bob.bio.base.database.File` objects for the given model id.
+* ``probe_files(self, model_id=None, group='dev')``: Returns the list of probe files the given ``model_id`` should be compared with.
+  Usually, all probe files are compared with all model files.
+  In this case, you can just ignore the ``model_id``.
+  If the ``model_id`` is ``None``, this function is supposed to return *all* probe files for all models of the given group.
+
+Additionally, you can define more lists that can be used for ZT score normalization.
+In this case, derive your class from :py:class:`bob.bio.base.database.DatabaseZT` instead, and additionally override the following functions:
+
+* ``t_model_ids(self, group = 'dev')``: The ids for the T-Norm models for the given group.
+* ``t_enroll_files(self, model_id, group='dev')``: Returns the list of model :py:class:`bob.bio.base.database.File` objects for the given T-Norm model id.
+* ``z_probe_files(self, group='dev')``: Returns the list of Z-probe :py:class:`bob.bio.base.database.File` objects, with which all the models and T-Norm models are compared.
+
+.. note::
+   For a proper recognition protocol, the identities of the models and the T-Norm models, as well as those of the Z-probes, should be different.
+
+For some protocols, a single probe consists of several features; see :ref:`algorithms` for strategies on how to incorporate several probe files into one score.
+If your database provides this functionality, please override:
+
+* ``uses_probe_file_sets(self)``: Returns ``True`` if the current protocol of the database provides multiple files for one probe.
+* ``probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of :py:class:`bob.bio.base.database.FileSet` objects.
+* ``z_probe_file_sets(self, model_id=None, group='dev')``: Returns a list of lists of Z-probe :py:class:`bob.bio.base.database.FileSet` objects (only needed if the base class is :py:class:`bob.bio.base.database.DatabaseZT`).
+
+
+
+.. _configuration-files:
+
+Adding Configuration Files
+--------------------------
+After your code is tested, you should provide a configuration file for your algorithm.
+A configuration file basically consists of a constructor call to your new class with a useful (yet not necessarily optimized) set of parameters.
+Depending on your type of contribution, you should write a line like:
+
+* ``database = <YourDatabase>(<YourParameters>)``
+* ``preprocessor = <YourPreprocessor>(<YourParameters>)``
+* ``extractor = <YourExtractor>(<YourParameters>)``
+* ``algorithm = <YourAlgorithm>(<YourParameters>)``
+
+and save the configuration file into the according sub-directory of your package (e.g., `bob/bio/base/config <file:../bob/bio/base/config>`_ in this package).
+
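+For example, a configuration file for the hypothetical ``MeanDistance`` algorithm sketched earlier could simply read (``bob.bio.dummy`` is a fictitious satellite package):
+
+.. code-block:: python
+
+   import bob.bio.dummy
+
+   algorithm = bob.bio.dummy.algorithm.MeanDistance()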
+
+.. _register-resources:
+
+Registering your Code as a Resource
+-----------------------------------
+Now, you should be able to register this configuration file as a resource, so that you can use the configuration from above by a simple ``<shortcut>`` of your choice.
+Please open the `setup.py <file:../setup.py>`_ file in the base directory of your satellite package and edit the ``entry_points`` section.
+Depending on your type of algorithm, you have to add:
+
+* ``'bob.bio.database': [ '<your-database-shortcut> = <your-database-configuration>.database' ]``
+* ``'bob.bio.preprocessor': [ '<your-preprocessor-shortcut> = <your-preprocessor-configuration>.preprocessor' ]``
+* ``'bob.bio.extractor': [ '<your-extractor-shortcut> = <your-extractor-configuration>.extractor' ]``
+* ``'bob.bio.algorithm': [ '<your-recognition-algorithm-shortcut> = <your-algorithm-configuration>.algorithm' ]``
+
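+As a sketch, the relevant part of the ``setup.py`` of the fictitious ``bob.bio.dummy`` package from above could then look like:
+
+.. code-block:: python
+
+   from setuptools import setup, find_packages
+
+   setup(
+     name = 'bob.bio.dummy',   # hypothetical satellite package
+     version = '0.0.1',
+     packages = find_packages(),
+     entry_points = {
+       'bob.bio.algorithm': [
+         'mean-distance = bob.bio.dummy.config.algorithm.mean_distance',
+       ],
+     },
+   )
+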
+After re-running ``./bin/buildout``, your new resource should be listed in the output of ``./bin/resources.py``.
+
+
+.. include:: links.rst
diff --git a/doc/implemented.rst b/doc/implemented.rst
new file mode 100644
index 00000000..a07bc48a
--- /dev/null
+++ b/doc/implemented.rst
@@ -0,0 +1,35 @@
+
+
+
+=================================
+Tools implemented in bob.bio.base
+=================================
+
+Databases
+---------
+
+.. automodule:: bob.bio.base.database
+
+Preprocessors
+-------------
+
+.. automodule:: bob.bio.base.preprocessor
+
+Extractors
+----------
+
+.. automodule:: bob.bio.base.extractor
+
+Algorithms
+----------
+
+.. automodule:: bob.bio.base.algorithm
+
+
+Grid Configuration
+------------------
+
+.. automodule:: bob.bio.base.grid
+
+
+.. include:: links.rst
diff --git a/doc/index.rst b/doc/index.rst
index 1ccb8b55..ab0da819 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,18 +1,87 @@
 .. vim: set fileencoding=utf-8 :
-.. Andre Anjos <andre.anjos@idiap.ch>
-.. Mon 13 Aug 2012 12:36:40 CEST
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
 
-=====================
- Bob Example Project
-=====================
+.. _bob.bio.base:
 
+===========================================
+ Running Biometric Recognition Experiments
+===========================================
 
-Package Documentation
----------------------
+The ``bob.bio`` packages provide open source tools to run comparable and reproducible biometric recognition experiments.
+To design a biometric recognition experiment, one has to choose:
 
-.. automodule:: bob.bio.base
+* a database containing the original data, and a protocol that defines how to use the data,
+* a data preprocessing algorithm, e.g., face detection for face recognition experiments, or voice activity detection for speaker recognition,
+* the type of features to extract from the preprocessed data,
+* the biometric recognition algorithm to employ, and
+* the way to evaluate the results.
 
-Databases
----------
+For any of these parts, several different types are implemented in the ``bob.bio`` packages, and basically any combination of the five parts can be executed.
+For each type, several meta-parameters can be tested.
+This results in a nearly infinite number of possible experiments that can be run using the current setup.
+But it is also possible to use your own database, preprocessing, feature type, or biometric recognition algorithm and test it against the baseline algorithms implemented in our packages.
 
-.. automodule:: bob.bio.base.database
+The ``bob.bio`` packages are derived from the former `FaceRecLib <http://pypi.python.org/pypi/facereclib>`__, which is now deprecated.
+
+This package (:py:mod:`bob.bio.base`) includes the basic definition of a biometric recognition experiment, as well as a generic script that can execute the full biometric experiment in a single command line.
+Changing the employed tools, such as the database, protocol, preprocessor, feature extractor or recognition algorithm, is as simple as changing a command line parameter.
+
+The implementation of (most of) the tools is separated into other packages in the ``bob.bio`` namespace.
+All these packages can be easily combined.
+Here is a growing list of derived packages:
+
+* :ref:`bob.bio.spear <bob.bio.spear>` Tools to run speaker recognition experiments, including voice activity detection, Cepstral feature extraction, and speaker databases
+* :ref:`bob.bio.face <bob.bio.face>` Tools to run face recognition experiments, such as face detection, facial feature extraction and comparison, and face image databases
+* :ref:`bob.bio.video <bob.bio.video>` An extension of face recognition algorithms to run on video data, and the according video databases
+* :ref:`bob.bio.gmm <bob.bio.gmm>` Algorithms based on Gaussian Mixture Modeling (GMM) such as Inter-Session Variability modeling (ISV) or Total Variability modeling (TV, aka. I-Vector)
+* :ref:`bob.bio.csu <bob.bio.csu>` Wrapper classes for the `CSU Face Recognition Resources <http://www.cs.colostate.edu/facerec>`_ to be run with ``bob.bio``.
+
+If you are interested, please continue reading:
+
+
+===========
+Users Guide
+===========
+
+.. toctree::
+   :maxdepth: 2
+
+   installation
+   experiments
+   implementation
+..   evaluate
+
+================
+Reference Manual
+================
+
+.. toctree::
+   :maxdepth: 2
+
+   implemented
+   py_api
+
+
+ToDo-List
+=========
+
+This documentation is still under development.
+Here is a list of things that need to be done:
+
+.. todolist::
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
+
+.. include:: links.rst
diff --git a/doc/installation.rst b/doc/installation.rst
new file mode 100644
index 00000000..61efa176
--- /dev/null
+++ b/doc/installation.rst
@@ -0,0 +1,126 @@
+.. vim: set fileencoding=utf-8 :
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
+
+.. _installation:
+
+=========================
+Installation Instructions
+=========================
+
+As noted before, this package is part of the ``bob.bio`` packages, which in turn are part of the signal-processing and machine learning toolbox Bob_.
+To install `Packages of Bob <https://github.com/idiap/bob/wiki/Packages>`_, please read the `Installation Instructions <https://github.com/idiap/bob/wiki/Installation>`_.
+For Bob_ to be able to work properly, some dependent packages are required to be installed.
+Please make sure that you have read the `Dependencies <https://github.com/idiap/bob/wiki/Dependencies>`_ for your operating system.
+
+.. note::
+  Currently, running Bob_ under MS Windows is not yet supported.
+  However, we found that running Bob_ in a virtual Unix environment such as the one provided by VirtualBox_ is a good alternative.
+
+The simplest and most convenient way to use the ``bob.bio`` tools is within a ``zc.buildout`` package, as explained in more detail `here <https://github.com/idiap/bob/wiki/Installation#using-zcbuildout-for-production>`__.
+There, in the ``eggs`` section of the ``buildout.cfg`` file, simply list the ``bob.bio`` packages that you want, like:
+
+.. code-block:: python
+
+   eggs = bob.bio.base
+          bob.bio.face
+          bob.bio.gmm
+          bob.bio.video
+          bob.db.youtube
+          gridtk
+
+in order to download and install all packages that are required for your experiments.
+In the example above, you might want to run a video face recognition experiment using the :py:class:`bob.bio.face.preprocessor.FaceDetector` preprocessor and the :py:class:`bob.bio.face.extractor.DCTBlocks` feature extractor defined in :ref:`bob.bio.face <bob.bio.face>`, the :py:class:`bob.bio.gmm.algorithm.IVector` algorithm defined in :ref:`bob.bio.gmm <bob.bio.gmm>` and the video extensions defined in :ref:`bob.bio.video <bob.bio.video>`, using the YouTube faces database interface defined in :ref:`bob.db.youtube <bob.db.youtube>`.
+Running the simple command line:
+
+.. code-block:: sh
+
+   $ python bootstrap-buildout.py
+   $ ./bin/buildout
+
+will then download and install all dependent packages locally (relative to your current working directory), and create a ``./bin`` directory containing all the necessary scripts to run the experiments.
+
+
+Databases
+~~~~~~~~~
+
+With ``bob.bio`` you will run biometric recognition experiments using some default biometric recognition databases.
+Though the verification protocols are implemented in ``bob.bio``, the original data are **not included**.
+To download the original data of the databases, please refer to the according web pages.
+Database URLs are given in the :ref:`databases` section.
+
+After downloading the original data for the databases, you will need to tell ``bob.bio`` where these databases can be found.
+For this purpose, we have implemented a special file in which you can set your directories.
+By default, this file is located in ``~/.bob_bio_databases.txt``, and it contains several lines, each line looking somewhat like:
+
+.. code-block:: text
+
+   [YOUR_ATNT_DATABASE_DIRECTORY] = /path/to/your/directory
+
+.. note::
+   If this file does not exist, feel free to create and populate it yourself.
+
+
+Please use ``./bin/databases.py`` for a list of known databases, where you can see the raw ``[YOUR_DATABASE_PATH]`` entries for all databases that you haven't updated, and the corrected paths for those you have.
+
+
+.. note::
+   If you have installed only ``bob.bio.base``, no databases will be listed -- all databases are included in other packages, such as :ref:`bob.bio.face <bob.bio.face>` or :ref:`bob.bio.spear <bob.bio.spear>`.
+
+
+Test your Installation
+~~~~~~~~~~~~~~~~~~~~~~
+
+One of the scripts that were generated during the bootstrap/buildout step is a test script.
+To verify your installation, you can use it to run the nose tests for each of the ``bob.bio`` packages:
+
+.. code-block:: sh
+
+  $ ./bin/nosetests -vs bob.bio.base
+  $ ./bin/nosetests -vs bob.bio.gmm
+  ...
+
+Some of the tests require the images of the `AT&T database`_.
+If the database is not found on your system, it will automatically be downloaded and extracted into a temporary directory, **which will not be erased**.
+
+To avoid the download to happen each time you call the nose tests, please:
+
+1. Download the `AT&T database`_ and extract it to a directory of your choice.
+2. Set the environment variable ``ATNT_DATABASE_DIRECTORY`` to the directory where you extracted the database.
+   For example, in ``bash`` you can call:
+
+.. code-block:: sh
+
+  $ export ATNT_DATABASE_DIRECTORY=/path/to/your/copy/of/atnt
+
+.. note::
+  To set the directory permanently, you can also change the ``atnt_default_directory`` in the file `bob/bio/base/test/utils.py <file:../bob/bio/base/test/utils.py>`_.
+  In this case, there is no need to set the environment variable any more.
+
+In case any of the tests fail for unexplainable reasons, please file a bug report through the `GitHub bug reporting system`_.
+
+.. note::
+  Usually, all tests should pass with the latest stable versions of the Bob_ packages.
+  In other versions, some of the tests may fail.
+
+
+Generate this documentation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Generally, the documentation of this package is `available online <http://pythonhosted.org/bob.bio.base>`__, and this should be your preferred resource.
+However, to generate this documentation locally, you can call:
+
+.. code-block:: sh
+
+  $ ./bin/sphinx-build doc sphinx
+
+Afterward, the documentation is available and you can read it, e.g., by using:
+
+.. code-block:: sh
+
+  $ firefox sphinx/index.html
+
+
+.. _buildout.cfg: file:../buildout.cfg
+
+.. include:: links.rst
diff --git a/doc/links.rst b/doc/links.rst
new file mode 100644
index 00000000..06bc9029
--- /dev/null
+++ b/doc/links.rst
@@ -0,0 +1,22 @@
+.. vim: set fileencoding=utf-8 :
+.. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. date: Thu Sep 20 11:58:57 CEST 2012
+
+.. This file contains all links we use for documentation in a centralized place
+
+.. _idiap: http://www.idiap.ch
+.. _github: http://www.github.com/idiap
+.. _bob: http://www.idiap.ch/software/bob
+.. _github bug reporting system: http://github.com/bioidiap/bob.bio.base/issues
+.. _idiap at github: http://www.github.com/bioidiap
+.. _at&t database: http://www.cl.cam.ac.uk/research/dtg/attarchive/facedatabase.html
+.. _bob's github page: http://idiap.github.com/bob
+.. _gridtk: http://github.com/idiap/gridtk
+.. _buildout: http://www.buildout.org
+.. _nist: http://www.nist.gov/itl/iad/ig/focs.cfm
+.. _pypi: http://pypi.python.org
+.. _sge: http://wiki.idiap.ch/linux/SunGridEngine
+.. _csu face recognition resources: http://www.cs.colostate.edu/facerec
+.. _xfacereclib.extension.csu: http://pypi.python.org/pypi/xfacereclib.extension.CSU
+.. _virtualbox: https://www.virtualbox.org
+.. _hdf5: http://www.hdfgroup.org/HDF5
diff --git a/doc/py_api.rst b/doc/py_api.rst
new file mode 100644
index 00000000..4e5d00cb
--- /dev/null
+++ b/doc/py_api.rst
@@ -0,0 +1,17 @@
+
+===========================
+Python API for bob.bio.base
+===========================
+
+Generic functions
+-----------------
+
+.. automodule:: bob.bio.base
+
+Tools to run recognition experiments
+------------------------------------
+
+.. automodule:: bob.bio.base.tools
+
+
+.. include:: links.rst
-- 
GitLab