From 7f4e53d376f951068e40aa3b04015edb07bac415 Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Sat, 22 Feb 2020 11:57:08 +0100
Subject: [PATCH] [sphinx] Redoing the documentation

[sphinx] Documenting

[sphinx] Update documentation

Added a template script for the vanilla pipeline

[sphinx] Improving documentation

[ci] Patching the requirements
---
 bob/bio/base/annotator/Annotator.py       |   2 +-
 bob/bio/base/config/baselines/pca_atnt.py |  21 ++
 bob/bio/base/pipelines/blocks.py          |   2 +-
 bob/bio/base/script/vanilla_biometrics.py |  16 +-
 conda/meta.yaml                           |   1 +
 develop.cfg                               |   5 +-
 doc/experiments.rst                       | 180 ++++--------------
 doc/implementation.rst                    |   8 +-
 doc/implemented.rst                       |  18 --
 doc/index.rst                             |   3 +-
 doc/installation.rst                      |   1 +
 doc/more.rst                              | 221 ----------------------
 doc/openbr.rst                            | 105 ----------
 doc/py_api.rst                            |   1 -
 requirements.txt                          |   1 +
 15 files changed, 77 insertions(+), 508 deletions(-)
 create mode 100644 bob/bio/base/config/baselines/pca_atnt.py
 delete mode 100644 doc/more.rst
 delete mode 100644 doc/openbr.rst

diff --git a/bob/bio/base/annotator/Annotator.py b/bob/bio/base/annotator/Annotator.py
index 9a4cfb2b..59602bf1 100644
--- a/bob/bio/base/annotator/Annotator.py
+++ b/bob/bio/base/annotator/Annotator.py
@@ -9,7 +9,7 @@ class Annotator(object):
     ----------
     read_original_data : callable
         A function that loads the samples. The syntax is like
-        :any:`bob.bio.base.read_original_data`.
+        `bob.bio.base.read_original_data`.
     """
 
     def __init__(self, read_original_data=None, **kwargs):
diff --git a/bob/bio/base/config/baselines/pca_atnt.py b/bob/bio/base/config/baselines/pca_atnt.py
new file mode 100644
index 00000000..6ef2a1b4
--- /dev/null
+++ b/bob/bio/base/config/baselines/pca_atnt.py
@@ -0,0 +1,21 @@
+from bob.bio.base.pipelines.blocks import DatabaseConnector, AlgorithmAdaptor
+import functools
+import bob.db.atnt
+
+database = DatabaseConnector(bob.db.atnt.Database(), protocol="Default")
+
+from bob.bio.face.preprocessor import Base
+preprocessor = functools.partial(
+                Base,
+                color_channel="gray",
+                dtype="float64",
+            )
+
+
+from bob.bio.base.extractor import Linearize
+extractor = Linearize
+#extractor = 'linearize'
+
+
+from bob.bio.base.algorithm import PCA
+algorithm = AlgorithmAdaptor(functools.partial(PCA, 0.99))
diff --git a/bob/bio/base/pipelines/blocks.py b/bob/bio/base/pipelines/blocks.py
index 0b9c88fb..5882a3d9 100644
--- a/bob/bio/base/pipelines/blocks.py
+++ b/bob/bio/base/pipelines/blocks.py
@@ -10,7 +10,7 @@ import copy
 import functools
 
 import bob.io.base
-
+from bob.pipelines.sample.sample import DelayedSample, SampleSet, Sample
 
 class DatabaseConnector:
     """Wraps a bob.bio.base database and generates conforming samples
diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py
index c64b6107..85f067d1 100644
--- a/bob/bio/base/script/vanilla_biometrics.py
+++ b/bob/bio/base/script/vanilla_biometrics.py
@@ -61,7 +61,7 @@ TODO: Work out this help
     "-p",
     required=True,
     cls=ResourceOption,
-    entry_point_group="bob.pipelines.preprocessors",  # This should be linked to bob.bio.base
+    entry_point_group="bob.bio.preprocessor",  # This should be linked to bob.bio.base
     help="Data preprocessing algorithm",
 )
 @click.option(
@@ -69,7 +69,7 @@ TODO: Work out this help
     "-e",
     required=True,
     cls=ResourceOption,
-    entry_point_group="bob.pipelines.extractor",  # This should be linked to bob.bio.base
+    entry_point_group="bob.bio.extractor",  # This should be linked to bob.bio.base
     help="Feature extraction algorithm",
 )
 @click.option(
@@ -77,7 +77,7 @@ TODO: Work out this help
     "-a",
     required=True,
     cls=ResourceOption,
-    entry_point_group="bob.pipelines.biometric_algorithm",  # This should be linked to bob.bio.base
+    entry_point_group="bob.bio.algorithm",  # This should be linked to bob.bio.base
     help="Biometric Algorithm (class that implements the methods: `fit`, `enroll` and `score`)",
 )
 @click.option(
@@ -85,7 +85,7 @@ TODO: Work out this help
     "-d",
     required=True,
     cls=ResourceOption,
-    entry_point_group="bob.pipelines.database",  # This should be linked to bob.bio.base
+    entry_point_group="bob.bio.database",  # This should be linked to bob.bio.base
     help="Biometric Database connector (class that implements the methods: `background_model_samples`, `references` and `probes`)",
 )
 @click.option(
@@ -222,9 +222,11 @@ def vanilla_biometrics(
 
         # result.visualize(os.path.join(output, "graph.pdf"), rankdir="LR")
         result = result.compute(scheduler=dask_client)
-        for probe in result:
-            for reference in probe.samples:
-                print(reference.subject, probe.subject, probe.path, reference.data)
+        with open(os.path.join(output, f"scores-{g}"), "w") as f:
+            for probe in result:
+                for reference in probe.samples:
+                    line = "{0} {1} {2} {3}\n".format(reference.subject, probe.subject, probe.path, reference.data)
+                    f.write(line)
 
     dask_client.shutdown()
 
diff --git a/conda/meta.yaml b/conda/meta.yaml
index 44d9909b..6cb68e21 100644
--- a/conda/meta.yaml
+++ b/conda/meta.yaml
@@ -39,6 +39,7 @@ requirements:
     - python
     - setuptools
     - scipy
+    - bob.pipelines
     - six
 
 test:
diff --git a/develop.cfg b/develop.cfg
index b1c17c92..f9add9d4 100644
--- a/develop.cfg
+++ b/develop.cfg
@@ -5,14 +5,13 @@
 [buildout]
 parts = scripts
 eggs = bob.bio.base
-       bob.db.atnt
-       bob.io.image
+       bob.pipelines
        gridtk
 
 extensions = bob.buildout
              mr.developer
 auto-checkout = *
-develop = src/bob.extension
+develop = src/bob.pipelines
           src/bob.blitz
           src/bob.core
           src/bob.io.base
diff --git a/doc/experiments.rst b/doc/experiments.rst
index e82f891b..3bed99da 100644
--- a/doc/experiments.rst
+++ b/doc/experiments.rst
@@ -1,13 +1,14 @@
 .. vim: set fileencoding=utf-8 :
 .. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
 .. date: Thu Sep 20 11:58:57 CEST 2012
 
 .. _bob.bio.base.experiments:
 
 
-======================================================================
-Running Biometric Recognition Experiments with the Vanilla Biometrics
-======================================================================
+===========================================================================
+Running Biometric Recognition Experiments: The Vanilla Biometrics in action
+===========================================================================
 
 Now, you are ready to run your first biometric recognition experiment.
 
@@ -24,16 +25,15 @@ Another source of information is the `TAM tutorial given at Idiap <https://githu
 
 
 To run biometric experiments, we provide a generic CLI command called ``bob pipelines``.
-Such CLI commands are entry-points to any kind of pipeline implemented under `bob.pipelines`.
-This tutorial will focus on the pipeline called `vanilla-biometrics`.
+This CLI command is an entry point to several pipelines implemented under `bob.pipelines`.
+This tutorial will focus on the pipeline called ``vanilla-biometrics``.
 
 .. code-block:: sh
 
    bob pipelines vanilla-biometrics --help
 
 
-
-As a default, the ``vanilla-biometrics`` pipeline accepts one or more *configuration files* that include the parametrization of the experiment to run.
+By default, the ``vanilla-biometrics`` pipeline accepts one or more *configuration files* that include the parametrization of the experiment to run.
 A configuration file contains one or more *variables* that define parts of the experiment.
 When several configuration files are specified, the variables of the latter will overwrite the ones of the former.
 For simplicity, here we discuss only a single configuration file.
@@ -58,7 +58,8 @@ These five variables are:
 * ``preprocessor``: The data preprocessor
 * ``extractor``: The feature extractor
 * ``algorithm``: The recognition algorithm
-* ``sub_directory``: A descriptive name for your experiment, which will serve as a sub-directory
+* ``dask_client``: The Dask client pointing to the execution backend
+
 
 The first four variables, i.e., the ``database``, the ``preprocessor``, the ``extractor`` and the ``algorithm`` can be specified in several different ways.
 For the start, we will use only the registered :ref:`Resources <bob.bio.base.resources>`.
@@ -77,11 +78,8 @@ To see more details about the resources, i.e., the full constructor call for the
 
    $ resources.py -dt algorithm
 
-.. note::
-   You will also find some ``grid`` resources being listed.
-   These type of resources will be explained :ref:`later <running_in_parallel>`.
 
-Before going into :ref:`more details about the configurations <running_part_2>`, we will provide information about running default experiments.
+.. Before going into :ref:`more details about the configurations <running_part_2>`, we will provide information about running default experiments.
 
 One variable, which is not required, but recommended, is ``verbose``.
 By default, the algorithms are set up to execute quietly, and only errors are reported (``logging.ERROR``).
@@ -96,22 +94,32 @@ So, a minimal configuration file (say: ``pca_atnt.py``) would look something lik
 
 .. code-block:: py
 
-   database = 'atnt'
-   preprocessor = 'face-detect'
-   extractor = 'linearize'
-   algorithm = 'pca'
-   sub_directory = 'PCA_ATNT'
-   verbose = 2
+    from bob.bio.base.pipelines.blocks import DatabaseConnector, AlgorithmAdaptor
+    import functools
+    import bob.db.atnt
+
+    database = DatabaseConnector(bob.db.atnt.Database(), protocol="Default")
+
+    preprocessor = 'face-detect'
+
+    extractor = 'linearize'
+
+    from bob.bio.base.algorithm import PCA
+    algorithm = AlgorithmAdaptor(functools.partial(PCA, 0.99))
+
 
 Running the experiment is then as simple as:
 
 .. code-block:: sh
 
-   $ verify.py pca_atnt.py
+   $ bob pipelines vanilla-biometrics pca_atnt.py local_parallel.py
 
 .. note::
    To be able to run exactly the command line from above, it requires to have :ref:`bob.bio.face <bob.bio.face>` installed.
 
+.. note::
+   The ``dask_client`` variable is defined in the configuration file ``local_parallel.py``. Check out the package `bob.pipelines <http://gitlab.idiap.ch/bob/bob.pipelines>`_ for more details.
+
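+For illustration, a minimal ``local_parallel.py`` could define the ``dask_client`` variable with a local Dask cluster; the exact resources shipped with ``bob.pipelines`` may differ, so treat this as a sketch only:
+
+.. code-block:: py
+
+    # Hypothetical local_parallel.py: run the pipeline on a local Dask cluster.
+    from dask.distributed import Client, LocalCluster
+
+    # Adjust n_workers to the number of cores you want to dedicate to the experiment.
+    dask_client = Client(LocalCluster(n_workers=4, threads_per_worker=1))
+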
 
 .. note::
    Chain loading is possible through configuration files, i.e., variables of each
@@ -119,33 +127,26 @@ Running the experiment is then as simple as:
 
    This allows us to spread our experiment setup in several configuration files and have a call similar to this::
 
-   $ verify.py config_1.py config_2.py config_n.py
+   $ bob pipelines vanilla-biometrics config_1.py config_2.py config_n.py
 
    For more information see *Chain Loading* in :ref:`bob.extension.config`.
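+
+As a sketch of such chain loading (hypothetical file names, using the registered resources mentioned above), one file can hold the data-related variables and another the algorithm-related ones; variables from later files override earlier ones:
+
+.. code-block:: py
+
+    # config_1.py -- hypothetical example holding the data-related variables
+    database = 'atnt'
+    preprocessor = 'face-detect'
+
+.. code-block:: py
+
+    # config_2.py -- hypothetical example holding the algorithm-related variables
+    extractor = 'linearize'
+    algorithm = 'pca'
+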
 
 
-Before running an experiment, it is recommended to add set the variable ``dry_run = True``, so that it will only print, which steps would be executed, without actually executing them, and make sure that everything works as expected.
-
 The final result of the experiment will be one (or more) score file(s).
-Usually, they will be called something like ``scores-dev``.
-By default, you can find them in a sub-directory the ``result`` directory, but you can change this option using the ``result_directory`` variable.
-
-.. note::
-   At Idiap_, the default result directory differs, see ``verify.py --help`` for your directory.
+Usually, they will be called something like ``scores-dev`` in your output directory.
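+
+The scores written by ``vanilla-biometrics`` are plain text, with one line per (reference, probe) comparison holding four space-separated fields: the reference subject, the probe subject, the probe path and the score. A hypothetical post-processing sketch, assuming that layout, could be:
+
+.. code-block:: py
+
+    # Hypothetical sketch: iterate over a scores-dev file written by the pipeline.
+    with open("scores-dev") as f:
+        for line in f:
+            reference_subject, probe_subject, probe_path, score = line.split()
+            print(reference_subject, probe_subject, float(score))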
 
 
 .. _bob.bio.base.command_line:
 
 Command Line Options
 --------------------
-Each configuration can also directly be specified as command line option of ``verify.py``.
+Each configuration can also directly be specified as a command line option of ``bob pipelines vanilla-biometrics``.
 
 .. note::
    Command line options have a long version starting with ``--`` and often a short version starting with a single ``-``.
-   Here, only the long names of the arguments are listed, please refer to ``verify.py --help`` (or short: ``verify.py -h``) for the abbreviations.
+   Here, only the long names of the arguments are listed, please refer to ``bob pipelines vanilla-biometrics --help`` (or short: ``bob pipelines vanilla-biometrics -h``) for the abbreviations.
 
 Usually, the (long version of the) command line parameter is identical to the variable name, where ``_`` characters are replaced by ``-``, and all options start with ``--``.
-For example, the ``sub_directory`` variable can also be set by the ``--sub-directory`` command line option.
 Only, the ``--verbose`` option differs, you can use the ``--verbose`` option several times to increase verbosity, e.g, ``--verbose --verbose`` (or short ``-vv``) increases verbosity to ``2`` (alias ``logging.INFO``).
 Generally, options defined on the command line will overwrite variables inside the configuration file(s).
 
@@ -157,14 +158,13 @@ The exact same experiment as above can, hence, be executed using:
 
 .. code-block:: sh
 
-   $ verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv
+   $ bob pipelines vanilla-biometrics --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --output pca-experiment -vv
 
 .. note::
    When running an experiment twice, you might realize that the second execution of the same experiment is much faster than the first one.
    This is due to the fact that those parts of the experiment, which have been successfully executed before (i.e., the according files already exist), are skipped.
    To override this behavior, i.e., to always regenerate all parts of the experiments, you can set ``force = True``.
 
-While we recommend to use a configuration file to declare your experiment, some variables might be faster to be changed on the command line, such as ``--dry-run``, ``--verbose``, ``--force`` (see above), ``--parallel N``, or ``--skip-...`` (see below).
 However, to be consistent, throughout this documentation we document the options as variables.
 
 
@@ -294,120 +294,4 @@ will output metrics and plots for the two experiments (dev and eval pairs) in
 `my_metrics.txt` and `my_plots.pdf`, respectively.
 
 
-.. _running_in_parallel:
-
-Running in Parallel
--------------------
-
-One important property of the ``verify.py`` script is that it can run in parallel, using either several processes on the local machine, or an SGE grid.
-To achieve that, ``bob.bio`` is well-integrated with our SGE grid toolkit GridTK_, which we have selected as a python package in the :ref:`Installation <bob.bio.base.installation>` section.
-The ``verify.py`` script can submit jobs either to the SGE grid, or to a local scheduler, keeping track of dependencies between the jobs.
-
-The GridTK_ keeps a list of jobs in a local database, which by default is called ``submitted.sql3``, but which can be overwritten with the ``gridtk_database_file`` variable.
-Please refer to the `GridTK documentation <https://www.idiap.ch/software/bob/docs/bob/gridtk/stable>`_ for more details on how to use the Job Manager ``jman``.
-
-Two different types of ``grid`` resources are defined, which can be used with the ``grid`` variable.
-The first type of resources will submit jobs to an SGE grid.
-They are mainly designed to run in the Idiap_ SGE grid and might need some adaptations to run on your grid.
-The second type of resources will submit jobs to a local queue, which needs to be run by hand (e.g., using ``jman --local run-scheduler --parallel 4``), or by setting the variable ``run_local_scheduler = True``.
-The difference between the two types of resources is that the local submission usually starts with ``local-``, while the SGE resource does not.
-You can also re-nice the parallel jobs by setting the ``nice`` variable accordingly.
-
-To run an experiment parallel on the local machine, you can also use the simple variable ``parallel = N``, which will run the experiments in ``N`` parallel processes on your machine.
-Here, ``N`` can be any positive integer -- but providing ``N`` greater than the number of processor threads of your machine will rather slow down processing.
-Basically, ``parallel = N`` is a shortcut for:
-
-.. code-block:: py
-
-   grid = bob.bio.base.grid.Grid("local", number_of_parallel_processes=N)
-   run_local_scheduler = True
-   stop_on_failure = True
-
-.. warning::
-   Some of the processes require a lot of memory, which are multiplied by ``N`` when you run in ``N`` parallel processes.
-   There is no check implemented to avoid that.
-
-
-Variables to change Default Behavior
-------------------------------------
-Additionally to the required variables discussed above, there are several variables to modify the behavior of the experiments.
-One set of command line options change the directory structure of the output.
-By default, intermediate (temporary) files are by default written to the ``temp`` directory, which can be overridden by the ``temp_directory`` variable, which expects relative or absolute paths.
-
-Re-using Parts of Experiments
-=============================
-If you want to re-use parts previous experiments, you can specify the directories (which are relative to the ``temp_directory``, but you can also specify absolute paths):
-
-* ``preprocessed_directory``
-* ``extracted_directory``
-* ``projected_directory``
-* ``models_directories`` (one for each the models and the ZT-norm-models, see below)
-
-or even trained extractor, projector, or enroller (i.e., the results of the extractor, projector, or enroller training):
-
-* ``extractor_file``
-* ``projector_file``
-* ``enroller_file``
-
-For that purpose, it is also useful to skip parts of the tool chain.
-To do that you can set these variables to ``True``:
-
-* ``skip_preprocessing``
-* ``skip_extractor_training``
-* ``skip_extraction``
-* ``skip_projector_training``
-* ``skip_projection``
-* ``skip_enroller_training``
-* ``skip_enrollment``
-* ``skip_score_computation``
-* ``skip_concatenation``
-* ``skip_calibration``
-
-although by default files that already exist are not re-created.
-You can use the ``force`` variable combined with the ``skip_`` variables (in which case the skip is preferred).
-To (re-)run just a sub-selection of the tool chain, you can also use the ``execute_only`` variable, which takes a list of options out of: ``preprocessing``, ``extractor-training``, ``extraction``, ``projector-training``, ``projection``, ``enroller-training``, ``enrollment``, ``score-computation``, ``concatenation`` or ``calibration``.
-This option is particularly useful for debugging purposes.
-
-
-Database-dependent Variables
-============================
-Many databases define several protocols that can be executed.
-To change the protocol, you can either modify the configuration file, or simply use the ``protocol`` variable.
-
-Some databases define several kinds of evaluation setups.
-For example, often two groups of data are defined, a so-called *development set* and an *evaluation set*.
-The scores of the two groups will be concatenated into two files called **scores-dev** and **scores-eval**, which are located in the score directory (see above).
-In this case, by default only the development set is employed.
-To use both groups, just specify ``groups = ['dev', 'eval']`` (of course, you can also only use the ``'eval'`` set by setting ``groups = ['eval']``).
-
-One score normalization technique is the so-called ZT score normalization.
-To enable this, simply use the ``zt_norm`` variable.
-If the ZT-norm is enabled, two sets of scores will be computed, and they will be placed in two different sub-directories of the score directory, which are by default called **nonorm** and **ztnorm**, but which can be changed using the ``zt_score_directories`` variable.
-
-
-Other Variables
----------------
-
-Calibration
-===========
-For some applications it is interesting to get calibrated scores.
-Simply set the variable ``calibrate_scores = True`` and another set of score files will be created by training the score calibration on the scores of the ``'dev'`` group and execute it to all available groups.
-The scores will be located at the same directory as the **nonorm** and **ztnorm** scores, and the file names are **calibrated-dev** (and **calibrated-eval** if applicable).
-
-Unsuccessful Preprocessing
-==========================
-In some cases, the preprocessor is not able to preprocess the data (e.g., for face image processing the face detector might not detect the face).
-If you expect such cases to happen, you might want to use the ``allow_missing_files`` variable.
-When this variable is set to ``True``, missing files will be handled correctly throughout the whole processing chain, i.e.:
-
-* the data file is not used during training (in any step of the processing tool chain)
-* preprocessed data is not written
-* feature extraction is not performed for that file
-* the file is exempt from model enrollment; if no enrollment file is found for a model, no model file is written
-* if either model or probe file is not found, the according score will be ``NaN``.
-  If several probe files are combined into one score, missing probe files will be ignored; if all probe files are not found, the score is ``NaN``.
-
-.. warning::
-   At the moment, combining the ``allow_missing_files`` and ``zt_norm`` variables might result in unexpected behavior, as the ZT-Norm computation does not handle ``NaN`` values appropriately.
-
 .. include:: links.rst
diff --git a/doc/implementation.rst b/doc/implementation.rst
index cccdb118..de9e7be2 100644
--- a/doc/implementation.rst
+++ b/doc/implementation.rst
@@ -1,5 +1,6 @@
 .. vim: set fileencoding=utf-8 :
 .. Manuel Guenther <Manuel.Guenther@idiap.ch>
+.. author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
 .. Mon 23 04 2012
 
 ======================
@@ -62,7 +63,7 @@ If a class returns data that is **not** of type :py:class:`numpy.ndarray`, it ov
 The preprocessor is also responsible for reading the original data.
 How to read original data can be specified by the ``read_original_data`` parameter of the constructor.
 The ``read_original_data`` function gets three parameters: the :py:class:`bob.bio.base.database.BioFile` object from the database, the base ``directory`` where to read the data from, and the ``extension`` in which the original data is stored.
-By default, this function is :py:func:`bob.bio.base.read_original_data`, which simply calls: ``biofile.load(directory, extension)``, so that each database implementation can define an appropriate way, how data is read or written.
+By default, this function is `bob.bio.base.read_original_data`, which simply calls: ``biofile.load(directory, extension)``, so that each database implementation can define an appropriate way, how data is read or written.
 In the rare case that this is not the way that the preprocessor expects the data, another function can be passed to the constructor, i.e., in a configuration file of an experiment.
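+
+A minimal sketch of such a custom reader (hypothetical; any callable taking the same three parameters can be used) would be:
+
+.. code-block:: py
+
+    def my_read_original_data(biofile, directory, extension):
+        # Hypothetical example that simply mirrors the default behavior; a real
+        # custom reader would load the data in whatever form the preprocessor expects.
+        return biofile.load(directory, extension)
+
+    # Passed to the preprocessor in a configuration file, e.g.:
+    # preprocessor = YourPreprocessor(read_original_data=my_read_original_data)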
 
 
@@ -345,4 +346,9 @@ Then, you can simply add a line inside the according ``entry_points`` section of
 After re-running ``buildout``, your new resource should be listed in the output of ``resources.py``.
 
 
+Legacy with old bob.bio.base
+----------------------------
+
+
+
 .. include:: links.rst
diff --git a/doc/implemented.rst b/doc/implemented.rst
index 80a4ab1e..b5002f82 100644
--- a/doc/implemented.rst
+++ b/doc/implemented.rst
@@ -14,9 +14,7 @@ Base Classes
    bob.bio.base.preprocessor.Preprocessor
    bob.bio.base.extractor.Extractor
    bob.bio.base.algorithm.Algorithm
-   bob.bio.base.grid.Grid
    bob.bio.base.annotator.Annotator
-   bob.bio.base.baseline.Baseline
 
 
 Implementations
@@ -63,17 +61,6 @@ Databases
 
 .. automodule:: bob.bio.base.database
 
-Grid Configuration
-------------------
-
-.. automodule:: bob.bio.base.grid
-
-.. data:: PREDEFINED_QUEUES
-
-   A dictionary of predefined queue keywords, which are adapted to the Idiap_ SGE.
-
-
-   .. adapted from http://stackoverflow.com/a/29789910/3301902 to ge a nice dictionary content view
 
 Annotators
 ----------
@@ -81,10 +68,5 @@ Annotators
 .. automodule:: bob.bio.base.annotator
 
 
-Baselines
----------
-
-.. automodule:: bob.bio.base.baseline
-
 
 .. include:: links.rst
diff --git a/doc/index.rst b/doc/index.rst
index 21ac65cd..d5efc171 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -1,5 +1,6 @@
 .. vim: set fileencoding=utf-8 :
 .. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
 .. date: Thu Sep 20 11:58:57 CEST 2012
 
 .. _bob.bio.base:
@@ -95,9 +96,7 @@ Users Guide
    experiments
    implementation
    filelist-guide
-   more
    annotations
-   openbr
 
 
 Reference Manual
diff --git a/doc/installation.rst b/doc/installation.rst
index 91f3c014..9145f2db 100644
--- a/doc/installation.rst
+++ b/doc/installation.rst
@@ -1,5 +1,6 @@
 .. vim: set fileencoding=utf-8 :
 .. author: Manuel Günther <manuel.guenther@idiap.ch>
+.. author: Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
 .. date: Thu Sep 20 11:58:57 CEST 2012
 
 .. _bob.bio.base.installation:
diff --git a/doc/more.rst b/doc/more.rst
deleted file mode 100644
index 952672d9..00000000
--- a/doc/more.rst
+++ /dev/null
@@ -1,221 +0,0 @@
-.. vim: set fileencoding=utf-8 :
-.. author: Manuel Günther <manuel.guenther@idiap.ch>
-.. date: Thu Sep 20 11:58:57 CEST 2012
-
-==============================
-More about Running Experiments
-==============================
-
-Now that we have learned the implementation details, we can have a closer look into how experiments can be parametrized.
-
-.. _running_part_2:
-
-Running Experiments (part II)
------------------------------
-As mentioned before, running biometric recognition experiments can be achieved using configuration files for the ``verify.py`` script.
-In section :ref:`running_part_1`, we have used registered resources to run an experiment.
-However, the variables (and also the :ref:`bob.bio.base.command_line` of ``verify.py``) are more flexible, as you can have three different ways of defining tools:
-
-1. Choose a resource (see ``resources.py`` or ``verify.py --help`` or the result of ``verify.py --create-configuration-file`` for a list of registered resources):
-
-   .. code-block:: py
-
-      algorithm = "pca"
-
-
-2. Use a (pre-defined) configuration file, see: :ref:`bob.bio.base.configuration-files`.
-   In case several tools are specified inside the configuration file, only the variable that matches to your variable will be used.
-   For example, the file "bob/bio/base/config/algorithm/pca.py" might define several variables, but only the ``algorithm`` variable is used when setting:
-
-   .. code-block:: py
-
-      algorithm = "bob/bio/base/config/algorithm/pca.py"
-
-
-3. Instantiate a class and pass all desired parameters to its constructor:
-
-   .. code-block:: py
-
-      import bob.bio.base
-      import scipy.spatial
-      algorithm = bob.bio.base.algorithm.PCA(
-          subspace_dimension = 30,
-          distance_function = scipy.spatial.distance.euclidean,
-          is_distance_function = True
-      )
-
-   .. note::
-
-      When specified on the command line, usually quotes ``"..."`` are required, and the ``--imports`` need to be provided:
-
-      .. code-block:: sh
-
-         $ verify.py --algorithm "bob.bio.base.algorithm.PCA(subspace_dimension = 30, distance_function = scipy.spatial.distance.euclidean, is_distance_function = True)" --imports bob.bio.base scipy.spatial
-
-
-All these three ways can be used for any of the five variables: ``database``, ``preprocessor``, ``extractor``, ``algorithm`` and ``grid``.
-You can even mix these three types freely in a single configuration file.
-
-
-Score Level Fusion of Different Algorithms on the same Database
----------------------------------------------------------------
-
-In several of our publications, we have shown that the combination of several biometric recognition algorithms is able to outperform each single algorithm.
-This is particularly true, when the algorithms rely on different kind of data, e.g., we have `fused face and speaker recognition system on the MOBIO database <http://publications.idiap.ch/index.php/publications/show/2688>`__.
-As long as several algorithms are executed on the same database, we can simply generate a fusion system by using the ``fuse_scores.py`` script, generating a new score file:
-
-.. code-block:: sh
-
-   $ fuse_scores.py --dev
-
-This computation is based on the :py:class:`bob.learn.linear.CGLogRegTrainer`, which is trained on the scores of the development set files (``--dev-files``) for the given systems.
-Afterwards, the fusion is applied to the ``--dev-files`` and the resulting score file is written to the file specified by ``--fused-dev-file``.
-If ``--eval-files`` are specified, the same fusion that is trained on the development set is now applied to the evaluation set as well, and the ``--fused-eval-file`` is written.
-
-.. note::
-   When ``--eval-files`` are specified, they need to be in the same order as the ``--dev-files``, otherwise the result is undefined.
-
-The resulting ``--fused-dev-file`` and ``--fused-eval-file`` can then be evaluated normally, e.g., using the ``bob bio evaluate`` script.
-
-
-.. _grid-search:
-
-Finding the Optimal Configuration
----------------------------------
-Sometimes, configurations of tools (preprocessors, extractors or algorithms) are highly dependent on the database or even the employed protocol.
-Additionally, configuration parameters depend on each other.
-``bob.bio`` provides a relatively simple set up that allows to test different configurations in the same task, and find out the best set of configurations.
-For this, the ``grid_search.py`` script can be employed.
-This script executes a configurable series of experiments, which reuse data as far as possible.
-Please check out ``grid_search.py --help`` for a list of command line options.
-
-The Configuration File
-~~~~~~~~~~~~~~~~~~~~~~
-The most important parameter to the ``grid_search.py`` is the ``--configuration-file``.
-In this configuration file it is specified, which parameters of which part of the algorithms will be tested.
-An example for a configuration file can be found in the test scripts: ``bob/bio/base/test/dummy/grid_search.py``.
-The configuration file is a common python file, which can contain certain variables:
-
-1. ``preprocessor =``
-2. ``extractor =``
-3. ``algorithm =``
-4. ``replace =``
-5. ``requirement =``
-6. ``imports =``
-
-The variables from 1. to 3. usually contain instantiations for classes of :ref:`bob.bio.base.preprocessors`, :ref:`bob.bio.base.extractors` and :ref:`bob.bio.base.algorithms`, but also registered :ref:`bob.bio.base.resources` can be used.
-For any of the parameters of the classes, a *placeholder* can be put.
-By default, these place holders start with a ``#`` character, followed by a digit or character.
-The variables 1. to 3. can also be overridden by the command line options ``--preprocessor``, ``--extractor`` and ``--algorithm`` of the ``grid_search.py`` script.
-
-The ``replace`` variable has to be set as a dictionary.
-In it, you can define with which values your place holder key should be filled, and in which step of the tool chain execution this should happen.
-The steps are ``'preprocess'``, ``'extract'``, ``'project'``, ``'enroll'`` and ``'score'``.
-For each of the steps, it can be defined, which placeholder should be replaced by which values.
-To be able to differentiate the results later on, each of the replacement values is bound to a directory name.
-The final structure looks somewhat like that:
-
-.. code-block:: python
-
-  replace = {
-      step1 : {
-          '#a' : {
-              'Dir_a1' : 'Value_a1',
-              'Dir_a2' : 'Value_a2'
-           },
-
-          '#b' : {
-              'Dir_b1' : 'Value_b1',
-              'Dir_b2' : 'Value_b2'
-          }
-      },
-
-      step2 : {
-          '#c' : {
-              'Dir_c1' : 'Value_c1',
-              'Dir_c2' : 'Value_c2'
-          }
-      }
-  }
-
-
-Of course, more than two values can be selected.
-In the above example, the results of the experiments will be placed into a directory structure as ``results/[...]/Dir_a1/Dir_b1/Dir_c1/[...]``.
-
-.. note::
-   Please note that we are using a dictionary structure to define the replacements.
-   Hence, the order of the directories inside the same step might not be in the same order as written in the configuration file.
-   For the above example, a directory structure of ``results/[...]/Dir_b1/Dir_a1/Dir_c1/[...]`` might be possible as well.
-
-
-Additionally, tuples of place holders can be defined, in which case always the full tuple will be replaced in one shot.
-Continuing the above example, it is possible to add:
-
-.. code-block:: python
-
-  ...
-      step3 : {
-          '(#d,#e)' : {
-              'Dir_de1' : ('Value_d1', 'Value_e1'),
-              'Dir_de2' : ('Value_d2', 'Value_e2')
-          }
-      }
-
-.. warning::
-   **All possible combinations** of the configuration parameters are tested, which might result in a **huge number of executed experiments**.
-
-Some combinations of parameters might not make any sense.
-In this case, a set of requirements on the parameters can be set, using the ``requirement`` variable.
-In the requirements, any string including any placeholder can be put that can be evaluated using pythons ``eval`` function:
-
-.. code-block:: python
-
-  requirement = ['#a > #b', '2*#c != #a', ...]
-
-Finally, when any of the classes or variables need to import a certain python module, it needs to be declared in the ``imports`` variable.
-If you, e.g., test, which ``scipy.spatial`` distance function works best for your features, please add the imports (and don't forget the ``bob.bio.base`` and other ``bob.bio`` packages in case you use their tools):
-
-.. code-block:: python
-
-  imports = ['scipy', 'bob.bio.base', 'bob.bio.face']
-
-
-For a complete example of the grid search configuration file, you might want to have a look into `the actual file that is used to test the grid search <https://gitlab.idiap.ch/bob/bob.bio.base/blob/master/bob/bio/base/test/dummy/grid_search.py>`__.
-
-Further Command Line Options
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-The ``grid_search.py`` script has a further set of command line options.
-
-- The ``--database`` and the ``--protocol`` define, which database and (optionally) which protocol should be used.
-- The ``--sub-directory`` is similar to the one in the ``verify.py``.
-- ``--result-directory`` and ``--temp-directory`` specify directories to write results and temporary files into. Defaults are ``./results/grid_search`` and ``./temp/grid_search`` in the current directory. Make sure that the ``--temp-directory`` can store sufficient amount of data.
-- The ``--preprocessor``, ``--extractor`` and ``--algorithm`` can be used to override the ``preprocessor``, ``extractor`` and ``algorithm`` fields in the configuration file (in which case the configuration file does not need to contain these variables).
-- The ``--grid`` option can select the SGE_ configuration.
-- The ``--parallel`` option can run on the local machine using the given number of parallel threads.
-- The ``--preprocessed-directory`` can be used to select a directory of previously preprocessed data. This should not be used in combination with testing different preprocessor parameters.
-- The ``--gridtk-database-directory`` can be used to select another directory, where the ``submitted.sql3`` files will be stored.
-- Sometimes, the gridtk databases grow, and are too large for holding all experiments. Using the ``--gridtk-database-split-level``, databases can be split at the desired level.
-- The ``--write-commands`` directory can be selected to write the executed commands into (this is useful in case some experiments fail and need to be rerun).
-- The ``--dry-run`` flag should always be used before the final execution to see if the experiment definition works as expected.
-- The ``--skip-when-existent`` flag will only execute the experiments that have not yet finished (i.e., where the resulting score files are not produced yet).
-- With the ``--executable`` flag, you might select a different script rather that ``bob.bio.base.script.verify`` to run the experiments (such as the ``bob.bio.gmm.script.verify_gmm``).
-- Finally, additional options might be sent to the ``verify.py`` script directly. These options might be put after a ``--`` separation.
-
-Evaluation of Results
-~~~~~~~~~~~~~~~~~~~~~
-To evaluate a series of experiments, a special script iterates through all the results and computes EER on the development set and HTER on the evaluation set, for both the ``nonorm`` and the ``ztnorm`` directories.
-Simply call:
-
-.. code-block:: sh
-
-  $ collect_results.py -vv --directory [result-base-directory] --sort
-
-This will iterate through all result files found in ``[result-base-directory]`` and sort the results according to the EER on the development set (the sorting criterion can be modified using the ``--criterion``  and the ``--sort-key`` comamnd line options).
-Hence, to find the best results of your grid search experiments (with default directories), simply run:
-
-.. code-block:: sh
-
-  $ collect_results.py -vv --directory results/grid_search --sort --criterion EER --sort-key nonorm-dev
-
-
-.. include:: links.rst
diff --git a/doc/openbr.rst b/doc/openbr.rst
deleted file mode 100644
index 348d3b77..00000000
--- a/doc/openbr.rst
+++ /dev/null
@@ -1,105 +0,0 @@
-
-.. _bob.bio.base.openbr:
-
-=====================
-Score file conversion
-=====================
-Sometimes, it is required to export the score files generated by Bob to a
-different format, e.g., to be able to generate a plot comparing Bob's systems
-with other systems.  In this package, we provide source code to convert between
-different types of score files.
-
-Bob to OpenBR
--------------
-One of the supported formats is the matrix format that the National Institute
-of Standards and Technology (NIST) uses, and which is supported by OpenBR_.
-The scores are stored in two binary matrices, where the first matrix (usually
-with a ``.mtx`` filename extension) contains the raw scores, while a second
-mask matrix (extension ``.mask``) contains information, which scores are
-positives, and which are negatives.
-To convert from Bob's four column or five column score file to a pair of these
-matrices, you can use the :py:func:`bob.bio.base.score.openbr.write_matrix` function.
-In the simplest way, this function takes a score file
-``'five-column-sore-file'`` and writes the pair ``'openbr.mtx', 'openbr.mask'``
-of OpenBR_ compatible files:
-
-.. code-block:: py
-
-    >>> bob.bio.base.score.openbr.write_matrix('five-column-sore-file', 'openbr.mtx', 'openbr.mask', score_file_format = '5column')
-
-In this way, the score file will be parsed and the matrices will be written in
-the same order that is obtained from the score file.
-For most of the applications, this should be sufficient, but as the identity
-information is lost in the matrix files, no deeper analysis is possible anymore
-when just using the matrices.  To enforce an order of the models and probes
-inside the matrices, you can use the ``model_names`` and ``probe_names``
-parameters of :py:func:`bob.bio.base.score.openbr.write_matrix`:
-
-* The ``probe_names`` parameter lists the ``path`` elements stored in the score
-  files, which are the fourth column in a ``5column`` file, and the third
-  column in a ``4column`` file, see :py:func:`bob.bio.base.score.load.five_column` and
-  :py:func:`bob.bio.base.score.load.four_column`.
-
-* The ``model_names`` parameter is a bit more complicated.  In a ``5column``
-  format score file, the model names are defined by the second column of that
-  file, see :py:func:`bob.bio.base.score.load.five_column`.  In a ``4column`` format
-  score file, the model information is not contained, but only the client
-  information of the model.  Hence, for the ``4column`` format, the
-  ``model_names`` actually lists the client ids found in the first column, see
-  :py:func:`bob.bio.base.score.load.four_column`.
-
-.. warning::
-
-  The model information is lost, but required to write the matrix files.  In
-  the ``4column`` format, we use client ids instead of the model
-  information.  Hence, when several models exist per client, this function
-  will not work as expected.
-
-Additionally, there are fields in the matrix files, which define the gallery
-and probe list files that were used to generate the matrix.  These file names
-can be selected with the ``gallery_file_name`` and ``probe_file_name`` keyword
-parameters of :py:func:`bob.bio.base.score.openbr.write_matrix`.
-Finally, OpenBR defines a specific ``'search'`` score file format, which is
-designed to be used to compute CMC curves.  The score matrix contains
-descendingly sorted and possibly truncated list of scores, i.e., for each
-probe, a sorted list of all scores for the models is generated.  To generate
-these special score file format, you can specify the ``search`` parameter.  It
-specifies the number of highest scores per probe that should be kept.  If the
-``search`` parameter is set to a negative value, all scores will be kept.  If
-the ``search`` parameter is higher as the actual number of models, ``NaN``
-scores will be appended, and the according mask values will be set to ``0``
-(i.e., to be ignored).
-OpenBR to Bob
--------------
-On the other hand, you might also want to generate a Bob-compatible (four or
-five column) score file based on a pair of OpenBR matrix and mask files.  This
-is possible by using the :py:func:`bob.bio.base.score.openbr.write_score_file`
-function.  At the basic, it takes the given pair of matrix and mask files, as
-well as the desired output score file:
-
-.. code-block:: py
-
-    >>> bob.bio.base.score.openbr.write_score_file('openbr.mtx', 'openbr.mask', 'four-column-sore-file')
-
-This score file is sufficient to compute a CMC curve (see `bob.measure`), however it
-does not contain relevant client ids or paths for models and probes.
-Particularly, it assumes that each client has exactly one associated model.
-To add/correct these information, you can use additional parameters to
-:py:func:`bob.bio.base.score.openbr.write_score_file`.  Client ids of models and
-probes can be added using the ``models_ids`` and ``probes_ids`` keyword
-arguments.  The length of these lists must be identical to the number of models
-and probes as given in the matrix files, **and they must be in the same order
-as used to compute the OpenBR matrix**.  This includes that the same
-same-client and different-client pairs as indicated by the OpenBR mask will be
-generated, which will be checked inside the function.
-To add model and probe path information, the ``model_names`` and
-``probe_names`` parameters, which need to have the same size and order as the
-``models_ids`` and ``probes_ids``.  These information are simply stored in the
-score file, and no further check is applied.
-
-.. note:: The ``model_names`` parameter is used only when writing score files in ``score_file_format='5column'``, in the ``'4column'`` format, this parameter is ignored.
-
-
-.. Place youre references here:
-.. _openbr: http://openbiometrics.org
-
diff --git a/doc/py_api.rst b/doc/py_api.rst
index 03510bbd..5ab5c938 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -10,7 +10,6 @@ IO-related functions
 ~~~~~~~~~~~~~~~~~~~~
 
 .. autosummary::
-   bob.bio.base.read_original_data
    bob.bio.base.load
    bob.bio.base.save
    bob.bio.base.load_compressed
diff --git a/requirements.txt b/requirements.txt
index 67608425..9c33fd0d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,6 +8,7 @@ bob.learn.em
 bob.learn.linear
 bob.math
 bob.measure
+bob.pipelines
 bob.sp
 bob.pipelines
 click
-- 
GitLab