Commit 56203861 authored by Amir MOHAMMADI

simple documentation.

plotting functionality only in the script rather than the class.
parent d03dc86d
# from .utils import *
from . import algorithm
from . import tools
from . import config
from . import script


def get_config():
    """Returns a string containing the configuration information.
@@ -3,7 +3,6 @@
from __future__ import division
from __future__ import absolute_import
from ..tools import grouping
import numpy as np
import pickle
@@ -12,7 +11,7 @@ logger = bob.core.log.setup("bob.fusion.base")
class Algorithm(object):
    """A class to be used in score fusion"""

    def __init__(self,
                 preprocessors=None,
@@ -21,6 +20,13 @@ class Algorithm(object):
                 **kwargs
                 ):
        """
        preprocessors: A list of preprocessors that follow the API of
            :py:class:`sklearn.preprocessing.StandardScaler`. In particular,
            ``fit_transform`` and ``transform`` must be implemented.
        classifier: An instance of a class that implements ``fit(X[, y])`` and
            ``decision_function(X)``, for example
            :py:class:`sklearn.linear_model.LogisticRegression`.
        kwargs : ``key=value`` pairs
            A list of keyword arguments to be written in the
@@ -45,6 +51,13 @@ class Algorithm(object):
        return scores

    def train(self, train, devel=None):
"""If you use development data for training you need to override this
method.
train: A :py:meth:`tuple` of length 2 containing
the negatives and positives. negatives and positives should be
numpy.ndarray with the shape of (n_samples, n_systems).
devel: same as train but used for development (validation)
"""
        (negatives, positives) = train
        train_scores = np.vstack((negatives, positives))
        neg_len = negatives.shape[0]
@@ -53,10 +66,10 @@ class Algorithm(object):
        self.classifier.fit(train_scores, y)

    def fuse(self, scores):
        """
        scores: A numpy.ndarray with the shape of (n_samples, n_systems).
        """
        return self.classifier.decision_function(scores)
    def __str__(self):
        """__str__() -> info
@@ -74,81 +87,11 @@ class Algorithm(object):
            self._kwargs.items() if value is not None]))

    def save(self, model_file):
"""If your class cannot be pickled, you need to override this method."""
with open(model_file, "wb") as f: with open(model_file, "wb") as f:
pickle.dump(self, f) pickle.dump(self, f)
def load(self, model_file): def load(self, model_file):
"""If your class cannot be pickled, you need to override this method."""
with open(model_file, "rb") as f: with open(model_file, "rb") as f:
return pickle.load(f) return pickle.load(f)
    def plot_boundary_decision(self, scores, score_labels, threshold,
                               thres_system1=None,
                               thres_system2=None,
                               do_grouping=False,
                               resolution=100,
                               x_pad=0.5,
                               y_pad=0.5,
                               alpha=0.75,
                               legends=None,
                               i1=0,
                               i2=1,
                               **kwargs
                               ):
        '''
        Plots the decision boundary of the Algorithm.

        @param score_labels numpy.array A (scores.shape[0]) array containing
            the true labels of scores.
        @param threshold float threshold of the decision boundary
        '''
        if legends is None:
            legends = ['Impostor', 'Genuine']
        markers = ['x', 'o']

        if scores.shape[1] > 2:
            raise NotImplementedError(
                "Currently plotting the decision boundary for more than two systems "
                "is not supported.")

        import matplotlib.pyplot as plt
        plt.gca()  # this is necessary for subplots to work.

        X = scores[:, [i1, i2]]
        Y = score_labels
        x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad
        y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad
        xx, yy = np.meshgrid(
            np.linspace(x_min, x_max, resolution),
            np.linspace(y_min, y_max, resolution))
        temp = np.c_[xx.ravel(), yy.ravel()]
        temp = self.preprocess(temp)
        Z = (self.fuse(temp) > threshold).reshape(xx.shape)

        contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)

        if do_grouping:
            negatives, positives = X[np.logical_not(Y)], X[Y]
            negatives, positives = grouping(negatives, positives, **kwargs)
            X = np.concatenate((negatives, positives), axis=0)
            Y = np.concatenate(
                (np.zeros(negatives.shape[0], dtype=np.bool8),
                 np.ones(positives.shape[0], dtype=np.bool8)),
                axis=0)

        negatives, positives = X[np.logical_not(Y)], X[Y]
        colors = plt.cm.viridis(np.linspace(0, 1, 2))
        for i, X in enumerate((negatives, positives)):
            plt.scatter(
                X[:, 0], X[:, 1], marker=markers[i], alpha=alpha,
                c=colors[i], label=legends[i])
        plt.legend()

        if thres_system1 is not None:
            plt.axvline(thres_system1, color='red')
            plt.axhline(thres_system2, color='red')

        plt.xlim([x_min, x_max])
        plt.ylim([y_min, y_max])

        return contourf
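For orientation, here is a minimal usage sketch of the ``Algorithm`` API shown above. It is not part of this commit; it assumes the constructor accepts the ``preprocessors`` and ``classifier`` keyword arguments described in the docstring, that the class is importable from this module, and it uses toy score data.

# Hypothetical sketch, not part of this commit; names and import path are assumptions.
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from bob.fusion.base.algorithm import Algorithm

# toy scores of two baseline systems, shaped (n_samples, n_systems)
negatives = np.random.randn(200, 2)        # impostor scores
positives = np.random.randn(200, 2) + 2.0  # genuine scores

fuser = Algorithm(preprocessors=[StandardScaler()],
                  classifier=LogisticRegression())
fuser.train((negatives, positives))

# preprocess and fuse new scores, then persist the trained fuser
fused = fuser.fuse(fuser.preprocess(np.vstack((negatives, positives))))
fuser.save('fusion_model.pkl')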
@@ -16,8 +16,8 @@ logger = bob.core.log.setup("bob.fusion.base")
class MLP(Algorithm):
    """This MLP is implemented using the bob tools.
    The preprocessors used with this class should be pickleable.
    """

    def __init__(self,

@@ -12,7 +12,7 @@ logger = bob.core.log.setup("bob.fusion.base")
class Weighted_Sum(Algorithm):
    """weighted sum (default: mean)"""

    def __init__(self, weights=None, *args, **kwargs):
        super(Weighted_Sum, self).__init__(
@@ -36,10 +36,84 @@ import bob.core
from bob.measure.load import load_score, get_negatives_positives,\
    get_all_scores
from bob.measure import eer_threshold
from ..tools import grouping

logger = bob.core.log.setup("bob.fusion.base")
def plot_boundary_decision(algorithm, scores, score_labels, threshold,
                           thres_system1=None,
                           thres_system2=None,
                           do_grouping=False,
                           resolution=100,
                           x_pad=0.5,
                           y_pad=0.5,
                           alpha=0.75,
                           legends=None,
                           i1=0,
                           i2=1,
                           **kwargs
                           ):
    '''
    Plots the decision boundary of the Algorithm.

    @param score_labels numpy.array A (scores.shape[0]) array containing
        the true labels of scores.
    @param threshold float threshold of the decision boundary
    '''
    if legends is None:
        legends = ['Impostor', 'Genuine']
    markers = ['x', 'o']

    if scores.shape[1] > 2:
        raise NotImplementedError(
            "Currently plotting the decision boundary for more than two systems "
            "is not supported.")

    import matplotlib.pyplot as plt
    plt.gca()  # this is necessary for subplots to work.

    X = scores[:, [i1, i2]]
    Y = score_labels
    x_min, x_max = X[:, i1].min() - x_pad, X[:, i1].max() + x_pad
    y_min, y_max = X[:, i2].min() - y_pad, X[:, i2].max() + y_pad
    xx, yy = numpy.meshgrid(
        numpy.linspace(x_min, x_max, resolution),
        numpy.linspace(y_min, y_max, resolution))
    temp = numpy.c_[xx.ravel(), yy.ravel()]
    temp = algorithm.preprocess(temp)
    Z = (algorithm.fuse(temp) > threshold).reshape(xx.shape)

    contourf = plt.contour(xx, yy, Z, 1, alpha=1, cmap=plt.cm.viridis)

    if do_grouping:
        negatives, positives = X[numpy.logical_not(Y)], X[Y]
        negatives, positives = grouping(negatives, positives, **kwargs)
        X = numpy.concatenate((negatives, positives), axis=0)
        Y = numpy.concatenate(
            (numpy.zeros(negatives.shape[0], dtype=numpy.bool8),
             numpy.ones(positives.shape[0], dtype=numpy.bool8)),
            axis=0)

    negatives, positives = X[numpy.logical_not(Y)], X[Y]
    colors = plt.cm.viridis(numpy.linspace(0, 1, 2))
    for i, X in enumerate((negatives, positives)):
        plt.scatter(
            X[:, 0], X[:, 1], marker=markers[i], alpha=alpha,
            c=colors[i], label=legends[i])
    plt.legend()

    if thres_system1 is not None:
        plt.axvline(thres_system1, color='red')
        plt.axhline(thres_system2, color='red')

    plt.xlim([x_min, x_max])
    plt.ylim([y_min, y_max])

    return contourf
def main(command_line_parameters=None):
    args = docopt(__doc__, argv=command_line_parameters,
                  version=bob.fusion.base.get_config())
@@ -60,8 +134,8 @@ def main(command_line_parameters=None):
    score_labels = score_lines['claimed_id'] == score_lines['real_id']

    # plot the decision boundary
    plot_boundary_decision(
        algorithm, scores, score_labels, threshold,
        do_grouping=True,
        npoints=int(args['--group']),
        seed=0,
@@ -58,12 +58,12 @@ source_suffix = '.rst'
master_doc = 'index'

# General information about the project.
project = u"Bob's interface for running score fusion in biometric recognition experiments"
import time
copyright = u'%s, Idiap Research Institute' % time.strftime('%Y')

# Grab the setup entry
distribution = pkg_resources.require('bob.fusion.base')[0]

# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
.. vim: set fileencoding=utf-8 :
.. author: Manuel Günther <manuel.guenther@idiap.ch>
.. date: Thu Sep 20 11:58:57 CEST 2012
.. _bob.bio.base.experiments:
=========================================
Running Biometric Recognition Experiments
=========================================
Now, you are almost ready to run your first biometric recognition experiment.
Just a little bit of theory, and then: off we go.
Structure of a Biometric Recognition Experiment
-----------------------------------------------
Each biometric recognition experiment that is run with ``bob.bio`` is divided into several steps.
The steps are:
1. Data preprocessing: Raw data is preprocessed, e.g., for face recognition, faces are detected, images are aligned and photometrically enhanced.
2. Feature extractor training: Feature extraction parameters are learned.
3. Feature extraction: Features are extracted from the preprocessed data.
4. Feature projector training: Parameters of a subspace-projection of the features are learned.
5. Feature projection: The extracted features are projected into a subspace.
6. Model enroller training: The way to enroll models from extracted or projected features is learned.
7. Model enrollment: One model is enrolled from the features of one or more images.
8. Scoring: The verification scores between various models and probe features are computed.
9. Evaluation: The computed scores are evaluated and curves are plotted.
These 9 steps are divided into four distinct groups, which are discussed in more detail later:
* Preprocessing (only step 1)
* Feature extraction (steps 2 and 3)
* Biometric recognition (steps 4 to 8)
* Evaluation (step 9)
The communication between two steps is file-based, usually using a binary HDF5_ interface, which is implemented in the :py:class:`bob.io.base.HDF5File` class.
The output of one step usually serves as the input of the subsequent step(s).
Depending on the algorithm, some of the steps are not applicable/available.
E.g. most of the feature extractors do not need a special training step, or some algorithms do not require a subspace projection.
In these cases, the according steps are skipped.
``bob.bio`` takes care that the correct files are always forwarded to the subsequent steps.
.. _running_part_1:
Running Experiments (part I)
----------------------------
To run an experiment, we provide a generic script ``./bin/verify.py``, which is highly parametrizable.
To get a complete list of command line options, please run:
.. code-block:: sh
$ ./bin/verify.py --help
Whoops, that's a lot of options.
But, no worries, most of them have proper default values.
.. note::
Sometimes, command line options have a long version starting with ``--`` and a short one starting with a single ``-``.
In this section, only the long names of the arguments are listed, please refer to ``./bin/verify.py --help`` (or short: ``./bin/verify.py -h``) for the abbreviations.
There are five command line options, which are required and sufficient to define the complete biometric recognition experiment.
These five options are:
* ``--database``: The database to run the experiments on
* ``--preprocessor``: The data preprocessor
* ``--extractor``: The feature extractor
* ``--algorithm``: The recognition algorithm
* ``--sub-directory``: A descriptive name for your experiment, which will serve as a sub-directory
The first four parameters, i.e., the ``database``, the ``preprocessor``, the ``extractor`` and the ``algorithm`` can be specified in several different ways.
For the start, we will use only the registered :ref:`Resources <bob.bio.base.resources>`.
These resources define the source code that will be used to compute the experiments, as well as all the meta-parameters of the algorithms (which we will call the *configuration*).
To get a list of registered resources, please call:
.. code-block:: sh
$ ./bin/resources.py
Each package in ``bob.bio`` defines its own resources, and the printed list of registered resources differs according to the installed packages.
If only ``bob.bio.base`` is installed, no databases and no preprocessors will be listed.
.. note::
You will also find some ``grid`` resources being listed.
These types of resources will be explained :ref:`later <running_in_parallel>`.
Before going into :ref:`more details about the configurations <running_part_2>`, we will provide information about running default experiments.
One command line option, which is not required, but recommended, is the ``--verbose`` option.
By default, the algorithms are set up to execute quietly, and only errors are reported.
To change this behavior, you can use the ``--verbose`` option several times to increase the verbosity level to show:
1) Warning messages
2) Informative messages
3) Debug messages
When running experiments, my personal preference is verbose level 2, which can be enabled by ``--verbose --verbose``, or using the short version: ``-vv``.
So, a typical biometric recognition experiment (in this case, face recognition) could look something like:
.. code-block:: sh
$ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv
.. note::
To be able to run exactly the command line from above, you need to have :ref:`bob.bio.face <bob.bio.face>` installed.
Before running an experiment, it is recommended to add the ``--dry-run`` option, so that it only prints which steps would be executed, without actually executing them, and you can make sure that everything works as expected.
The final result of the experiment will be one (or more) score file(s).
Usually, they will be called something like ``scores-dev``.
By default, you can find them in a sub-directory of the ``result`` directory, but you can change this using the ``--result-directory`` command line option.
.. note::
At Idiap_, the default result directory differs, see ``./bin/verify.py --help`` for your directory.
.. _bob.bio.base.evaluate:
Evaluating Experiments
----------------------
After the experiment has finished successfully, one or more text files containing all the scores are written.
To evaluate the experiment, you can use the generic ``./bin/evaluate.py`` script, which has properties for all prevalent evaluation types, such as CMC, ROC and DET plots, as well as computing recognition rates, EER/HTER, Cllr and minDCF.
Additionally, a combination of different algorithms can be plotted into the same files.
Just specify all the score files that you want to evaluate using the ``--dev-files`` option, and (optionally) legends for the plots (in the same order) using the ``--legends`` option, and the according plots will be generated.
For example, to create a ROC curve for the experiment above, use:
.. code-block:: sh
$ ./bin/evaluate.py --dev-files results/pca-experiment/male/nonorm/scores-dev --legend MOBIO --roc MOBIO_MALE_ROC.pdf -vv
Please note that there exists another file called ``Experiment.info`` inside the result directory.
This file is a pure text file and contains the complete configuration of the experiment.
With this configuration it is possible to inspect all default parameters of the algorithms, and even to re-run the exact same experiment.
.. _running_in_parallel:
Running in Parallel
-------------------
One important property of the ``./bin/verify.py`` script is that it can run in parallel, using either several threads on the local machine, or an SGE grid.
To achieve that, ``bob.bio`` is well-integrated with our SGE grid toolkit GridTK_, which we have selected as a python package in the :ref:`Installation <bob.bio.base.installation>` section.
The ``./bin/verify.py`` script can submit jobs either to the SGE grid, or to a local scheduler, keeping track of dependencies between the jobs.
The GridTK_ keeps a list of jobs in a local database, which by default is called ``submitted.sql3``, but which can be overwritten with the ``--gridtk-database-file`` option.
Please refer to the `GridTK documentation <http://pythonhosted.org/gridtk>`_ for more details on how to use the Job Manager ``./bin/jman``.
Two different types of ``grid`` resources are defined, which can be used with the ``--grid`` command line option.
The first type of resources will submit jobs to an SGE grid.
They are mainly designed to run in the Idiap_ SGE grid and might need some adaptations to run on your grid.
The second type of resources will submit jobs to a local queue, which needs to be run by hand (e.g., using ``./bin/jman --local run-scheduler --parallel 4``), or by using the command line option ``--run-local-scheduler``.
The difference between the two types of resources is that the local submission usually starts with ``local-``, while the SGE resource does not.
Hence, to run the same experiment as above using four parallel threads on the local machine, re-nicing the jobs to level 10, simply call:
.. code-block:: sh
$ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --grid local-p4 --run-local-scheduler --nice 10
.. note::
You might realize that the second execution of the same experiment is much faster than the first one.
This is due to the fact that those parts of the experiment, which have been successfully executed before (i.e., the according files already exist), are skipped.
To override this behavior, i.e., to always regenerate all parts of the experiments, you can use the ``--force`` option.
Command Line Options to change Default Behavior
-----------------------------------------------
In addition to the required command line arguments discussed above, there are several options to modify the behavior of the experiments.
One set of command line options changes the directory structure of the output.
By default, intermediate (temporary) files are written to the ``temp`` directory, which can be overridden by the ``--temp-directory`` command line option, which expects relative or absolute paths.
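For example, to redirect both the temporary and the final files of the example experiment from above, one could call (a sketch; the paths are only placeholders):

.. code-block:: sh

   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --temp-directory /scratch/my_temp --result-directory /my/results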
Re-using Parts of Experiments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you want to re-use parts of previous experiments, you can specify the directories (which are relative to the ``--temp-directory``, but you can also specify absolute paths):
* ``--preprocessed-data-directory``
* ``--extracted-directory``
* ``--projected-directory``
* ``--models-directories`` (one for each of the models and the ZT-norm models, see below)
or even trained extractor, projector, or enroller (i.e., the results of the extractor, projector, or enroller training):
* ``--extractor-file``
* ``--projector-file``
* ``--enroller-file``
For that purpose, it is also useful to skip parts of the tool chain.
To do that you can use:
* ``--skip-preprocessing``
* ``--skip-extractor-training``
* ``--skip-extraction``
* ``--skip-projector-training``
* ``--skip-projection``
* ``--skip-enroller-training``
* ``--skip-enrollment``
* ``--skip-score-computation``
* ``--skip-concatenation``
* ``--skip-calibration``
although by default files that already exist are not re-created.
You can use the ``--force`` argument combined with the ``--skip...`` arguments (in which case the skip is preferred).
To run just a sub-selection of the tool chain, you can also use the ``--execute-only`` option, which takes a list of options out of: ``preprocessing``, ``extractor-training``, ``extraction``, ``projector-training``, ``projection``, ``enroller-training``, ``enrollment``, ``score-computation``, ``concatenation`` or ``calibration``.
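As a sketch, to recompute only the scores of the example experiment from above while keeping everything that was computed before, one could call:

.. code-block:: sh

   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --execute-only score-computation concatenation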
Database-dependent Arguments
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Many databases define several protocols that can be executed.
To change the protocol, you can either modify the configuration file, or simply use the ``--protocol`` option.
Some databases define several kinds of evaluation setups.
For example, often two groups of data are defined, a so-called *development set* and an *evaluation set*.
The scores of the two groups will be concatenated into two files called **scores-dev** and **scores-eval**, which are located in the score directory (see above).
In this case, by default only the development set is employed.
To use both groups, just specify ``--groups dev eval`` (of course, you can also only use the ``'eval'`` set by calling ``--groups eval``).
One score normalization technique is the so-called ZT score normalization.
To enable this, simply use the ``--zt-norm`` option.
If the ZT-norm is enabled, two sets of scores will be computed, and they will be placed in two different sub-directories of the score directory, which are by default called **nonorm** and **ztnorm**, but which can be changed using the ``--zt-score-directories`` option.
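For example, to compute scores for both groups with ZT score normalization enabled, the example experiment from above could be extended as follows (a sketch):

.. code-block:: sh

   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm pca --sub-directory pca-experiment -vv --groups dev eval --zt-norm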
Other Arguments
---------------
For some applications it is interesting to get calibrated scores.
Simply add the ``--calibrate-scores`` option and another set of score files will be created, by training the score calibration on the scores of the ``'dev'`` group and applying it to all available groups.
The scores will be located in the same directory as the **nonorm** and **ztnorm** scores, and the file names are **calibrated-dev** (and **calibrated-eval** if applicable).
.. include:: links.rst
.. _bob.bio.base.implemented:
=================================
Tools implemented in bob.bio.base
=================================
Summary
-------
Base Classes
~~~~~~~~~~~~
.. autosummary::
bob.bio.base.preprocessor.Preprocessor
bob.bio.base.extractor.Extractor
bob.bio.base.algorithm.Algorithm
bob.bio.base.database.Database
bob.bio.base.database.DatabaseZT
bob.bio.base.grid.Grid
Implementations
~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.base.extractor.Linearize
bob.bio.base.algorithm.Distance
bob.bio.base.algorithm.PCA
bob.bio.base.algorithm.LDA
bob.bio.base.algorithm.PLDA
bob.bio.base.algorithm.BIC
bob.bio.base.database.DatabaseBob
bob.bio.base.database.DatabaseBobZT
bob.bio.base.database.DatabaseFileList
Preprocessors
-------------
.. automodule:: bob.bio.base.preprocessor
Extractors
----------
.. automodule:: bob.bio.base.extractor
Algorithms
----------
.. automodule:: bob.bio.base.algorithm
Databases
---------
.. automodule:: bob.bio.base.database
Grid Configuration
------------------
.. automodule:: bob.bio.base.grid
.. data:: PREDEFINED_QUEUES
A dictionary of predefined queue keywords, which are adapted to the Idiap_ SGE.
.. adapted from http://stackoverflow.com/a/29789910/3301902 to get a nice dictionary content view
.. exec::
import json
from bob.bio.base.grid import PREDEFINED_QUEUES
json_obj = json.dumps(PREDEFINED_QUEUES, sort_keys=True, indent=2)
json_obj = json_obj.replace("\n", "\n ")
print ('.. code-block:: JavaScript\n\n PREDEFINED_QUEUES = %s\n\n' % json_obj)
.. include:: links.rst
.. vim: set fileencoding=utf-8 :
.. author: Amir Mohammadi <amir.mohammadi@idiap.ch>

.. _bob.fusion.base:

==================================================
Score Fusion in Biometric Recognition Experiments
==================================================

The ``bob.fusion.base`` package provides open source tools to run comparable and reproducible score fusion in biometric recognition experiments.
It is written to be tightly integrated with ``scikit-learn``; however, you do not necessarily need to use it.

To design a biometric recognition experiment, one has to choose:

* a database containing the original data, and a protocol that defines how to use the data,
* a data preprocessing algorithm, i.e., face detection for face recognition experiments or voice activity detection for speaker recognition,
* the type of features to extract from the preprocessed data,
* the biometric recognition algorithm to employ,
* the score fusion to combine outputs from different systems, and
* the way to evaluate the results
For any of these parts, several different types are implemented in the ``bob.bio`` packages, and basically any combination of the five parts can be executed.
For each type, several meta-parameters can be tested.
This results in a nearly infinite amount of possible experiments that can be run using the current setup.
But it is also possible to use your own database, preprocessor, feature extractor, or biometric recognition algorithm and test this against the baseline algorithms implemented in our packages.
.. note::
The ``bob.bio`` packages are derived from the former `FaceRecLib <http://pypi.python.org/pypi/facereclib>`__, which is herewith outdated.
This package :py:mod:`bob.bio.base` includes the basic definition of a biometric recognition experiment, as well as a generic script, which can execute the full biometric experiment in a single command line.
Changing the employed tools such as the database, protocol, preprocessor, feature extractor or recognition algorithm is as simple as changing a command line parameter.
The implementation of (most of) the tools is separated into other packages in the ``bob.bio`` namespace.
All these packages can be easily combined.
Here is a growing list of derived packages:
* :ref:`bob.bio.spear <bob.bio.spear>` Tools to run speaker recognition experiments, including voice activity detection, Cepstral feature extraction, and speaker databases
* :ref:`bob.bio.face <bob.bio.face>` Tools to run face recognition experiments, such as face detection, facial feature extraction and comparison, and face image databases
* :ref:`bob.bio.video <bob.bio.video>` An extension of face recognition algorithms to run on video data, and the according video databases
* :ref:`bob.bio.gmm <bob.bio.gmm>` Algorithms based on Gaussian Mixture Modeling (GMM) such as Inter-Session Variability modeling (ISV) or Total Variability modeling (TV, aka. I-Vector)
* `bob.bio.csu <http://pypi.python.org/pypi/bob.bio.csu>`__ for wrapper classes of the `CSU Face Recognition Resources <http://www.cs.colostate.edu/facerec>`__ (see `Installation Instructions <http://pythonhosted.org/bob.bio.csu/installation.html>`__ of ``bob.bio.csu``).
If you are interested, please continue reading:
===========
@@ -49,10 +19,7 @@ Users Guide
.. toctree::
   :maxdepth: 2

   fusion
   experiments
   implementation
   more

================
Reference Manual
@@ -61,7 +28,6 @@ Reference Manual
.. toctree::
   :maxdepth: 2

   implemented
   py_api
@@ -69,13 +35,6 @@ Reference Manual
References
==========
=========
ToDo-List
@@ -85,7 +44,7 @@ This documentation is still under development.
Here is a list of things that need to be done:

.. todolist::

testing

==================
Indices and tables
.. vim: set fileencoding=utf-8 :
.. author: Manuel Günther <manuel.guenther@idiap.ch>
.. date: Thu Sep 20 11:58:57 CEST 2012
.. _bob.bio.base.installation:
=========================
Installation Instructions
=========================
As noted before, this package is part of the ``bob.bio`` packages, which in turn are part of the signal-processing and machine learning toolbox Bob_.
To install `Packages of Bob <https://github.com/idiap/bob/wiki/Packages>`_, please read the `Installation Instructions <https://github.com/idiap/bob/wiki/Installation>`_.
For Bob_ to be able to work properly, some dependent packages are required to be installed.
Please make sure that you have read the `Dependencies <https://github.com/idiap/bob/wiki/Dependencies>`_ for your operating system.
.. note::
Currently, running Bob_ under MS Windows is not yet supported.
However, we found that running Bob_ in a virtual Unix environment such as the one provided by VirtualBox_ is a good alternative.
The simplest and most convenient way to use the ``bob.bio`` tools is to use a ``zc.buildout`` package, as explained in more detail `here <https://github.com/idiap/bob/wiki/Installation#using-zcbuildout-for-production>`__.
There, in the ``eggs`` section of the ``buildout.cfg`` file, simply list the ``bob.bio`` packages that you want, like:
.. code-block:: python
eggs = bob.bio.base
bob.bio.face
bob.bio.gmm
bob.bio.video
bob.db.youtube
gridtk
in order to download and install all packages that are required for your experiments.
In the example above, you might want to run a video face recognition experiment using the :py:class:`bob.bio.face.preprocessor.FaceDetector` and the :py:class:`bob.bio.face.extractor.DCTBlocks` feature extractor defined in :ref:`bob.bio.face <bob.bio.face>`, the :py:class:`bob.bio.gmm.algorithm.IVector` algorithm defined in :ref:`bob.bio.gmm <bob.bio.gmm>` and the video extensions defined in :ref:`bob.bio.video <bob.bio.video>`, using the YouTube faces database interface defined in :ref:`bob.db.youtube <bob.db.youtube>`.
Running the simple command line:
.. code-block:: sh
$ python bootstrap-buildout.py
$ ./bin/buildout
will then download and install all dependent packages locally (relative to your current working directory), and create a ``./bin`` directory containing all the necessary scripts to run the experiments.
Databases
~~~~~~~~~
With ``bob.bio`` you will run biometric recognition experiments using some default biometric recognition databases.
Though the verification protocols are implemented in ``bob.bio``, the original data are **not included**.
To download the original data of the databases, please refer to the according Web-pages.
For a list of supported databases including their download URLs, please refer to the :ref:`verification_databases`.
After downloading the original data for the databases, you will need to tell ``bob.bio``, where these databases can be found.
For this purpose, we have decided to implement a special file, where you can set your directories.
By default, this file is located in ``~/.bob_bio_databases.txt``, and it contains several lines, each line looking somewhat like:
.. code-block:: text
[YOUR_ATNT_DIRECTORY] = /path/to/your/directory
.. note::
If this file does not exist, feel free to create and populate it yourself.
Please use ``./bin/databases.py`` for a list of known databases, where you can see the raw ``[YOUR_DATABASE_PATH]`` entries for all databases that you haven't updated, and the corrected paths for those you have.
.. note::
If you have installed only ``bob.bio.base``, there is no database listed -- as all databases are included in other packages, such as :ref:`bob.bio.face <bob.bio.face>` or :ref:`bob.bio.spear <bob.bio.spear>`.
Test your Installation
~~~~~~~~~~~~~~~~~~~~~~
One of the scripts that were generated during the bootstrap/buildout step is a test script.
To verify your installation, you should run the script that runs the nose tests for each of the ``bob.bio`` packages:
.. code-block:: sh
$ ./bin/nosetests -vs bob.bio.base
$ ./bin/nosetests -vs bob.bio.gmm
...
Some of the tests that are run require the images of the `AT&T database`_.
If the database is not found on your system, it will automatically download and extract the `AT&T database`_ to a temporary directory, **which will not be erased**.
To avoid downloading the database each time you run the nose tests, please:
1. Download the `AT&T database`_ and extract it to the directory of your choice.
2. Set an environment variable ``ATNT_DATABASE_DIRECTORY`` to the directory, where you extracted the database to.
For example, in a ``bash`` you can call:
.. code-block:: sh
$ export ATNT_DATABASE_DIRECTORY=/path/to/your/copy/of/atnt
.. note::
To set the directory permanently, you can also change the ``atnt_default_directory`` in the file `bob/bio/base/test/utils.py <file:../bob/bio/base/test/utils.py>`_.
In this case, there is no need to set the environment variable any more.
In case any of the tests fail for unexplainable reasons, please file a bug report through the `GitHub bug reporting system`_.
.. note::
Usually, all tests should pass with the latest stable versions of the Bob_ packages.
In other versions, some of the tests may fail.
Generate this documentation
~~~~~~~~~~~~~~~~~~~~~~~~~~~
Generally, the documentation of this package is `available online <http://pythonhosted.org/bob.bio.base>`__, and this should be your preferred resource.
However, to generate this documentation locally, you call:
.. code-block:: sh
$ ./bin/sphinx-build doc sphinx
Afterward, the documentation is available and you can read it, e.g., by using:
.. code-block:: sh
$ firefox sphinx/index.html
.. _buildout.cfg: file:../buildout.cfg
.. include:: links.rst
.. vim: set fileencoding=utf-8 :
.. author: Manuel Günther <manuel.guenther@idiap.ch>
.. date: Thu Sep 20 11:58:57 CEST 2012
==============================
More about Running Experiments
==============================
Now that we have learned the implementation details, we can have a closer look into how experiments can be parametrized.
.. _running_part_2:
Running Experiments (part II)
-----------------------------
As mentioned before, running biometric recognition experiments can be achieved using the ``./bin/verify.py`` command line.
In section :ref:`running_part_1`, we have used registered resources to run an experiment.
However, the command line options of ``./bin/verify.py`` are more flexible, as you can have three different ways of defining tools:
1. Choose a resource (see ``./bin/resources.py`` or ``./bin/verify.py --help`` for a list of registered resources):
.. code-block:: sh
$ ./bin/verify.py --algorithm pca
2. Use a configuration file. Make sure that your configuration file has the correct variable name:
.. code-block:: sh
$ ./bin/verify.py --algorithm bob/bio/base/config/algorithm/pca.py
3. Instantiate a class on the command line. Usually, quotes ``"..."`` are required, and the ``--imports`` need to be specified:
.. code-block:: sh
$ ./bin/verify.py --algorithm "bob.bio.base.algorithm.PCA(subspace_dimension = 30, distance_function = scipy.spatial.distance.euclidean, is_distance_function = True)" --imports bob.bio.base scipy.spatial
All these three ways can be used for any of the five command line options: ``--database``, ``--preprocessor``, ``--extractor``, ``--algorithm`` and ``--grid``.
You can even mix these three types freely in a single command line.
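For instance, a registered resource and an instantiated class can be combined in a single call (a sketch based on the examples above):

.. code-block:: sh

   $ ./bin/verify.py --database mobio-image --preprocessor face-crop-eyes --extractor linearize --algorithm "bob.bio.base.algorithm.PCA(subspace_dimension = 30, distance_function = scipy.spatial.distance.euclidean, is_distance_function = True)" --imports bob.bio.base scipy.spatial --sub-directory pca-mixed -vv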
Score Level Fusion of Different Algorithms on the same Database
---------------------------------------------------------------
In several of our publications, we have shown that the combination of several biometric recognition algorithms is able to outperform each single algorithm.
This is particularly true, when the algorithms rely on different kind of data, e.g., we have `fused face and speaker recognition system on the MOBIO database <http://publications.idiap.ch/index.php/publications/show/2688>`__.
As long as several algorithms are executed on the same database, we can simply generate a fusion system by using the ``./bin/fuse_scores.py`` script, generating a new score file:
.. code-block:: sh
$ ./bin/fuse_scores.py --dev
This computation is based on the :py:class:`bob.learn.linear.CGLogRegTrainer`, which is trained on the scores of the development set files (``--dev-files``) for the given systems.
Afterwards, the fusion is applied to the ``--dev-files`` and the resulting score file is written to the file specified by ``--fused-dev-file``.
If ``--eval-files`` are specified, the same fusion that is trained on the development set is now applied to the evaluation set as well, and the ``--fused-eval-file`` is written.
.. note::
When ``--eval-files`` are specified, they need to be in the same order as the ``dev-files``, otherwise the result is undefined.
The resulting ``--fused-dev-file`` and ``fused-eval-file`` can then be evaluated normally, e.g., using the ``./bin/evaluate.py`` script.
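A more complete invocation could look like this (a sketch; the score file paths are placeholders, and development and evaluation files must be given in the same order for all systems):

.. code-block:: sh

   $ ./bin/fuse_scores.py --dev-files system1/scores-dev system2/scores-dev --eval-files system1/scores-eval system2/scores-eval --fused-dev-file fused/scores-dev --fused-eval-file fused/scores-eval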
.. _grid-search:
Finding the Optimal Configuration
---------------------------------
Sometimes, configurations of tools (preprocessors, extractors or algorithms) are highly dependent on the database or even the employed protocol.
Additionally, configuration parameters depend on each other.
``bob.bio`` provides a relatively simple setup that allows you to test different configurations in the same task, and to find the best set of configurations.
For this, the ``./bin/grid_search.py`` script can be employed.
This script executes a configurable series of experiments, which reuse data as far as possible.
Please check out ``./bin/grid_search.py --help`` for a list of command line options.
The Configuration File
~~~~~~~~~~~~~~~~~~~~~~
The most important parameter to the ``./bin/grid_search.py`` is the ``--configuration-file``.
This configuration file specifies which parameters of which part of the algorithms will be tested.
An example for a configuration file can be found in the test scripts: ``bob/bio/base/test/dummy/grid_search.py``.
The configuration file is a common python file, which can contain certain variables:
1. ``preprocessor =``
2. ``extractor =``
3. ``algorithm =``
4. ``replace =``
5. ``requirement =``
6. ``imports =``
The variables from 1. to 3. usually contain instantiations for classes of :ref:`bob.bio.base.preprocessors`, :ref:`bob.bio.base.extractors` and :ref:`bob.bio.base.algorithms`, but also registered :ref:`bob.bio.base.resources` can be used.
For any of the parameters of the classes, a *placeholder* can be put.
By default, these place holders start with a # character, followed by a digit or character.
The variables 1. to 3. can also be overridden by the command line options ``--preprocessor``, ``--extractor`` and ``--algorithm`` of the ``./bin/grid_search.py`` script.
The ``replace`` variable has to be set as a dictionary.
In it, you can define with which values your place holder key should be filled, and in which step of the tool chain execution this should happen.
The steps are ``'preprocess'``, ``'extract'``, ``'project'``, ``'enroll'`` and ``'score'``.
For each of the steps, you can define which placeholder should be replaced by which values.
To be able to differentiate the results later on, each of the replacement values is bound to a directory name.
The final structure looks somewhat like this:
.. code-block:: python
replace = {
step1 : {
'#a' : {
'Dir_a1' : 'Value_a1',
'Dir_a2' : 'Value_a2'
},
'#b' : {
'Dir_b1' : 'Value_b1',
'Dir_b2' : 'Value_b2'
}
},
step2 : {
'#c' : {
'Dir_c1' : 'Value_c1',
'Dir_c2' : 'Value_c2'
}
}
}
Of course, more than two values can be selected.
In the above example, the results of the experiments will be placed into a directory structure as ``results/[...]/Dir_a1/Dir_b1/Dir_c1/[...]``.
.. note::
Please note that we are using a dictionary structure to define the replacements.
Hence, the order of the directories inside the same step might not be in the same order as written in the configuration file.
For the above example, a directory structure of ``results/[...]/Dir_b1/Dir_a1/Dir_c1/[...]`` might be possible as well.
Additionally, tuples of place holders can be defined, in which case always the full tuple will be replaced in one shot.
Continuing the above example, it is possible to add:
.. code-block:: python
...
step3 : {
'(#d,#e)' : {
'Dir_de1' : ('Value_d1', 'Value_e1'),
'Dir_de2' : ('Value_d2', 'Value_e2')
}
}
.. warning::
*All possible combinations* of the configuration parameters are tested, which might result in a *huge number of executed experiments*.
Some combinations of parameters might not make any sense.
In this case, a set of requirements on the parameters can be set, using the ``requirement`` variable.
In the requirements, any string including any placeholder can be put that can be evaluated using Python's ``eval`` function:
.. code-block:: python
requirement = ['#a > #b', '2*#c != #a', ...]
Finally, when any of the classes or variables need to import a certain python module, it needs to be declared in the ``imports`` variable.
If you, e.g., test which ``scipy.spatial`` distance function works best for your features, please add the imports (and don't forget the ``bob.bio.base`` and other ``bob.bio`` packages in case you use their tools):
.. code-block:: python
imports = ['scipy', 'bob.bio.base', 'bob.bio.face']
Further Command Line Options
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``./bin/grid_search.py`` script has a further set of command line options.
- The ``--database`` and the ``--protocol`` define, which database and (optionally) which protocol should be used.
- The ``--sub-directory`` is similar to the one in the ``./bin/verify.py``.
- ``--result-directory`` and ``--temp-directory`` specify directories to write results and temporary files into. Defaults are ``./results/grid_search`` and ``./temp/grid_search`` in the current directory. Make sure that the ``--temp-directory`` can store sufficient amount of data.
- The ``--preprocessor``, ``--extractor`` and ``--algorithm`` can be used to override the ``preprocessor``, ``extractor`` and ``algorithm`` fields in the configuration file (in which case the configuration file does not need to contain these variables).
- The ``--grid`` option can select the SGE_ configuration.
- The ``--parallel`` option can run on the local machine using the given number of parallel threads.
- The ``--preprocessed-directory`` can be used to select a directory of previously preprocessed data. This should not be used in combination with testing different preprocessor parameters.
- The ``--gridtk-database-directory`` can be used to select another directory, where the ``submitted.sql3`` files will be stored.
- Sometimes, the gridtk databases grow too large to hold all experiments. Using the ``--gridtk-database-split-level``, databases can be split at the desired level.
- The ``--write-commands`` directory can be selected to write the executed commands into (this is useful in case some experiments fail and need to be rerun).
- The ``--dry-run`` flag should always be used before the final execution to see if the experiment definition works as expected.
- The ``--skip-when-existent`` flag will only execute the experiments that have not yet finished (i.e., where the resulting score files are not produced yet).
- With the ``--executable`` flag, you might select a different script rather than ``bob.bio.base.script.verify`` to run the experiments (such as the ``bob.bio.gmm.script.verify_gmm``).
- Finally, additional options might be sent to the ``./bin/verify.py`` script directly. These options might be put after a ``--`` separation.
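Putting several of these options together, a grid search could, for example, be launched like this (a sketch; the sub-directory is a placeholder, and the dummy configuration file from the test scripts is used for illustration):

.. code-block:: sh

   $ ./bin/grid_search.py --configuration-file bob/bio/base/test/dummy/grid_search.py --database mobio-image --sub-directory grid-test --parallel 4 --dry-run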
Evaluation of Results
~~~~~~~~~~~~~~~~~~~~~
To evaluate a series of experiments, a special script iterates through all the results and computes EER on the development set and HTER on the evaluation set, for both the ``nonorm`` and the ``ztnorm`` directories.
Simply call:
.. code-block:: sh
$ ./bin/collect_results.py -vv --directory [result-base-directory] --sort
This will iterate through all result files found in ``[result-base-directory]`` and sort the results according to the EER on the development set (the sorting criterion can be modified using the ``--criterion`` and the ``--sort-key`` command line options).
Hence, to find the best results of your grid search experiments (with default directories), simply run:
.. code-block:: sh
$ ./bin/collect_results.py -vv --directory results/grid_search --sort --criterion EER --sort-key nonorm-dev
.. include:: links.rst
==============================
Python API for bob.fusion.base
==============================
Generic functions
-----------------
IO-related functions
~~~~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.base.load
bob.bio.base.save
bob.bio.base.load_compressed
bob.bio.base.save_compressed
bob.bio.base.open_compressed
bob.bio.base.close_compressed
bob.bio.base.check_file
Functions dealing with resources
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.base.load_resource
bob.bio.base.read_config_file
bob.bio.base.resource_keys
bob.bio.base.extensions
bob.bio.base.valid_keywords
Miscellaneous functions
~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.base.get_config
bob.bio.base.score_fusion_strategy
bob.bio.base.selected_elements
bob.bio.base.selected_indices
Tools to run recognition experiments
------------------------------------
Command line generation
~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.base.tools.command_line_parser
bob.bio.base.tools.initialize
bob.bio.base.tools.command_line
bob.bio.base.tools.write_info
bob.bio.base.tools.FileSelector
Controlling of elements
~~~~~~~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.base.tools.groups
bob.bio.base.tools.indices
Preprocessing
~~~~~~~~~~~~~
.. autosummary::
bob.bio.base.tools.preprocess
bob.bio.base.tools.read_preprocessed_data
Feature Extraction
~~~~~~~~~~~~~~~~~~
.. autosummary::
bob.bio.base.tools.train_extractor
bob.bio.base.tools.extract
bob.bio.base.tools.read_features
Algorithm
~~~~~~~~~
.. autosummary::
bob.bio.base.tools.train_projector
bob.bio.base.tools.project
bob.bio.base.tools.train_enroller
bob.bio.base.tools.enroll
Scoring
~~~~~~~
.. autosummary::
bob.bio.base.tools.compute_scores
bob.bio.base.tools.concatenate
bob.bio.base.tools.calibrate
Details
-------
.. automodule:: bob.bio.base
.. attribute:: valid_keywords
Valid keywords, for which resources are defined, are ``('database', 'preprocessor', 'extractor', 'algorithm', 'grid')``
.. automodule:: bob.bio.base.tools
.. autoclass:: FileSelector
.. include:: links.rst
@@ -6,6 +6,6 @@ bob.measure
bob.learn.activation
bob.learn.mlp
bob.bio.base
scikit-learn # Used for classifiers and pre-processors.
matplotlib # for plotting
docopt # for plotting script