From 82413510beb4917b927f8fe606a6c4f3dcafa55a Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <>
Date: Tue, 15 May 2018 18:07:19 +0200
Subject: [PATCH] Created the Baselines Concept

Created the Baselines Concept

Organized the documentation

Documented the helps

Added the baseline in the resources search

Removed some unused imports

Solved discussion

Solving other discussions
 bob/bio/base/            |  1 +
 bob/bio/base/baseline/   | 32 ++++++++++++
 bob/bio/base/baseline/   | 43 ++++++++++++++++
 bob/bio/base/script/     | 79 +++++++++++++++++++++++++++++
 bob/bio/base/script/    |  8 ++-
 bob/bio/base/test/dummy/ | 15 ++++++
 bob/bio/base/test/ | 21 ++++++++
 bob/bio/base/utils/     |  2 +-
 doc/baseline.rst                    | 77 ++++++++++++++++++++++++++++
 doc/index.rst                       |  1 +                            |  7 +++
 11 files changed, 283 insertions(+), 3 deletions(-)
 create mode 100644 bob/bio/base/baseline/
 create mode 100755 bob/bio/base/baseline/
 create mode 100644 bob/bio/base/script/
 create mode 100644 bob/bio/base/test/dummy/
 create mode 100644 bob/bio/base/test/
 create mode 100644 doc/baseline.rst

diff --git a/bob/bio/base/ b/bob/bio/base/
index 9ef81253..c69d5db3 100644
--- a/bob/bio/base/
+++ b/bob/bio/base/
@@ -6,6 +6,7 @@ from . import algorithm
 from . import tools
 from . import grid # only one file, not complete directory
 from . import annotator
+from . import baseline
 from . import script
 from . import test
diff --git a/bob/bio/base/baseline/ b/bob/bio/base/baseline/
new file mode 100644
index 00000000..bb3a9ee7
--- /dev/null
+++ b/bob/bio/base/baseline/
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Tiago de Freitas Pereira <>
+class Baseline(object):
+    """
+    Base class to define baselines
+
+    A Baseline is composed by the triplet preprocessor, feature extractor
+    and algorithm.
+
+    Attributes
+    ----------
+      name: str
+        Name of the baseline. This name will be displayed in the command line interface
+      preprocessors: dict
+        Dictionary containing all possible preprocessors.
+        If omitted, every instance gets its own empty dictionary
+      extractor: str
+        Registered resource or a config file containing the feature extractor
+      algorithm: str
+        Registered resource or a config file containing the algorithm
+    """
+    def __init__(self, name="", preprocessors=None, extractor="", algorithm="", **kwargs):
+        super(Baseline, self).__init__(**kwargs)
+        self.name = name
+        # A ``dict()`` default in the signature is evaluated only once and would
+        # be shared (and mutated) by every baseline created without this argument
+        self.preprocessors = dict() if preprocessors is None else preprocessors
+        self.extractor = extractor
+        self.algorithm = algorithm
diff --git a/bob/bio/base/baseline/ b/bob/bio/base/baseline/
new file mode 100755
index 00000000..12d62737
--- /dev/null
+++ b/bob/bio/base/baseline/
@@ -0,0 +1,43 @@
+from .Baseline import Baseline
+def get_available_databases():
+    """
+    Get all the available databases through the database entry-points
+
+    Returns
+    -------
+      dict
+        One entry per database that could be inspected. Each entry is a
+        dictionary with the keys ``has_zt`` (``True`` when the database object
+        has both ``zobjects`` and ``tobjects``) and ``groups`` (the groups among
+        ``dev`` and ``eval`` reported by the database, or ``["dev"]`` when its
+        ``groups()`` call fails)
+    """
+    available_databases = dict()
+    # NOTE(review): the right-hand sides of the two resource look-ups below
+    # (``all_databases`` and ``database_entry_point``) are truncated in this
+    # patch and are kept verbatim - restore the original calls before applying
+    all_databases ='database', strip=[])
+    for database in all_databases:
+        try:
+            database_entry_point =, 'database')
+            available_databases[database] = dict()
+            # Checking if the database has data for the ZT normalization
+            available_databases[database]["has_zt"] = hasattr(database_entry_point, "zobjects") and hasattr(database_entry_point, "tobjects")
+            # Searching for database groups
+            try:
+                groups = list(database_entry_point.groups())
+                available_databases[database]["groups"] = [g for g in ("dev", "eval") if g in groups]
+            except Exception:
+                # In case the method groups is not implemented
+                available_databases[database]["groups"] = ["dev"]
+        except Exception:
+            # Best effort: a database that cannot be inspected is skipped.
+            # ``Exception`` (instead of a bare ``except``) keeps
+            # KeyboardInterrupt and SystemExit propagating to the caller
+            pass
+    return available_databases
+def get_config():
+  """Gives back the configuration information of this package as a string.
+  """
+  # function-scope import, as in the original implementation
+  from bob import extension
+  config = extension.get_config(__name__)
+  return config
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/bio/base/script/ b/bob/bio/base/script/
new file mode 100644
index 00000000..3b8be66b
--- /dev/null
+++ b/bob/bio/base/script/
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Tiago de Freitas Pereira <>
+"""
+This script runs some face recognition baselines under some face databases
+
+This command line will run the facenet from David Sandberg using the ATnT dataset:
+  `bob bio baseline --baseline facenet_msceleba_inception_v1 --database atnt`
+"""
+import os
+from import main as verify
+from import get_available_databases
+from bob.extension.scripts.click_helper import (
+    verbosity_option, ConfigCommand, ResourceOption)
+import click
+@click.command(entry_point_group='', cls=ConfigCommand)
+@click.option('--database', '-d', required=True, cls=ResourceOption, help="Registered database. Check it out ` --types database` for ready to be used databases")
+@click.option('--baseline', '-b', required=True, cls=ResourceOption, help="Registered baseline. Check it out ` --types baseline` for ready to be used baseline")
+@click.option('--temp-dir', '-T', required=False, cls=ResourceOption, help="The directory for temporary files")
+@click.option('--result-dir', '-R', required=False, cls=ResourceOption, help="The directory for resulting score files")
+@click.option('--grid', '-g', help="Execute the algorithm in the SGE grid.", is_flag=True)
+@click.option('--zt-norm', '-z', help="Enable the computation of ZT norms (if the database supports it).", is_flag=True)
+def baseline(baseline, database, temp_dir, result_dir, grid, zt_norm, **kwargs):
+    """
+    Run a biometric recognition baseline on a database
+
+    The preprocessor, extractor and algorithm of the selected baseline are
+    translated into a command line for the verification script, which is then
+    executed.
+
+    List the resources of type ``baseline`` to see all registered baselines.
+    """
+    def search_preprocessor(key, keys):
+        """
+        Wrapper that searches for preprocessors for specific databases.
+        Returns the first entry of ``keys`` that ``key`` starts with.
+        If not found, the default preprocessor is returned
+        """
+        return next((k for k in keys if key.startswith(k)), "default")
+    # Triggering training for each baseline/database
+    # NOTE(review): the right-hand side of the next assignment is truncated in
+    # this patch and is kept verbatim - restore the original resource loading
+    # call before applying
+    loaded_baseline =, 'baseline', package_prefix="")
+    # this is the default sub-directory that is used
+    sub_directory = os.path.join(database, baseline)
+    database_data = get_available_databases()[database]
+    preprocessor_key = search_preprocessor(database, loaded_baseline.preprocessors.keys())
+    parameters = [
+        '-p', loaded_baseline.preprocessors[preprocessor_key],
+        '-e', loaded_baseline.extractor,
+        '-d', database,
+        '-a', loaded_baseline.algorithm,
+        '-vvv',
+    ]
+    # Both directories are optional on the command line; they are only
+    # forwarded when given, so that no ``None`` ends up in the argument list
+    if temp_dir is not None:
+        parameters += ['--temp-directory', temp_dir]
+    if result_dir is not None:
+        parameters += ['--result-directory', result_dir]
+    parameters += ['--sub-directory', sub_directory]
+    parameters += ['--groups'] + database_data["groups"]
+    if grid:
+        parameters += ['-g', 'demanding']
+    # The key ``has_zt`` is always present in ``database_data``, so its value
+    # (not its mere presence) tells whether the database supports ZT norm
+    if zt_norm and database_data.get('has_zt', False):
+        parameters += ['--zt-norm']
+    verify(parameters)
diff --git a/bob/bio/base/script/ b/bob/bio/base/script/
index c68d6f22..f8f56451 100644
--- a/bob/bio/base/script/
+++ b/bob/bio/base/script/
@@ -9,8 +9,8 @@ def resources(command_line_parameters = None):
   import argparse
   parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   parser.add_argument("--types", '-t', nargs = '+',
-                      choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config', 'an', 'annotator'),
-                      default = ('d', 'p', 'e', 'a', 'g', 'c', 'an'),
+                      choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config', 'an', 'annotator', 'b', 'baseline'),
+                      default = ('d', 'p', 'e', 'a', 'g', 'c', 'an', 'b'),
                       help = "Select the resource types that should be listed.")
   parser.add_argument("--details", '-d', action='store_true', help = "Prints the complete configuration for all resources")
@@ -55,6 +55,10 @@ def resources(command_line_parameters = None):
     print ("\nList of registered annotators:")
     print ('annotator', **kwargs))
+  if 'b' in args.types or 'baseline' in args.types:
+    print ("\nList of registered baseline:")
+    print ('baseline', **kwargs))
 def databases(command_line_parameters = None):
diff --git a/bob/bio/base/test/dummy/ b/bob/bio/base/test/dummy/
new file mode 100644
index 00000000..e52717ec
--- /dev/null
+++ b/bob/bio/base/test/dummy/
@@ -0,0 +1,15 @@
+from import Baseline
+import pkg_resources
+import os
+dummy_dir = pkg_resources.resource_filename('', 'test/dummy')
+class DummyBaseline(Baseline):
+    """
+    Baseline of the dummy test resources.
+    It adds nothing to ``Baseline``: all keyword arguments are forwarded unchanged
+    """
+    def __init__(self, **kwargs):
+        super(DummyBaseline, self).__init__(**kwargs)
+baseline = DummyBaseline(name="dummy", 
+                         preprocessors={"default": os.path.join(dummy_dir, '')},
+                         extractor=os.path.join(dummy_dir, ''),
+                         algorithm=os.path.join(dummy_dir, ''))
diff --git a/bob/bio/base/test/ b/bob/bio/base/test/
new file mode 100644
index 00000000..42760e9d
--- /dev/null
+++ b/bob/bio/base/test/
@@ -0,0 +1,21 @@
+import tempfile
+import shutil
+from click.testing import CliRunner
+from import baseline
+def test_baselines():
+    """
+    Runs the ``baseline`` command with the dummy database and the dummy
+    baseline and checks that it exits with code 0.
+    The directory used for temporary files and results is always removed
+    """
+    # Created before entering ``try``: if the creation itself fails there is
+    # nothing to clean up, and ``finally`` must not touch an unbound ``tmp_dir``
+    tmp_dir = tempfile.mkdtemp(prefix="bobtest_")
+    try:
+        runner = CliRunner()
+        result = runner.invoke(baseline, args=('-d', 'dummy', '-b', 'dummy', '-T', tmp_dir, '-R', tmp_dir))
+        # The hint repeats the options used above (-d, -b, -T, -R)
+        assertion_error_message = (
+              'Command exited with this output: `{}\' \n'
+              'If the output is empty, you can run this script locally to see '
+              'what is wrong:\n'
+              'bin/bob bio baseline -d dummy -b dummy -T /tmp/baseline -R /tmp/baseline'
+              ''.format(result.output))
+        assert result.exit_code == 0, assertion_error_message
+    finally:
+        shutil.rmtree(tmp_dir)
diff --git a/bob/bio/base/utils/ b/bob/bio/base/utils/
index fa5e3c42..57a5b5a2 100644
--- a/bob/bio/base/utils/
+++ b/bob/bio/base/utils/
@@ -21,7 +21,7 @@ logger = logging.getLogger("")
 #: Keywords for which resources are defined.
-valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator')
+valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator', 'baseline')
 def _collect_config(paths):
diff --git a/doc/baseline.rst b/doc/baseline.rst
new file mode 100644
index 00000000..2996e7ba
--- /dev/null
+++ b/doc/baseline.rst
@@ -0,0 +1,77 @@
+==================
+Defining baselines
+==================
+Once you have a biometric system well established, tuned and working for a particular database (or a particular set of databases), you may want to provide **an easier to reproduce** way to share it.
+For this purpose, we defined something called baseline.
+A baseline is composed of the triplet preprocessor, extractor and algorithm.
+First, check out the baselines that are ready to be triggered in your environment by running:
+.. code-block:: sh
+    $ bob bio baseline --help
+To create your own baseline, you just need to define it like in the recipe below:
+.. code-block:: py
+    from bob.bio.base.baseline import Baseline
+    class DummyBaseline(Baseline):
+        def __init__(self):
+            self.preprocessors = dict() # SHOULD BE DEFINED AS A DICTIONARY
+            self.preprocessors["default"] = 'my-preprocessor'
+            self.extractor = 'my-extractor'
+            self.algorithm = 'my-algorithm'
+    baseline = DummyBaseline()
+Some databases may require specific preprocessors depending on the type of meta-information provided.
+For instance, for some face recognition databases, faces should be cropped in a particular way depending on the annotations provided. 
+To approach this issue, the preprocessors are defined in a dictionary, with a generic preprocessor defined as **default** and the database specific preprocessor defined by database name as in the example below:
+.. code-block:: py
+    self.preprocessors = dict()
+    self.preprocessors["default"] = 'my-preprocessor'
+    self.preprocessors["database_name"] = 'my-specific-preprocessor'
+Below is a full example of how to define a baseline with database-specific preprocessors.
+.. code-block:: py
+    from bob.bio.base.baseline import Baseline
+    class AnotherBaseline(Baseline):
+        def __init__(self):
+            self.preprocessors = dict() # SHOULD BE DEFINED AS A DICTIONARY
+            self.preprocessors["default"] = 'my-preprocessor'
+            self.preprocessors["database_name"] = 'my-specific-preprocessor'
+            self.extractor = 'my-extractor'
+            self.algorithm = 'my-algorithm'
+    baseline = AnotherBaseline()
+.. note::
+   The triplet can be a resource or a configuration file.
+   This works in the same way as in :ref:`Running Experiments <running_part_1>`.
+.. note::
+  Baselines are also registered as resources under the keyword `bob.bio.baseline`.
+You can find the list of readily available baselines using the ``resources.py`` command:
+.. code-block:: sh
+    $ resources.py --types baseline
diff --git a/doc/index.rst b/doc/index.rst
index 2b966a5d..8e1e3a67 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -75,6 +75,7 @@ Users Guide
+   baseline
diff --git a/ b/
index 58dbbfcf..e488a1c3 100644
--- a/
+++ b/
@@ -147,12 +147,19 @@ setup(
         'dir               =',
         'gen               =',
         'evaluate          =',
+        'baseline          =',
       # annotators
       '': [
         'dummy             =',
+      #baselines
+      '':[
+        'dummy =',
+      ],
     # Classifiers are important if you plan to distribute this package through