From 82413510beb4917b927f8fe606a6c4f3dcafa55a Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Tue, 15 May 2018 18:07:19 +0200
Subject: [PATCH] Created the Baselines Concept

Created the Baselines Concept

Organized the documentation

Documented the helps

Added the baseline in the resources search

Removed some unused imports

Solved discussion

Solving other discussions
---
 bob/bio/base/__init__.py            |  1 +
 bob/bio/base/baseline/Baseline.py   | 32 ++++++++++++
 bob/bio/base/baseline/__init__.py   | 43 ++++++++++++++++
 bob/bio/base/script/baseline.py     | 79 +++++++++++++++++++++++++++++
 bob/bio/base/script/resources.py    |  8 ++-
 bob/bio/base/test/dummy/baseline.py | 15 ++++++
 bob/bio/base/test/test_baselines.py | 21 ++++++++
 bob/bio/base/utils/resources.py     |  2 +-
 doc/baseline.rst                    | 77 ++++++++++++++++++++++++++++
 doc/index.rst                       |  1 +
 setup.py                            |  7 +++
 11 files changed, 283 insertions(+), 3 deletions(-)
 create mode 100644 bob/bio/base/baseline/Baseline.py
 create mode 100755 bob/bio/base/baseline/__init__.py
 create mode 100644 bob/bio/base/script/baseline.py
 create mode 100644 bob/bio/base/test/dummy/baseline.py
 create mode 100644 bob/bio/base/test/test_baselines.py
 create mode 100644 doc/baseline.rst

diff --git a/bob/bio/base/__init__.py b/bob/bio/base/__init__.py
index 9ef81253..c69d5db3 100644
--- a/bob/bio/base/__init__.py
+++ b/bob/bio/base/__init__.py
@@ -6,6 +6,7 @@ from . import algorithm
 from . import tools
 from . import grid # only one file, not complete directory
 from . import annotator
+from . import baseline
 
 from . import script
 from . import test
diff --git a/bob/bio/base/baseline/Baseline.py b/bob/bio/base/baseline/Baseline.py
new file mode 100644
index 00000000..bb3a9ee7
--- /dev/null
+++ b/bob/bio/base/baseline/Baseline.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+
+
+class Baseline(object):
+    """
+    Base class to define baselines.
+
+    A Baseline is composed of the triplet :any:`bob.bio.base.preprocessor.Preprocessor`,
+    :any:`bob.bio.base.extractor.Extractor` and :any:`bob.bio.base.algorithm.Algorithm`.
+
+    Attributes
+    ----------
+
+      name: str
+        Name of the baseline. This name will be displayed in the command line interface
+      preprocessors: dict
+        Dictionary containing all possible preprocessors (a new empty dict when not given)
+      extractor: str
+        Registered resource or a config file containing the feature extractor
+      algorithm: str
+        Registered resource or a config file containing the algorithm
+
+    """
+
+    def __init__(self, name="", preprocessors=None, extractor="", algorithm="", **kwargs):
+        super(Baseline, self).__init__(**kwargs)
+        self.name = name
+        self.preprocessors = {} if preprocessors is None else preprocessors  # never share a default dict
+        self.extractor = extractor
+        self.algorithm = algorithm
diff --git a/bob/bio/base/baseline/__init__.py b/bob/bio/base/baseline/__init__.py
new file mode 100755
index 00000000..12d62737
--- /dev/null
+++ b/bob/bio/base/baseline/__init__.py
@@ -0,0 +1,43 @@
+from .Baseline import Baseline
+import bob.bio.base
+
+ 
+def get_available_databases():
+    """
+    Get all the available databases through the database entry-points
+
+    Returns a dict mapping each loadable database name to ``{"has_zt": bool, "groups": list}``.
+    Databases whose resource fails to load are skipped.
+    """
+
+    available_databases = dict()
+    for database in bob.bio.base.resource_keys('database', strip=[]):
+        try:
+            database_entry_point = bob.bio.base.load_resource(database, 'database')
+            # Checking if the database has data for the ZT normalization
+            has_zt = hasattr(database_entry_point, "zobjects") and hasattr(database_entry_point, "tobjects")
+        except Exception:
+            # Best effort: skip this database, but let KeyboardInterrupt/SystemExit propagate
+            continue
+
+        # Searching for database groups
+        try:
+            declared_groups = list(database_entry_point.groups())
+            groups = [g for g in ("dev", "eval") if g in declared_groups]
+        except Exception:
+            # In case the method groups is not implemented
+            groups = ["dev"]
+        available_databases[database] = {"has_zt": has_zt, "groups": groups}
+    return available_databases
+
+
+def get_config():
+  """Returns a string containing the configuration information.
+  Delegates to ``bob.extension.get_config``, passing this module's ``__name__``."""
+
+  import bob.extension
+  return bob.extension.get_config(__name__)
+
+
+# gets sphinx autodoc done right - don't remove it
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/bio/base/script/baseline.py b/bob/bio/base/script/baseline.py
new file mode 100644
index 00000000..3b8be66b
--- /dev/null
+++ b/bob/bio/base/script/baseline.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+
+"""
+This script runs a registered biometric recognition baseline on a registered database
+
+Examples:
+
+This command line will run the facenet from David Sandberg using the ATnT dataset:
+  `bob bio baseline --baseline facenet_msceleba_inception_v1 --database atnt`
+  
+"""
+
+
+import bob.bio.base
+import bob.io.base
+import os
+from bob.bio.base.script.verify import main as verify
+from bob.bio.base.baseline import get_available_databases
+from bob.extension.scripts.click_helper import (
+    verbosity_option, ConfigCommand, ResourceOption)
+import click
+
+
+@click.command(entry_point_group='bob.bio.config', cls=ConfigCommand)
+@click.option('--database', '-d', required=True, cls=ResourceOption, help="Registered database. Check it out `resources.py --types database` for ready to be used databases")
+@click.option('--baseline', '-b', required=True, cls=ResourceOption, help="Registered baseline. Check it out `resources.py --types baseline` for ready to be used baseline")
+@click.option('--temp-dir', '-T', required=False, cls=ResourceOption, help="The directory for temporary files")
+@click.option('--result-dir', '-R', required=False, cls=ResourceOption, help="The directory for resulting score files")
+@click.option('--grid', '-g', help="Execute the algorithm in the SGE grid.", is_flag=True)
+@click.option('--zt-norm', '-z', help="Enable the computation of ZT norms (if the database supports it).", is_flag=True)
+@verbosity_option(cls=ResourceOption)
+def baseline(baseline, database, temp_dir, result_dir, grid, zt_norm, **kwargs):
+    """
+    Run a biometric recognition baseline
+
+    Check out all the available baselines by typing `resources.py --types baseline`
+
+    """
+
+    def search_preprocessor(key, keys):
+        """
+        Wrapper that searches for preprocessors for specific databases.
+        If not found, the default preprocessor is returned
+        """
+        for k in keys:
+            if key.startswith(k):
+                return k
+        return "default"
+
+    # Loading the registered baseline (preprocessors, extractor and algorithm)
+    loaded_baseline = bob.bio.base.load_resource(baseline, 'baseline', package_prefix="bob.bio.")
+
+    # this is the default sub-directory that is used
+    sub_directory = os.path.join(database, baseline)
+    database_data = get_available_databases()[database]
+    parameters = [
+        '-p', loaded_baseline.preprocessors[search_preprocessor(database, loaded_baseline.preprocessors.keys())],
+        '-e', loaded_baseline.extractor,
+        '-d', database,
+        '-a', loaded_baseline.algorithm,
+        '-vvv',
+        '--sub-directory', sub_directory
+    ]
+
+    # Both directories are optional: forward them only when given instead of passing ``None``
+    if temp_dir is not None:
+        parameters += ['--temp-directory', temp_dir]
+    if result_dir is not None:
+        parameters += ['--result-directory', result_dir]
+    parameters += ['--groups'] + database_data["groups"]
+    if grid:
+        parameters += ['-g', 'demanding']
+    # The 'has_zt' key is always present; its boolean value tells if ZT data is available
+    if zt_norm and database_data['has_zt']:
+        parameters += ['--zt-norm']
+
+    verify(parameters)
diff --git a/bob/bio/base/script/resources.py b/bob/bio/base/script/resources.py
index c68d6f22..f8f56451 100644
--- a/bob/bio/base/script/resources.py
+++ b/bob/bio/base/script/resources.py
@@ -9,8 +9,8 @@ def resources(command_line_parameters = None):
   import argparse
   parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   parser.add_argument("--types", '-t', nargs = '+',
-                      choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config', 'an', 'annotator'),
-                      default = ('d', 'p', 'e', 'a', 'g', 'c', 'an'),
+                      choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config', 'an', 'annotator', 'b', 'baseline'),
+                      default = ('d', 'p', 'e', 'a', 'g', 'c', 'an', 'b'),
                       help = "Select the resource types that should be listed.")
 
   parser.add_argument("--details", '-d', action='store_true', help = "Prints the complete configuration for all resources")
@@ -55,6 +55,10 @@ def resources(command_line_parameters = None):
     print ("\nList of registered annotators:")
     print (bob.bio.base.list_resources('annotator', **kwargs))
 
+  if 'b' in args.types or 'baseline' in args.types:
+    print ("\nList of registered baseline:")
+    print (bob.bio.base.list_resources('baseline', **kwargs))
+
   print()
 
 def databases(command_line_parameters = None):
diff --git a/bob/bio/base/test/dummy/baseline.py b/bob/bio/base/test/dummy/baseline.py
new file mode 100644
index 00000000..e52717ec
--- /dev/null
+++ b/bob/bio/base/test/dummy/baseline.py
@@ -0,0 +1,15 @@
+from bob.bio.base.baseline import Baseline
+import pkg_resources
+import os
+
+
+dummy_dir = pkg_resources.resource_filename('bob.bio.base', 'test/dummy')
+class DummyBaseline(Baseline):
+    """Baseline subclass that forwards all keyword arguments unchanged to :any:`Baseline`."""
+    def __init__(self, **kwargs):
+        super(DummyBaseline, self).__init__(**kwargs)
+# Instance registered as the ``dummy`` resource of the ``bob.bio.baseline`` entry point (see setup.py)
+baseline = DummyBaseline(name="dummy", 
+                         preprocessors={"default": os.path.join(dummy_dir, 'preprocessor.py')},
+                         extractor=os.path.join(dummy_dir, 'extractor.py'),
+                         algorithm=os.path.join(dummy_dir, 'algorithm.py'))
diff --git a/bob/bio/base/test/test_baselines.py b/bob/bio/base/test/test_baselines.py
new file mode 100644
index 00000000..42760e9d
--- /dev/null
+++ b/bob/bio/base/test/test_baselines.py
@@ -0,0 +1,21 @@
+import tempfile
+import shutil
+from click.testing import CliRunner
+from bob.bio.base.script.baseline import baseline
+
+def test_baselines():
+    """Run the dummy baseline on the dummy database through the click command."""
+    # Created before the ``try`` so that ``finally`` never sees an unbound ``tmp_dir``
+    tmp_dir = tempfile.mkdtemp(prefix="bobtest_")
+    try:
+        runner = CliRunner()
+        result = runner.invoke(baseline, args=('-d', 'dummy', '-b', 'dummy', '-T', tmp_dir, '-R', tmp_dir))
+        assertion_error_message = (
+              'Command exited with this output: `{}\' \n'
+              'If the output is empty, you can run this script locally to see '
+              'what is wrong:\n'
+              'bin/bob bio baseline -d dummy -b dummy -T /tmp/bobtest -R /tmp/bobtest'
+              ''.format(result.output))
+        assert result.exit_code == 0, assertion_error_message
+    finally:
+        shutil.rmtree(tmp_dir)
diff --git a/bob/bio/base/utils/resources.py b/bob/bio/base/utils/resources.py
index fa5e3c42..57a5b5a2 100644
--- a/bob/bio/base/utils/resources.py
+++ b/bob/bio/base/utils/resources.py
@@ -21,7 +21,7 @@ logger = logging.getLogger("bob.bio.base")
 
 
 #: Keywords for which resources are defined.
-valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator')
+valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator', 'baseline')
 
 
 def _collect_config(paths):
diff --git a/doc/baseline.rst b/doc/baseline.rst
new file mode 100644
index 00000000..2996e7ba
--- /dev/null
+++ b/doc/baseline.rst
@@ -0,0 +1,77 @@
+.. _bob.bio.base.baseline:
+
+==================
+Defining baselines
+==================
+
+
+Once you have a biometric system well established, tuned and working for a particular database (or a particular set of databases), you may want to provide **an easier to reproduce** way to share it.
+For this purpose, we defined something called baseline.
+
+A baseline is composed of the triplet :any:`bob.bio.base.preprocessor.Preprocessor`, :any:`bob.bio.base.extractor.Extractor` and :any:`bob.bio.base.algorithm.Algorithm`.
+
+First, check out the baselines ready to be triggered in your environment by doing:
+
+.. code-block:: sh
+
+    $ bob bio baseline --help
+
+
+To create your own baseline, you just need to define it like in the recipe below:
+
+.. code-block:: py
+
+    from bob.bio.base.baseline import Baseline
+
+    class DummyBaseline(Baseline):
+
+        def __init__(self):
+
+            self.preprocessors = dict() # SHOULD BE DEFINED AS A DICTIONARY
+            self.preprocessors["default"] = 'my-preprocessor'
+            self.extractor = 'my-extractor'
+            self.algorithm = 'my-algorithm'
+    baseline = DummyBaseline()
+
+Some databases may require specific preprocessors depending on the type of meta-information provided.
+For instance, for some face recognition databases, faces should be cropped in a particular way depending on the annotations provided.
+To address this issue, the preprocessors are defined in a dictionary, with a generic preprocessor defined as **default** and the database specific preprocessor defined by database name as in the example below:
+
+.. code-block:: py
+
+    self.preprocessors = dict()
+    self.preprocessors["default"] = 'my-preprocessor'
+    self.preprocessors["database_name"] = 'my-specific-preprocessor'
+ 
+
+Below is a full example of how to define a baseline with database specific preprocessors.
+
+.. code-block:: py
+
+    from bob.bio.base.baseline import Baseline
+
+    class AnotherBaseline(Baseline):
+
+        def __init__(self):
+
+            self.preprocessors = dict() # SHOULD BE DEFINED AS A DICTIONARY
+            self.preprocessors["default"] = 'my-preprocessor'
+            self.preprocessors["database_name"] = 'my-specific-preprocessor'
+            self.extractor = 'my-extractor'
+            self.algorithm = 'my-algorithm'
+    baseline = AnotherBaseline()
+
+.. note::
+
+   The triplet can be a resource or a configuration file.
+   This works in the same way as in :ref:`Running Experiments <running_part_1>`.
+
+.. note::
+
+  Baselines are also registered as resources under the keyword ``bob.bio.baseline``.
+
+You can find the list of readily available baselines using the ``resources.py`` command:
+
+.. code-block:: sh
+
+    $ resources.py --types baseline
diff --git a/doc/index.rst b/doc/index.rst
index 2b966a5d..8e1e3a67 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -75,6 +75,7 @@ Users Guide
    struct_bio_rec_sys
    experiments
    implementation
+   baseline
    filelist-guide
    more
    annotations
diff --git a/setup.py b/setup.py
index 58dbbfcf..e488a1c3 100644
--- a/setup.py
+++ b/setup.py
@@ -147,12 +147,19 @@ setup(
         'dir               = bob.bio.base.script.commands:dir',
         'gen               = bob.bio.base.script.gen:gen',
         'evaluate          = bob.bio.base.script.commands:evaluate',
+        'baseline          = bob.bio.base.script.baseline:baseline',
       ],
 
       # annotators
       'bob.bio.annotator': [
         'dummy             = bob.bio.base.test.dummy.annotator:annotator',
       ],
+
+      #baselines
+      'bob.bio.baseline':[
+        'dummy = bob.bio.base.test.dummy.baseline:baseline',
+      ],
+
    },
 
     # Classifiers are important if you plan to distribute this package through
-- 
GitLab