From a0022914eeb7630cd6aeedc749a419d84c7f6b2f Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.anjos@idiap.ch>
Date: Tue, 13 Sep 2016 15:49:55 +0200
Subject: [PATCH] [config_file] Implement multi-config readout

---
 bob/bio/base/script/resources.py      |   8 +-
 bob/bio/base/test/dummy/config.py     |   7 ++
 bob/bio/base/test/dummy/config2.py    |   2 +
 bob/bio/base/test/test_config_file.py | 104 ++++++++++++++++++++++++++
 bob/bio/base/tools/command_line.py    |   4 +-
 bob/bio/base/utils/resources.py       |  91 +++++++++++++++++++---
 develop.cfg                           |   1 +
 doc/implementation.rst                |   4 +-
 setup.py                              |   5 ++
 9 files changed, 209 insertions(+), 17 deletions(-)
 create mode 100644 bob/bio/base/test/dummy/config.py
 create mode 100644 bob/bio/base/test/dummy/config2.py

diff --git a/bob/bio/base/script/resources.py b/bob/bio/base/script/resources.py
index ffeaedac..15fc0211 100644
--- a/bob/bio/base/script/resources.py
+++ b/bob/bio/base/script/resources.py
@@ -9,8 +9,8 @@ def resources(command_line_parameters = None):
   import argparse
   parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
   parser.add_argument("--types", '-t', nargs = '+',
-                      choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid'),
-                      default = ('d', 'p', 'e', 'a', 'g'),
+                      choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config'),
+                      default = ('d', 'p', 'e', 'a', 'g', 'c'),
                       help = "Select the resource types that should be listed.")
 
   parser.add_argument("--details", '-d', action='store_true', help = "Prints the complete configuration for all resources")
@@ -47,6 +47,10 @@ def resources(command_line_parameters = None):
     print ("\nList of registered grid configurations:")
     print (bob.bio.base.list_resources('grid', **kwargs))
 
+  if 'c' in args.types or 'config' in args.types:
+    print ("\nList of registered configurations:")
+    print (bob.bio.base.list_resources('config', **kwargs))
+
   print()
 
 def databases(command_line_parameters = None):
diff --git a/bob/bio/base/test/dummy/config.py b/bob/bio/base/test/dummy/config.py
new file mode 100644
index 00000000..c2e3b48a
--- /dev/null
+++ b/bob/bio/base/test/dummy/config.py
@@ -0,0 +1,7 @@
+from .database import database
+from .preprocessor import preprocessor
+from .extractor import extractor
+from .algorithm import algorithm
+zt_norm = True
+verbose = 1
+sub_directory = "test_dummy"
diff --git a/bob/bio/base/test/dummy/config2.py b/bob/bio/base/test/dummy/config2.py
new file mode 100644
index 00000000..ff740ebc
--- /dev/null
+++ b/bob/bio/base/test/dummy/config2.py
@@ -0,0 +1,2 @@
+verbose = 2
+sub_directory = "test_dummy2"
diff --git a/bob/bio/base/test/test_config_file.py b/bob/bio/base/test/test_config_file.py
index 5c851fc0..d00e1c33 100644
--- a/bob/bio/base/test/test_config_file.py
+++ b/bob/bio/base/test/test_config_file.py
@@ -227,3 +227,107 @@ def test_compare_to_cmdline_skip():
   finally:
     if test_dir: shutil.rmtree(test_dir)
     if test_config_file: del test_config_file
+
+
+def test_from_resource():
+
+  test_dir = None
+
+  try:
+    test_dir = tempfile.mkdtemp(prefix='bobtest_')
+    args = parse_arguments(['dummy'])
+
+    assert args.sub_directory.endswith('test_dummy')
+    assert args.allow_missing_files is False
+    assert args.zt_norm is True
+    assert args.verbose == 1
+
+    from bob.bio.base.test.dummy.database import DummyDatabase
+    assert isinstance(args.database, DummyDatabase)
+    from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor
+    assert isinstance(args.preprocessor, DummyPreprocessor)
+    from bob.bio.base.test.dummy.extractor import DummyExtractor
+    assert isinstance(args.extractor, DummyExtractor)
+    from bob.bio.base.test.dummy.algorithm import DummyAlgorithm
+    assert isinstance(args.algorithm, DummyAlgorithm)
+
+  finally:
+    if test_dir: shutil.rmtree(test_dir)
+
+
+def test_from_module():
+
+  test_dir = None
+
+  try:
+    test_dir = tempfile.mkdtemp(prefix='bobtest_')
+    args = parse_arguments(['bob.bio.base.test.dummy.config'])
+
+    assert args.sub_directory.endswith('test_dummy')
+    assert args.allow_missing_files is False
+    assert args.zt_norm is True
+    assert args.verbose == 1
+
+    from bob.bio.base.test.dummy.database import DummyDatabase
+    assert isinstance(args.database, DummyDatabase)
+    from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor
+    assert isinstance(args.preprocessor, DummyPreprocessor)
+    from bob.bio.base.test.dummy.extractor import DummyExtractor
+    assert isinstance(args.extractor, DummyExtractor)
+    from bob.bio.base.test.dummy.algorithm import DummyAlgorithm
+    assert isinstance(args.algorithm, DummyAlgorithm)
+
+  finally:
+    if test_dir: shutil.rmtree(test_dir)
+
+
+def test_order():
+
+  test_dir = None
+
+  try:
+    test_dir = tempfile.mkdtemp(prefix='bobtest_')
+    args = parse_arguments(['dummy', 'dummy2'])
+
+    assert args.sub_directory.endswith('test_dummy2')
+    assert args.allow_missing_files is False
+    assert args.zt_norm is True
+    assert args.verbose == 2
+
+    from bob.bio.base.test.dummy.database import DummyDatabase
+    assert isinstance(args.database, DummyDatabase)
+    from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor
+    assert isinstance(args.preprocessor, DummyPreprocessor)
+    from bob.bio.base.test.dummy.extractor import DummyExtractor
+    assert isinstance(args.extractor, DummyExtractor)
+    from bob.bio.base.test.dummy.algorithm import DummyAlgorithm
+    assert isinstance(args.algorithm, DummyAlgorithm)
+
+  finally:
+    if test_dir: shutil.rmtree(test_dir)
+
+
+def test_order_inverse():
+
+  test_dir = None
+
+  try:
+    test_dir = tempfile.mkdtemp(prefix='bobtest_')
+    args = parse_arguments(['dummy2', 'dummy'])
+
+    assert args.sub_directory.endswith('test_dummy')
+    assert args.allow_missing_files is False
+    assert args.zt_norm is True
+    assert args.verbose == 1
+
+    from bob.bio.base.test.dummy.database import DummyDatabase
+    assert isinstance(args.database, DummyDatabase)
+    from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor
+    assert isinstance(args.preprocessor, DummyPreprocessor)
+    from bob.bio.base.test.dummy.extractor import DummyExtractor
+    assert isinstance(args.extractor, DummyExtractor)
+    from bob.bio.base.test.dummy.algorithm import DummyAlgorithm
+    assert isinstance(args.algorithm, DummyAlgorithm)
+
+  finally:
+    if test_dir: shutil.rmtree(test_dir)
diff --git a/bob/bio/base/tools/command_line.py b/bob/bio/base/tools/command_line.py
index f88e8011..33f0ed66 100644
--- a/bob/bio/base/tools/command_line.py
+++ b/bob/bio/base/tools/command_line.py
@@ -47,7 +47,7 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]):
   #######################################################################################
   ############## options that are required to be specified #######################
   config_group = parser.add_argument_group('\nParameters defining the experiment. Most of these parameters can be a registered resource, a configuration file, or even a string that defines a newly created object')
-  config_group.add_argument('configuration_file', metavar='PATH', nargs='?', help = 'A configuration file containing one or more of "database", "preprocessor", "extractor", "algorithm" and/or "grid"')
+  config_group.add_argument('configuration_file', metavar='PATH', nargs='*', help = 'A configuration file containing one or more of "database", "preprocessor", "extractor", "algorithm" and/or "grid"')
   config_group.add_argument('-d', '--database', metavar = 'x', nargs = '+',
       help = 'Database and the protocol; registered databases are: %s' % utils.resource_keys('database', exclude_resources_from))
   config_group.add_argument('-p', '--preprocessor', metavar = 'x', nargs = '+',
@@ -216,7 +216,7 @@ def initialize(parsers, command_line_parameters = None, skips = []):
   args = parser.parse_args(command_line_parameters)
 
   # first, read the configuration file and set everything from the config file to the args -- as long as not overwritten on command line
-  config = utils.read_config_file(args.configuration_file) if args.configuration_file is not None else None
+  config = utils.read_config_file(args.configuration_file) if args.configuration_file else None
   for keyword in ("database", "preprocessor", "extractor", "algorithm"):
     _take_from_config_or_command_line(args, config, keyword,
         parser.get_default(keyword))
diff --git a/bob/bio/base/utils/resources.py b/bob/bio/base/utils/resources.py
index 8be8e030..12fe0acb 100644
--- a/bob/bio/base/utils/resources.py
+++ b/bob/bio/base/utils/resources.py
@@ -20,10 +20,77 @@ logger = logging.getLogger("bob.bio.base")
 
 
 #: Keywords for which resources are defined.
-valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid')
+valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config')
 
-def read_config_file(filename, keyword = None):
-  """read_config_file(filename, keyword = None) -> config
+
+def _collect_config(paths):
+  '''Collects a list of configuration sources into a single module
+
+  Entries of ``paths`` are resolved in order: earlier entries are loaded first
+  (recursively) and later entries overwrite symbols of the same name. An entry
+  is tried as (1) a ``bob.bio.config`` entry point name, (2) an importable
+  module name and (3) the path to a python file, in this order.
+
+  Parameters:
+
+    paths (list): Non-empty list of resources, modules or files, in order
+
+  Returns:
+
+    module: A new module (randomly named) with all collected symbols
+
+  Raises:
+
+    IOError: If an entry cannot be found or fails while being imported
+
+  '''
+
+  def _attach_resources(src, dst):
+    #skips dunders (__name__, __file__, ...): they describe src, not settings
+    for k in dir(src):
+      if not (k.startswith('__') and k.endswith('__')):
+        dst.__dict__[k] = getattr(src, k)
+
+  import random
+
+  name = "".join(random.sample(ascii_letters, 10))
+  retval = imp.new_module(name)
+
+  #loads used modules recursively, attach results to module to return
+  if len(paths) > 1:
+    _attach_resources(_collect_config(paths[:-1]), retval)
+
+  #is the last entry the name of a registered ``bob.bio.config`` resource?
+  for ep in pkg_resources.iter_entry_points('bob.bio.config'):
+    if ep.name == paths[-1]:
+      _attach_resources(ep.load(), retval) #loads the pointed module
+      return retval
+
+  #if you get to this point, then it is not a resource, maybe it is a module?
+  try:
+    tmp = __import__(paths[-1], retval.__dict__, retval.__dict__, ['*'])
+    _attach_resources(tmp, retval)
+    return retval
+  except ImportError:
+    pass #not importable as a module, try it as a file below
+  except Exception as e:
+    raise IOError("The configuration module '%s' could not " \
+        "be loaded: %s" % (paths[-1], e))
+
+  #if you get to this point, then it's not a resource nor a loadable module,
+  #is it on the file system?
+  if not os.path.exists(paths[-1]):
+    raise IOError("The configuration file, resource or module '%s' " \
+        "could not be found, loaded or imported" % paths[-1])
+
+  with open(paths[-1], "rb") as f: #the file is closed even if running it fails
+    exec(compile(f.read(), paths[-1], 'exec'), retval.__dict__)
+
+  return retval
+
+
+def read_config_file(filenames, keyword = None):
+  """read_config_file(filenames, keyword = None) -> config
 
   Use this function to read the given configuration file.
   If a keyword is specified, only the configuration according to this keyword is returned.
@@ -31,8 +98,9 @@ def read_config_file(filename, keyword = None):
 
   **Parameters:**
 
-  filename : str
-    The name of the configuration file to read.
+  filenames : list
+    A non-empty list of configuration files, resources or module names to
+    read running options from; later entries override earlier ones
 
   keyword : str or ``None``
     If specified, only the contents of the variable with the given name is returned.
@@ -45,19 +113,18 @@ def read_config_file(filename, keyword = None):
     Otherwise, the whole configuration is returned (as a local namespace).
   """
 
-  if not os.path.exists(filename):
-    raise IOError("The given configuration file '%s' could not be found" % filename)
+  if not filenames:
+    raise RuntimeError("At least one configuration file, resource or " \
+        "module name must be passed")
 
-  import string
-  import random
-  tmp_config = "".join(random.sample(ascii_letters, 10))
-  config = imp.load_source(tmp_config, filename)
+  config = _collect_config(filenames)
 
   if not keyword:
     return config
 
   if not hasattr(config, keyword):
-    raise ImportError("The desired keyword '%s' does not exist in your configuration file '%s'." %(keyword, filename))
+    raise ImportError("The desired keyword '%s' does not exist in any of " \
+        "your configuration files: %s" %(keyword, ', '.join(filenames)))
 
   return getattr(config, keyword)
 
diff --git a/develop.cfg b/develop.cfg
index b9e28af7..deda7f60 100644
--- a/develop.cfg
+++ b/develop.cfg
@@ -5,6 +5,7 @@
 [buildout]
 parts = scripts
 eggs = bob.bio.base
+       bob.db.atnt
        gridtk
 
 extensions = bob.buildout
diff --git a/doc/implementation.rst b/doc/implementation.rst
index 984742d6..b7bdcf7d 100644
--- a/doc/implementation.rst
+++ b/doc/implementation.rst
@@ -307,7 +307,7 @@ Resources
 ---------
 
 Finally, some of the configuration files, which sit in the ``bob/bio/*/config`` directories, are registered as *resources*.
-This means that a resource is nothing else than a short name for a registered instance of one of the tools (database, preprocessor, extractor, algorithm or grid configuration) of ``bob.bio``, which has a pre-defined set of parameters.
+A resource is nothing more than a short name for a registered instance of one of the tools (database, preprocessor, extractor, algorithm or grid configuration) of ``bob.bio``, or for a Python module, either of which comes with a pre-defined set of parameters.
 
 The process of registering a resource is relatively easy.
 We use the SetupTools_ mechanism of registering so-called entry points in the ``setup.py`` file of the according ``bob.bio`` package.
@@ -318,6 +318,8 @@ Particularly, we use a specific list of entry points, which are:
 * ``bob.bio.extractor`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.extractor.Extractor`
 * ``bob.bio.algorithm`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.algorithm.Algorithm`
 * ``bob.bio.grid`` to register an instance of the :py:class:`bob.bio.base.grid.Grid`
+* ``bob.bio.config`` to register a Python module that contains the values of
+  resources and parameters to use for an experiment
 
 For each of the tools, several resources are defined, which you can list with the ``./bin/resources.py`` command line.
 
diff --git a/setup.py b/setup.py
index f10b8155..9a893c86 100644
--- a/setup.py
+++ b/setup.py
@@ -83,6 +83,11 @@ setup(
         'fuse_scores.py    = bob.bio.base.script.fuse_scores:main',
       ],
 
+      'bob.bio.config': [
+        'dummy             = bob.bio.base.test.dummy.config', # for test purposes only
+        'dummy2            = bob.bio.base.test.dummy.config2', # for test purposes only
+      ],
+
       'bob.bio.database': [
         'dummy             = bob.bio.base.test.dummy.database:database', # for test purposes only
       ],
-- 
GitLab