[config_file] Implement multi-config readout

a0022914 · André Anjos · 457b89f6 · a0022914 · a0022914 · a0022914
Commit a0022914 authored 8 years ago by André Anjos
--- a/bob/bio/base/script/resources.py
+++ b/bob/bio/base/script/resources.py
@@ -9,8 +9,8 @@ def resources(command_line_parameters = None):
  import argparse
  parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument("--types", '-t', nargs = '+',
-                      choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid'),
-                      default = ('d', 'p', 'e', 'a', 'g'),
+                      choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config'),
+                      default = ('d', 'p', 'e', 'a', 'g', 'c'),
                      help = "Select the resource types that should be listed.")

  parser.add_argument("--details", '-d', action='store_true', help = "Prints the complete configuration for all resources")
@@ -47,6 +47,10 @@ def resources(command_line_parameters = None):
    print ("\nList of registered grid configurations:")
    print (bob.bio.base.list_resources('grid', **kwargs))

+  if 'c' in args.types or 'config' in args.types:
+    print ("\nList of registered configurations:")
+    print (bob.bio.base.list_resources('config', **kwargs))
+
  print()

 def databases(command_line_parameters = None):

--- a/bob/bio/base/test/dummy/config.py
+++ b/bob/bio/base/test/dummy/config.py
+from .database import database
+from .preprocessor import preprocessor
+from .extractor import extractor
+from .algorithm import algorithm
+zt_norm = True
+verbose = 1
+sub_directory = "test_dummy"
--- a/bob/bio/base/test/dummy/config2.py
+++ b/bob/bio/base/test/dummy/config2.py
+verbose = 2
+sub_directory = "test_dummy2"
--- a/bob/bio/base/test/test_config_file.py
+++ b/bob/bio/base/test/test_config_file.py
@@ -227,3 +227,107 @@ def test_compare_to_cmdline_skip():
  finally:
    if test_dir: shutil.rmtree(test_dir)
    if test_config_file: del test_config_file
+
+
+def test_from_resource():
+
+  test_dir = None
+
+  try:
+    test_dir = tempfile.mkdtemp(prefix='bobtest_')
+    args = parse_arguments(['dummy'])
+
+    assert args.sub_directory.endswith('test_dummy')
+    assert args.allow_missing_files is False
+    assert args.zt_norm is True
+    assert args.verbose == 1
+
+    from bob.bio.base.test.dummy.database import DummyDatabase
+    assert isinstance(args.database, DummyDatabase)
+    from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor
+    assert isinstance(args.preprocessor, DummyPreprocessor)
+    from bob.bio.base.test.dummy.extractor import DummyExtractor
+    assert isinstance(args.extractor, DummyExtractor)
+    from bob.bio.base.test.dummy.algorithm import DummyAlgorithm
+    assert isinstance(args.algorithm, DummyAlgorithm)
+
+  finally:
+    if test_dir: shutil.rmtree(test_dir)
+
+
+def test_from_module():
+
+  test_dir = None
+
+  try:
+    test_dir = tempfile.mkdtemp(prefix='bobtest_')
+    args = parse_arguments(['bob.bio.base.test.dummy.config'])
+
+    assert args.sub_directory.endswith('test_dummy')
+    assert args.allow_missing_files is False
+    assert args.zt_norm is True
+    assert args.verbose == 1
+
+    from bob.bio.base.test.dummy.database import DummyDatabase
+    assert isinstance(args.database, DummyDatabase)
+    from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor
+    assert isinstance(args.preprocessor, DummyPreprocessor)
+    from bob.bio.base.test.dummy.extractor import DummyExtractor
+    assert isinstance(args.extractor, DummyExtractor)
+    from bob.bio.base.test.dummy.algorithm import DummyAlgorithm
+    assert isinstance(args.algorithm, DummyAlgorithm)
+
+  finally:
+    if test_dir: shutil.rmtree(test_dir)
+
+
+def test_order():
+
+  test_dir = None
+
+  try:
+    test_dir = tempfile.mkdtemp(prefix='bobtest_')
+    args = parse_arguments(['dummy', 'dummy2'])
+
+    assert args.sub_directory.endswith('test_dummy2')
+    assert args.allow_missing_files is False
+    assert args.zt_norm is True
+    assert args.verbose == 2
+
+    from bob.bio.base.test.dummy.database import DummyDatabase
+    assert isinstance(args.database, DummyDatabase)
+    from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor
+    assert isinstance(args.preprocessor, DummyPreprocessor)
+    from bob.bio.base.test.dummy.extractor import DummyExtractor
+    assert isinstance(args.extractor, DummyExtractor)
+    from bob.bio.base.test.dummy.algorithm import DummyAlgorithm
+    assert isinstance(args.algorithm, DummyAlgorithm)
+
+  finally:
+    if test_dir: shutil.rmtree(test_dir)
+
+
+def test_order_inverse():
+
+  test_dir = None
+
+  try:
+    test_dir = tempfile.mkdtemp(prefix='bobtest_')
+    args = parse_arguments(['dummy2', 'dummy'])
+
+    assert args.sub_directory.endswith('test_dummy')
+    assert args.allow_missing_files is False
+    assert args.zt_norm is True
+    assert args.verbose == 1
+
+    from bob.bio.base.test.dummy.database import DummyDatabase
+    assert isinstance(args.database, DummyDatabase)
+    from bob.bio.base.test.dummy.preprocessor import DummyPreprocessor
+    assert isinstance(args.preprocessor, DummyPreprocessor)
+    from bob.bio.base.test.dummy.extractor import DummyExtractor
+    assert isinstance(args.extractor, DummyExtractor)
+    from bob.bio.base.test.dummy.algorithm import DummyAlgorithm
+    assert isinstance(args.algorithm, DummyAlgorithm)
+
+  finally:
+    if test_dir: shutil.rmtree(test_dir)
--- a/bob/bio/base/tools/command_line.py
+++ b/bob/bio/base/tools/command_line.py
@@ -47,7 +47,7 @@ def command_line_parser(description=__doc__, exclude_resources_from=[]):
  #######################################################################################
  ############## options that are required to be specified #######################
  config_group = parser.add_argument_group('\nParameters defining the experiment. Most of these parameters can be a registered resource, a configuration file, or even a string that defines a newly created object')
-  config_group.add_argument('configuration_file', metavar='PATH', nargs='?', help = 'A configuration file containing one or more of "database", "preprocessor", "extractor", "algorithm" and/or "grid"')
+  config_group.add_argument('configuration_file', metavar='PATH', nargs='*', help = 'A configuration file containing one or more of "database", "preprocessor", "extractor", "algorithm" and/or "grid"')
  config_group.add_argument('-d', '--database', metavar = 'x', nargs = '+',
      help = 'Database and the protocol; registered databases are: %s' % utils.resource_keys('database', exclude_resources_from))
  config_group.add_argument('-p', '--preprocessor', metavar = 'x', nargs = '+',
@@ -216,7 +216,7 @@ def initialize(parsers, command_line_parameters = None, skips = []):
  args = parser.parse_args(command_line_parameters)

  # first, read the configuration file and set everything from the config file to the args -- as long as not overwritten on command line
-  config = utils.read_config_file(args.configuration_file) if args.configuration_file is not None else None
+  config = utils.read_config_file(args.configuration_file) if args.configuration_file else None
  for keyword in ("database", "preprocessor", "extractor", "algorithm"):
    _take_from_config_or_command_line(args, config, keyword,
        parser.get_default(keyword))

--- a/bob/bio/base/utils/resources.py
+++ b/bob/bio/base/utils/resources.py
@@ -20,10 +20,77 @@ logger = logging.getLogger("bob.bio.base")


 #: Keywords for which resources are defined.
-valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid')
+valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config')

-def read_config_file(filename, keyword = None):
-  """read_config_file(filename, keyword = None) -> config
+
+def _collect_config(paths):
+  '''Collect all python file resources into a module
+
+  This function recursively loads python modules (in a Python 3-compatible way)
+  so the last loaded module corresponds to the final state of the loading. In
+  this way, we load the first file, resolve its symbols, overwrite with the
+  second file and so on. We return a temporarily created module containing all
+  resolved variables, respecting the input order.
+
+
+  Parameters:
+
+    paths (list): A list of resources, modules or files (in order) to collect
+      resources from
+
+
+  Returns:
+
+    module: A valid Python module you can use to configure your tool
+
+  '''
+
+  def _attach_resources(src, dst):
+    for k in dir(src):
+      dst.__dict__[k] = getattr(src, k)
+
+  import random
+
+  name = "".join(random.sample(ascii_letters, 10))
+  retval = imp.new_module(name)
+
+  #loads used modules recursively, attach results to module to return
+  if len(paths) > 1:
+    deps = _collect_config(paths[:-1])
+    _attach_resources(deps, retval)
+
+  #execute the module code in the context of previously imported modules
+  for ep in pkg_resources.iter_entry_points('bob.bio.config'):
+    if ep.name == paths[-1]:
+      tmp = ep.load() #loads the pointed module
+      _attach_resources(tmp, retval)
+      return retval
+
+  #if you get to this point, then it is not a resource, maybe it is a module?
+  try:
+    tmp = __import__(paths[-1], retval.__dict__, retval.__dict__, ['*'])
+    _attach_resources(tmp, retval)
+    return retval
+  except ImportError:
+    #module does not exist, ignore it
+    pass
+  except Exception as e:
+    raise IOError("The configuration module '%s' could not " \
+        "be loaded: %s" % (paths[-1], e))
+
+  #if you get to this point, then it is neither a resource nor a loadable
+  #module; is it on the file system?
+  if not os.path.exists(paths[-1]):
+    raise IOError("The configuration file, resource or module '%s' " \
+        "could not be found, loaded or imported" % paths[-1])
+
+  exec(compile(open(paths[-1], "rb").read(), paths[-1], 'exec'), retval.__dict__)
+
+  return retval
+
+
+def read_config_file(filenames, keyword = None):
+  """read_config_file(filenames, keyword = None) -> config

  Use this function to read the given configuration file.
  If a keyword is specified, only the configuration according to this keyword is returned.
@@ -31,8 +98,9 @@ def read_config_file(filename, keyword = None):

  **Parameters:**

-  filename : str
-    The name of the configuration file to read.
+  filenames : list
+    A list (potentially empty) of configuration files or resources to read
+    running options from

  keyword : str or ``None``
    If specified, only the contents of the variable with the given name is returned.
@@ -45,19 +113,18 @@ def read_config_file(filename, keyword = None):
    Otherwise, the whole configuration is returned (as a local namespace).
  """

-  if not os.path.exists(filename):
-    raise IOError("The given configuration file '%s' could not be found" % filename)
+  if not filenames:
+    raise RuntimeError("At least one configuration file, resource or " \
+        "module name must be passed")

-  import string
-  import random
-  tmp_config = "".join(random.sample(ascii_letters, 10))
-  config = imp.load_source(tmp_config, filename)
+  config = _collect_config(filenames)

  if not keyword:
    return config

  if not hasattr(config, keyword):
-    raise ImportError("The desired keyword '%s' does not exist in your configuration file '%s'." %(keyword, filename))
+    raise ImportError("The desired keyword '%s' does not exist in any of " \
+        "your configuration files: %s" %(keyword, ', '.join(filenames)))

  return getattr(config, keyword)


--- a/develop.cfg
+++ b/develop.cfg
@@ -5,6 +5,7 @@
 [buildout]
 parts = scripts
 eggs = bob.bio.base
+       bob.db.atnt
       gridtk

 extensions = bob.buildout

--- a/doc/implementation.rst
+++ b/doc/implementation.rst
@@ -307,7 +307,7 @@ Resources
 ---------

 Finally, some of the configuration files, which sit in the ``bob/bio/*/config`` directories, are registered as *resources*.
-This means that a resource is nothing else than a short name for a registered instance of one of the tools (database, preprocessor, extractor, algorithm or grid configuration) of ``bob.bio``, which has a pre-defined set of parameters.
+A resource is nothing more than a short name for either a registered instance of one of the tools (database, preprocessor, extractor, algorithm or grid configuration) of ``bob.bio``, or a Python module; in both cases it comes with a pre-defined set of parameters.

 The process of registering a resource is relatively easy.
 We use the SetupTools_ mechanism of registering so-called entry points in the ``setup.py`` file of the according ``bob.bio`` package.
@@ -318,6 +318,8 @@ Particularly, we use a specific list of entry points, which are:
 * ``bob.bio.extractor`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.extractor.Extractor`
 * ``bob.bio.algorithm`` to register an instance of a (derivation of a) :py:class:`bob.bio.base.algorithm.Algorithm`
 * ``bob.bio.grid`` to register an instance of the :py:class:`bob.bio.base.grid.Grid`
+* ``bob.bio.config`` to register a Python module that contains the values of
+  resources and parameters to use for an experiment

 For each of the tools, several resources are defined, which you can list with the ``./bin/resources.py`` command line.


--- a/setup.py
+++ b/setup.py
@@ -83,6 +83,11 @@ setup(
        'fuse_scores.py    = bob.bio.base.script.fuse_scores:main',
      ],

+      'bob.bio.config': [
+        'dummy             = bob.bio.base.test.dummy.config', # for test purposes only
+        'dummy2            = bob.bio.base.test.dummy.config2', # for test purposes only
+      ],
+
      'bob.bio.database': [
        'dummy             = bob.bio.base.test.dummy.database:database', # for test purposes only
      ],