From 82413510beb4917b927f8fe606a6c4f3dcafa55a Mon Sep 17 00:00:00 2001 From: Tiago Freitas Pereira <tiagofrepereira@gmail.com> Date: Tue, 15 May 2018 18:07:19 +0200 Subject: [PATCH] Created the Baselines Concept Created the Baselines Concept Organized the documentation Documented the helps Added the baseline in the resources search Removed some unused imports Solved discussion Solving other discussions --- bob/bio/base/__init__.py | 1 + bob/bio/base/baseline/Baseline.py | 32 ++++++++++++ bob/bio/base/baseline/__init__.py | 43 ++++++++++++++++ bob/bio/base/script/baseline.py | 79 +++++++++++++++++++++++++++++ bob/bio/base/script/resources.py | 8 ++- bob/bio/base/test/dummy/baseline.py | 15 ++++++ bob/bio/base/test/test_baselines.py | 21 ++++++++ bob/bio/base/utils/resources.py | 2 +- doc/baseline.rst | 77 ++++++++++++++++++++++++++++ doc/index.rst | 1 + setup.py | 7 +++ 11 files changed, 283 insertions(+), 3 deletions(-) create mode 100644 bob/bio/base/baseline/Baseline.py create mode 100755 bob/bio/base/baseline/__init__.py create mode 100644 bob/bio/base/script/baseline.py create mode 100644 bob/bio/base/test/dummy/baseline.py create mode 100644 bob/bio/base/test/test_baselines.py create mode 100644 doc/baseline.rst diff --git a/bob/bio/base/__init__.py b/bob/bio/base/__init__.py index 9ef81253..c69d5db3 100644 --- a/bob/bio/base/__init__.py +++ b/bob/bio/base/__init__.py @@ -6,6 +6,7 @@ from . import algorithm from . import tools from . import grid # only one file, not complete directory from . import annotator +from . import baseline from . import script from . 
import test diff --git a/bob/bio/base/baseline/Baseline.py b/bob/bio/base/baseline/Baseline.py new file mode 100644 index 00000000..bb3a9ee7 --- /dev/null +++ b/bob/bio/base/baseline/Baseline.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Tiago de Freitas Pereira <tiago.pereira@idiap.ch> + + +class Baseline(object): + """ + Base class to define baselines + + A Baseline is composed by the triplet :any:`bob.bio.base.preprocessor.Preprocessor`, + :any:`bob.bio.base.extractor.Extractor` and :any:`bob.bio.base.algorithm.Algorithm` + + Attributes + ---------- + + name: str + Name of the baseline. This name will be displayed in the command line interface + preprocessors: dict + Dictionary containing all possible preprocessors + extractor: str + Registered resource or a config file containing the feature extractor + algorithm: str + Registered resource or a config file containing the algorithm + + """ + + def __init__(self, name="", preprocessors=dict(), extractor="", algorithm="", **kwargs): + super(Baseline, self).__init__(**kwargs) + self.name = name + self.preprocessors = preprocessors + self.extractor = extractor + self.algorithm = algorithm diff --git a/bob/bio/base/baseline/__init__.py b/bob/bio/base/baseline/__init__.py new file mode 100755 index 00000000..12d62737 --- /dev/null +++ b/bob/bio/base/baseline/__init__.py @@ -0,0 +1,43 @@ +from .Baseline import Baseline +import bob.bio.base + + +def get_available_databases(): + """ + Get all the available databases through the database entry-points + """ + + available_databases = dict() + all_databases = bob.bio.base.resource_keys('database', strip=[]) + for database in all_databases: + try: + database_entry_point = bob.bio.base.load_resource(database, 'database') + + available_databases[database] = dict() + + # Checking if the database has data for the ZT normalization + available_databases[database]["has_zt"] = hasattr(database_entry_point, "zobjects") and hasattr(database_entry_point, 
"tobjects") + available_databases[database]["groups"] = [] + # Searching for database groups + try: + groups = list(database_entry_point.groups()) + for g in ["dev", "eval"]: + available_databases[database]["groups"] += [g] if g in groups else [] + except: + # In case the method groups is not implemented + available_databases[database]["groups"] = ["dev"] + except: + pass + return available_databases + + +def get_config(): + """Returns a string containing the configuration information. + """ + + import bob.extension + return bob.extension.get_config(__name__) + + +# gets sphinx autodoc done right - don't remove it +__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/script/baseline.py b/bob/bio/base/script/baseline.py new file mode 100644 index 00000000..3b8be66b --- /dev/null +++ b/bob/bio/base/script/baseline.py @@ -0,0 +1,79 @@ +#!/usr/bin/env python +# vim: set fileencoding=utf-8 : +# Tiago de Freitas Pereira <tiago.pereira@idiap.ch> + +""" +This script runs some face recognition baselines under some face databases + +Examples: + +This command line will run the facenet from David Sandberg using the ATnT dataset: + `bob bio baseline --baseline facenet_msceleba_inception_v1 --database atnt` + +""" + + +import bob.bio.base +import bob.io.base +import os +from bob.bio.base.script.verify import main as verify +from bob.bio.base.baseline import get_available_databases +from bob.extension.scripts.click_helper import ( + verbosity_option, ConfigCommand, ResourceOption) +import click + + +@click.command(entry_point_group='bob.bio.config', cls=ConfigCommand) +@click.option('--database', '-d', required=True, cls=ResourceOption, help="Registered database. Check it out `resources.py --types database` for ready to be used databases") +@click.option('--baseline', '-b', required=True, cls=ResourceOption, help="Registered baseline. 
Check it out `resources.py --types baseline` for ready to be used baseline") +@click.option('--temp-dir', '-T', required=False, cls=ResourceOption, help="The directory for temporary files") +@click.option('--result-dir', '-R', required=False, cls=ResourceOption, help="The directory for resulting score files") +@click.option('--grid', '-g', help="Execute the algorithm in the SGE grid.", is_flag=True) +@click.option('--zt-norm', '-z', help="Enable the computation of ZT norms (if the database supports it).", is_flag=True) +@verbosity_option(cls=ResourceOption) + +def baseline(baseline, database, temp_dir, result_dir, grid, zt_norm, **kwargs): + """ + Run a biometric recognition baselines + + Check it out all baselines available by typing `resource.py --types baseline` + + """ + + def search_preprocessor(key, keys): + """ + Wrapper that searches for preprocessors for specific databases. + If not found, the default preprocessor is returned + """ + for k in keys: + if key.startswith(k): + return k + else: + return "default" + + # Triggering training for each baseline/database + loaded_baseline = bob.bio.base.load_resource(baseline, 'baseline', package_prefix="bob.bio.") + + # this is the default sub-directory that is used + sub_directory = os.path.join(database, baseline) + database_data = get_available_databases()[database] + parameters = [ + '-p', loaded_baseline.preprocessors[search_preprocessor(database, loaded_baseline.preprocessors.keys())], + '-e', loaded_baseline.extractor, + '-d', database, + '-a', loaded_baseline.algorithm, + '-vvv', + '--temp-directory', temp_dir, + '--result-directory', result_dir, + '--sub-directory', sub_directory + ] + + parameters += ['--groups'] + database_data["groups"] + + if grid: + parameters += ['-g', 'demanding'] + + if zt_norm and 'has_zt' in database_data: + parameters += ['--zt-norm'] + + verify(parameters) diff --git a/bob/bio/base/script/resources.py b/bob/bio/base/script/resources.py index c68d6f22..f8f56451 100644 --- 
a/bob/bio/base/script/resources.py +++ b/bob/bio/base/script/resources.py @@ -9,8 +9,8 @@ def resources(command_line_parameters = None): import argparse parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("--types", '-t', nargs = '+', - choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config', 'an', 'annotator'), - default = ('d', 'p', 'e', 'a', 'g', 'c', 'an'), + choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config', 'an', 'annotator', 'b', 'baseline'), + default = ('d', 'p', 'e', 'a', 'g', 'c', 'an', 'b'), help = "Select the resource types that should be listed.") parser.add_argument("--details", '-d', action='store_true', help = "Prints the complete configuration for all resources") @@ -55,6 +55,10 @@ def resources(command_line_parameters = None): print ("\nList of registered annotators:") print (bob.bio.base.list_resources('annotator', **kwargs)) + if 'b' in args.types or 'baseline' in args.types: + print ("\nList of registered baseline:") + print (bob.bio.base.list_resources('baseline', **kwargs)) + print() def databases(command_line_parameters = None): diff --git a/bob/bio/base/test/dummy/baseline.py b/bob/bio/base/test/dummy/baseline.py new file mode 100644 index 00000000..e52717ec --- /dev/null +++ b/bob/bio/base/test/dummy/baseline.py @@ -0,0 +1,15 @@ +from bob.bio.base.baseline import Baseline +import pkg_resources +import os + + +dummy_dir = pkg_resources.resource_filename('bob.bio.base', 'test/dummy') +class DummyBaseline(Baseline): + + def __init__(self, **kwargs): + super(DummyBaseline, self).__init__(**kwargs) + +baseline = DummyBaseline(name="dummy", + preprocessors={"default": os.path.join(dummy_dir, 'preprocessor.py')}, + extractor=os.path.join(dummy_dir, 'extractor.py'), + algorithm=os.path.join(dummy_dir, 'algorithm.py')) diff --git 
a/bob/bio/base/test/test_baselines.py b/bob/bio/base/test/test_baselines.py new file mode 100644 index 00000000..42760e9d --- /dev/null +++ b/bob/bio/base/test/test_baselines.py @@ -0,0 +1,21 @@ +import tempfile +import shutil +from click.testing import CliRunner +from bob.bio.base.script.baseline import baseline + +def test_baselines(): + + try: + tmp_dir = tempfile.mkdtemp(prefix="bobtest_") + runner = CliRunner() + result = runner.invoke(baseline, args=('-d', 'dummy', '-b', 'dummy', '-T', tmp_dir, '-R', tmp_dir)) + assertion_error_message = ( + 'Command exited with this output: `{}\' \n' + 'If the output is empty, you can run this script locally to see ' + 'what is wrong:\n' + 'bin/bob bio baseline -d dummy -a dummy -o /tmp/temp_annotations' + ''.format(result.output)) + assert result.exit_code == 0, assertion_error_message + + finally: + shutil.rmtree(tmp_dir) diff --git a/bob/bio/base/utils/resources.py b/bob/bio/base/utils/resources.py index fa5e3c42..57a5b5a2 100644 --- a/bob/bio/base/utils/resources.py +++ b/bob/bio/base/utils/resources.py @@ -21,7 +21,7 @@ logger = logging.getLogger("bob.bio.base") #: Keywords for which resources are defined. -valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator') +valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator', 'baseline') def _collect_config(paths): diff --git a/doc/baseline.rst b/doc/baseline.rst new file mode 100644 index 00000000..2996e7ba --- /dev/null +++ b/doc/baseline.rst @@ -0,0 +1,77 @@ +.. _bob.bio.base.baseline: + +================== +Defining baselines +================== + + +Once you have a biometric system well established, tuned and working for a particular database (or a particular set of databases), you may want to provide **an easier to reproduce** way to share it. +For this purpose, we defined something called baseline. 
+ +A baseline is composed of the triplet :any:`bob.bio.base.preprocessor.Preprocessor`, :any:`bob.bio.base.extractor.Extractor` and :any:`bob.bio.base.algorithm.Algorithm`. + +First, check out the baselines ready to be triggered in your environment by running: + +.. code-block:: sh + + $ bob bio baseline --help + + +To create your own baseline, you just need to define it as in the recipe below: + +.. code-block:: py + + from bob.bio.base.baseline import Baseline + + class DummyBaseline(Baseline): + + def __init__(self): + + self.preprocessors = dict() # SHOULD BE DEFINED AS A DICTIONARY + self.preprocessors["default"] = 'my-preprocessor' + self.extractor = 'my-extractor' + self.algorithm = 'my-algorithm' + baseline = DummyBaseline() + +Some databases may require specific preprocessors depending on the type of meta-information provided. +For instance, for some face recognition databases, faces should be cropped in a particular way depending on the annotations provided. +To address this issue, the preprocessors are defined in a dictionary, with a generic preprocessor defined as **default** and the database-specific preprocessor defined by database name as in the example below: + +.. code-block:: py + + self.preprocessors = dict() + self.preprocessors["default"] = 'my-preprocessor' + self.preprocessors["database_name"] = 'my-specific-preprocessor' + + +Below is a full example of how to define a baseline with database-specific preprocessors. + +.. code-block:: py + + from bob.bio.base.baseline import Baseline + + class AnotherBaseline(Baseline): + + def __init__(self): + + self.preprocessors = dict() # SHOULD BE DEFINED AS A DICTIONARY + self.preprocessors["default"] = 'my-preprocessor' + self.preprocessors["database_name"] = 'my-specific-preprocessor' + self.extractor = 'my-extractor' + self.algorithm = 'my-algorithm' + baseline = AnotherBaseline() + +.. note:: + + The triplet can be a resource or a configuration file. 
+ This works in the same way as in :ref:`Running Experiments <running_part_1>`. + +.. note:: + + Baselines are also registered as resources under the keyword ``bob.bio.baseline``. + +You can find the list of readily available baselines using the ``resources.py`` command: + +.. code-block:: sh + + $ resources.py --types baseline diff --git a/doc/index.rst b/doc/index.rst index 2b966a5d..8e1e3a67 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -75,6 +75,7 @@ Users Guide struct_bio_rec_sys experiments implementation + baseline filelist-guide more annotations diff --git a/setup.py b/setup.py index 58dbbfcf..e488a1c3 100644 --- a/setup.py +++ b/setup.py @@ -147,12 +147,19 @@ setup( 'dir = bob.bio.base.script.commands:dir', 'gen = bob.bio.base.script.gen:gen', 'evaluate = bob.bio.base.script.commands:evaluate', + 'baseline = bob.bio.base.script.baseline:baseline', ], # annotators 'bob.bio.annotator': [ 'dummy = bob.bio.base.test.dummy.annotator:annotator', ], + + #baselines + 'bob.bio.baseline':[ + 'dummy = bob.bio.base.test.dummy.baseline:baseline', + ], + }, # Classifiers are important if you plan to distribute this package through -- GitLab