Commit 82413510 authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Created the Baselines Concept

Created the Baselines Concept

Organized the documentation

Documented the helps

Added the baseline in the resources search

Removed some unused imports

Solved discussion

Solving other discussions
parent 8f48d57f
Pipeline #20209 passed with stage
in 35 minutes and 37 seconds
......@@ -6,6 +6,7 @@ from . import algorithm
from . import tools
from . import grid # only one file, not complete directory
from . import annotator
from . import baseline
from . import script
from . import test
......
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
class Baseline(object):
    """
    Base class to define baselines

    A Baseline is composed of the triplet :any:`bob.bio.base.preprocessor.Preprocessor`,
    :any:`bob.bio.base.extractor.Extractor` and :any:`bob.bio.base.algorithm.Algorithm`

    Attributes
    ----------

      name: str
        Name of the baseline. This name will be displayed in the command line interface

      preprocessors: dict
        Dictionary containing all possible preprocessors

      extractor: str
        Registered resource or a config file containing the feature extractor

      algorithm: str
        Registered resource or a config file containing the algorithm

    """

    def __init__(self, name="", preprocessors=None, extractor="", algorithm="", **kwargs):
        super(Baseline, self).__init__(**kwargs)
        self.name = name
        # A ``dict()`` default in the signature would be created once and shared
        # by every instance; build a fresh dictionary per instance instead.
        self.preprocessors = dict() if preprocessors is None else preprocessors
        self.extractor = extractor
        self.algorithm = algorithm
from .Baseline import Baseline
import bob.bio.base
def get_available_databases():
    """
    Get all the available databases through the database entry-points

    Returns
    -------
    dict
      Maps the name of every database resource that could be loaded to a
      dictionary with two keys: ``"has_zt"`` (``True`` when the loaded object
      has both a ``zobjects`` and a ``tobjects`` attribute) and ``"groups"``
      (the subset of ``["dev", "eval"]`` reported by the database, or
      ``["dev"]`` when its groups cannot be queried).
      Databases that fail to load are left out.
    """

    available_databases = dict()
    all_databases = bob.bio.base.resource_keys('database', strip=[])

    for database in all_databases:
        # ``except Exception`` (instead of a bare ``except``) keeps the
        # best-effort behaviour but lets KeyboardInterrupt/SystemExit through.
        try:
            database_entry_point = bob.bio.base.load_resource(database, 'database')

            entry = dict()

            # Checking if the database has data for the ZT normalization
            entry["has_zt"] = hasattr(database_entry_point, "zobjects") and hasattr(database_entry_point, "tobjects")

            # Searching for database groups
            try:
                groups = list(database_entry_point.groups())
                entry["groups"] = [g for g in ["dev", "eval"] if g in groups]
            except Exception:
                # In case the method groups is not implemented
                entry["groups"] = ["dev"]

            # Only publish complete entries, so callers can rely on both keys
            available_databases[database] = entry
        except Exception:
            # Database could not be loaded in this environment: skip it
            continue

    return available_databases
def get_config():
    """Describe the configuration of this package as a string.

    The work is delegated to ``bob.extension.get_config``, called with the
    name of this module.
    """
    from bob.extension import get_config as _package_config

    return _package_config(__name__)
# gets sphinx autodoc done right - don't remove it
# Exports every name visible in this module at this point that does not start
# with an underscore.
__all__ = [_ for _ in dir() if not _.startswith('_')]
#!/usr/bin/env python
# vim: set fileencoding=utf-8 :
# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
"""
This script runs some face recognition baselines under some face databases
Examples:
This command line will run the facenet from David Sandberg using the ATnT dataset:
`bob bio baseline --baseline facenet_msceleba_inception_v1 --database atnt`
"""
import bob.bio.base
import bob.io.base
import os
from bob.bio.base.script.verify import main as verify
from bob.bio.base.baseline import get_available_databases
from bob.extension.scripts.click_helper import (
verbosity_option, ConfigCommand, ResourceOption)
import click
@click.command(entry_point_group='bob.bio.config', cls=ConfigCommand)
@click.option('--database', '-d', required=True, cls=ResourceOption, help="Registered database. Check it out `resources.py --types database` for ready to be used databases")
@click.option('--baseline', '-b', required=True, cls=ResourceOption, help="Registered baseline. Check it out `resources.py --types baseline` for ready to be used baseline")
@click.option('--temp-dir', '-T', required=False, cls=ResourceOption, help="The directory for temporary files")
@click.option('--result-dir', '-R', required=False, cls=ResourceOption, help="The directory for resulting score files")
@click.option('--grid', '-g', help="Execute the algorithm in the SGE grid.", is_flag=True)
@click.option('--zt-norm', '-z', help="Enable the computation of ZT norms (if the database supports it).", is_flag=True)
@verbosity_option(cls=ResourceOption)
def baseline(baseline, database, temp_dir, result_dir, grid, zt_norm, **kwargs):
    """
    Run a biometric recognition baselines

    Check it out all baselines available by typing `resources.py --types baseline`
    """
    # NOTE: the docstring above is what click prints as the command help, so
    # implementation notes are kept in comments.
    #
    # The command loads the requested baseline resource, picks the preprocessor
    # registered for the database (or the "default" one), builds the argument
    # list of the verification script and hands it over to ``verify``.

    def search_preprocessor(key, keys):
        """
        Wrapper that searches for preprocessors for specific databases.
        If not found, the default preprocessor is returned
        """
        for k in keys:
            if key.startswith(k):
                return k
        # No database-specific entry matched
        return "default"

    # Triggering training for each baseline/database
    loaded_baseline = bob.bio.base.load_resource(baseline, 'baseline', package_prefix="bob.bio.")

    # this is the default sub-directory that is used
    sub_directory = os.path.join(database, baseline)

    # Databases that cannot be loaded are not listed; report that as a usage
    # error instead of letting a bare KeyError escape.
    available_databases = get_available_databases()
    if database not in available_databases:
        raise click.BadParameter(
            "database `{}' is not registered or could not be loaded".format(database),
            param_hint='--database')
    database_data = available_databases[database]

    preprocessor_key = search_preprocessor(database, loaded_baseline.preprocessors.keys())

    parameters = [
        '-p', loaded_baseline.preprocessors[preprocessor_key],
        '-e', loaded_baseline.extractor,
        '-d', database,
        '-a', loaded_baseline.algorithm,
        '-vvv',
        '--sub-directory', sub_directory
    ]

    # Both directories are optional; forwarding ``None`` would put a non-string
    # into the argument list, so they are only added when given.
    if temp_dir is not None:
        parameters += ['--temp-directory', temp_dir]
    if result_dir is not None:
        parameters += ['--result-directory', result_dir]

    parameters += ['--groups'] + database_data["groups"]

    if grid:
        parameters += ['-g', 'demanding']

    # The "has_zt" key is always present, so its *value* has to be checked
    if zt_norm and database_data["has_zt"]:
        parameters += ['--zt-norm']

    verify(parameters)
......@@ -9,8 +9,8 @@ def resources(command_line_parameters = None):
import argparse
parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("--types", '-t', nargs = '+',
choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config', 'an', 'annotator'),
default = ('d', 'p', 'e', 'a', 'g', 'c', 'an'),
choices = ('d', 'database', 'p', 'preprocessor', 'e', 'extractor', 'a', 'algorithm', 'g', 'grid', 'c', 'config', 'an', 'annotator', 'b', 'baseline'),
default = ('d', 'p', 'e', 'a', 'g', 'c', 'an', 'b'),
help = "Select the resource types that should be listed.")
parser.add_argument("--details", '-d', action='store_true', help = "Prints the complete configuration for all resources")
......@@ -55,6 +55,10 @@ def resources(command_line_parameters = None):
print ("\nList of registered annotators:")
print (bob.bio.base.list_resources('annotator', **kwargs))
if 'b' in args.types or 'baseline' in args.types:
print ("\nList of registered baseline:")
print (bob.bio.base.list_resources('baseline', **kwargs))
print()
def databases(command_line_parameters = None):
......
from bob.bio.base.baseline import Baseline
import pkg_resources
import os
dummy_dir = pkg_resources.resource_filename('bob.bio.base', 'test/dummy')
class DummyBaseline(Baseline):
    """Baseline subclass that forwards every keyword argument unchanged to
    the ``Baseline`` constructor."""

    def __init__(self, **kwargs):
        super(DummyBaseline, self).__init__(**kwargs)
# Baseline named "dummy" whose default preprocessor, extractor and algorithm
# are the configuration files found in ``dummy_dir``.
# NOTE(review): presumably the object a ``dummy`` baseline entry point refers
# to - confirm against setup.py.
baseline = DummyBaseline(name="dummy",
preprocessors={"default": os.path.join(dummy_dir, 'preprocessor.py')},
extractor=os.path.join(dummy_dir, 'extractor.py'),
algorithm=os.path.join(dummy_dir, 'algorithm.py'))
import tempfile
import shutil
from click.testing import CliRunner
from bob.bio.base.script.baseline import baseline
def test_baselines():
    """Run the ``baseline`` command on the dummy database/baseline pair and
    check that it exits with code 0."""

    # Created before the ``try`` so that the ``finally`` clause never runs
    # with ``tmp_dir`` unbound when the directory cannot be created.
    tmp_dir = tempfile.mkdtemp(prefix="bobtest_")
    try:
        runner = CliRunner()
        result = runner.invoke(baseline, args=('-d', 'dummy', '-b', 'dummy', '-T', tmp_dir, '-R', tmp_dir))
        # The hint mirrors the options actually used above
        assertion_error_message = (
            'Command exited with this output: `{}\' \n'
            'If the output is empty, you can run this script locally to see '
            'what is wrong:\n'
            'bin/bob bio baseline -d dummy -b dummy -T /tmp/baseline -R /tmp/baseline'
            ''.format(result.output))
        assert result.exit_code == 0, assertion_error_message
    finally:
        shutil.rmtree(tmp_dir)
......@@ -21,7 +21,7 @@ logger = logging.getLogger("bob.bio.base")
#: Keywords for which resources are defined.
valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator')
valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator', 'baseline')
def _collect_config(paths):
......
.. _bob.bio.base.baseline:
==================
Defining baselines
==================
Once you have a biometric system well established, tuned and working for a particular database (or a particular set of databases), you may want to share it in a way that is **easier to reproduce**.
For this purpose, we defined something called baseline.
A baseline is composed of the triplet :any:`bob.bio.base.preprocessor.Preprocessor`, :any:`bob.bio.base.extractor.Extractor` and :any:`bob.bio.base.algorithm.Algorithm`.
First, check out the baselines that are ready to be triggered in your environment by doing:
.. code-block:: sh
$ bob bio baseline --help
To create your own baseline, you just need to define it like in the recipe below:
.. code-block:: py
from bob.bio.base.baseline import Baseline
class DummyBaseline(Baseline):
def __init__(self):
self.preprocessors = dict() # SHOULD BE DEFINED AS A DICTIONARY
self.preprocessors["default"] = 'my-preprocessor'
self.extractor = 'my-extractor'
self.algorithm = 'my-algorithm'
baseline = DummyBaseline()
Some databases may require specific preprocessors depending on the type of meta-information provided.
For instance, for some face recognition databases, faces should be cropped in a particular way depending on the annotations provided.
To approach this issue, the preprocessors are defined in a dictionary, with a generic preprocessor defined as **default** and the database specific preprocessor defined by database name as in the example below:
.. code-block:: py
self.preprocessors = dict()
self.preprocessors["default"] = 'my-preprocessor'
self.preprocessors["database_name"] = 'my-specific-preprocessor'
Below is a full example of how to define a baseline with database-specific preprocessors.
.. code-block:: py
from bob.bio.base.baseline import Baseline
class AnotherBaseline(Baseline):
def __init__(self):
self.preprocessors = dict() # SHOULD BE DEFINED AS A DICTIONARY
self.preprocessors["default"] = 'my-preprocessor'
self.preprocessors["database_name"] = 'my-specific-preprocessor'
self.extractor = 'my-extractor'
self.algorithm = 'my-algorithm'
baseline = AnotherBaseline()
.. note::
The triplet can be a resource or a configuration file.
This works in the same way as in :ref:`Running Experiments <running_part_1>`.
.. note::
Baselines are also registered as resources under the keyword `bob.bio.baseline`.
You can find the list of readily available baselines using the ``resources.py`` command:
.. code-block:: sh
$ resources.py --types baseline
......@@ -75,6 +75,7 @@ Users Guide
struct_bio_rec_sys
experiments
implementation
baseline
filelist-guide
more
annotations
......
......@@ -147,12 +147,19 @@ setup(
'dir = bob.bio.base.script.commands:dir',
'gen = bob.bio.base.script.gen:gen',
'evaluate = bob.bio.base.script.commands:evaluate',
'baseline = bob.bio.base.script.baseline:baseline',
],
# annotators
'bob.bio.annotator': [
'dummy = bob.bio.base.test.dummy.annotator:annotator',
],
#baselines
'bob.bio.baseline':[
'dummy = bob.bio.base.test.dummy.baseline:baseline',
],
},
# Classifiers are important if you plan to distribute this package through
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment