diff --git a/bob/bio/base/__init__.py b/bob/bio/base/__init__.py index 5b2461a9b9400164f5010f4413fbabf26d2c17b3..9ef81253a184f70511b99f0c3ecda10d28de32a0 100644 --- a/bob/bio/base/__init__.py +++ b/bob/bio/base/__init__.py @@ -5,6 +5,7 @@ from . import extractor from . import algorithm from . import tools from . import grid # only one file, not complete directory +from . import annotator from . import script from . import test diff --git a/bob/bio/base/annotator/Annotator.py b/bob/bio/base/annotator/Annotator.py new file mode 100644 index 0000000000000000000000000000000000000000..9a4cfb2b0bf55dd5aacf23423d1cae70f0f0088a --- /dev/null +++ b/bob/bio/base/annotator/Annotator.py @@ -0,0 +1,40 @@ +from bob.bio.base import read_original_data as base_read + + +class Annotator(object): + """Annotator class for all annotators. This class is meant to be used in + conjunction with the bob bio annotate script. + + Attributes + ---------- + read_original_data : callable + A function that loads the samples. The syntax is like + :any:`bob.bio.base.read_original_data`. + """ + + def __init__(self, read_original_data=None, **kwargs): + super(Annotator, self).__init__(**kwargs) + self.read_original_data = read_original_data or base_read + + def annotate(self, sample, **kwargs): + """Annotates a sample and returns annotations in a dictionary. + + Parameters + ---------- + sample : numpy.ndarray + The sample that is being annotated. + **kwargs + The extra arguments that may be passed. + + Returns + ------- + dict + A dictionary containing the annotations of the biometric sample. If + the program fails to annotate the sample, it should return an empty + dictionary. + """ + raise NotImplementedError + + # Alias call to annotate + def __call__(self, sample, **kwargs): + return self.annotate(sample, **kwargs) diff --git a/bob/bio/base/annotator/Callable.py b/bob/bio/base/annotator/Callable.py new file mode 100644 index 0000000000000000000000000000000000000000..0858a852dc7984d340c7b26c21c749b4ddbfe2f3 --- /dev/null +++ b/bob/bio/base/annotator/Callable.py @@ -0,0 +1,22 @@ +from . import Annotator + + +class Callable(Annotator): + """A class that wraps a callable object that annotates a sample into a + bob.bio.annotator object. + + Attributes + ---------- + callable : callable + A callable with the following signature: + ``annotations = callable(sample, **kwargs)`` that takes numpy array and + returns annotations in dictionary format for that biometric sample. + Please see :any:`Annotator` for more information. + """ + + def __init__(self, callable, **kwargs): + super(Callable, self).__init__(**kwargs) + self.callable = callable + + def annotate(self, sample, **kwargs): + return self.callable(sample, **kwargs) diff --git a/bob/bio/base/annotator/FailSafe.py b/bob/bio/base/annotator/FailSafe.py new file mode 100644 index 0000000000000000000000000000000000000000..ebeeb8ac69b61df5f3368785ce23d85d5129a4e9 --- /dev/null +++ b/bob/bio/base/annotator/FailSafe.py @@ -0,0 +1,48 @@ +import logging +from . import Annotator + +logger = logging.getLogger(__name__) + + +class FailSafe(Annotator): + """A fail-safe annotator. + This annotator takes a list of annotator and tries them until you get your + annotations. + The annotations of previous annotator is passed to the next one. + + Attributes + ---------- + annotators : list + A list of annotators to try + required_keys : list + A list of keys that should be available in annotations to stop trying + different annotators. + """ + + def __init__(self, annotators, required_keys, **kwargs): + super(FailSafe, self).__init__(**kwargs) + self.annotators = list(annotators) + self.required_keys = list(required_keys) + + def annotate(self, sample, **kwargs): + if 'annotations' not in kwargs or kwargs['annotations'] is None: + kwargs['annotations'] = {} + for annotator in self.annotators: + try: + annotations = annotator(sample, **kwargs) + except Exception: + logger.debug( + "The annotator `%s' failed to annotate!", annotator, + exc_info=True) + annotations = {} + if not annotations: + logger.debug( + "Annotator `%s' returned empty annotations.", annotator) + kwargs['annotations'].update(annotations) + # check if we have all the required annotations + if all(key in kwargs['annotations'] for key in self.required_keys): + break + else: # this else is for the for loop + # we don't want to return half of the annotations + kwargs['annotations'] = {} + return kwargs['annotations'] diff --git a/bob/bio/base/annotator/__init__.py b/bob/bio/base/annotator/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8e3546ea80ddd9dd5f474384eaf1a711e67fd2ec --- /dev/null +++ b/bob/bio/base/annotator/__init__.py @@ -0,0 +1,31 @@ +from .Annotator import Annotator +from .FailSafe import FailSafe +from .Callable import Callable + + +# gets sphinx autodoc done right - don't remove it +def __appropriate__(*args): + """Says object was actually declared here, and not in the import module. + Fixing sphinx warnings of not being able to find classes, when path is + shortened. + + Parameters + ---------- + *args + An iterable of objects to modify + + Resolves `Sphinx referencing issues + <https://github.com/sphinx-doc/sphinx/issues/3048>` + """ + + for obj in args: + obj.__module__ = __name__ + + +__appropriate__( + Annotator, + FailSafe, + Callable, +) + +__all__ = [_ for _ in dir() if not _.startswith('_')] diff --git a/bob/bio/base/script/annotate.py b/bob/bio/base/script/annotate.py new file mode 100644 index 0000000000000000000000000000000000000000..4df5c528719a3e66ef8a7b123ea205b72d17a412 --- /dev/null +++ b/bob/bio/base/script/annotate.py @@ -0,0 +1,94 @@ +"""A script to help annotate databases. +""" +import logging +import json +import click +from os.path import dirname, isfile +from bob.extension.scripts.click_helper import ( + verbosity_option, ConfigCommand, ResourceOption) +from bob.io.base import create_directories_safe +from bob.bio.base.tools.grid import indices + +logger = logging.getLogger(__name__) + + +@click.command(entry_point_group='bob.bio.config', cls=ConfigCommand) +@click.option('--database', '-d', required=True, cls=ResourceOption, + entry_point_group='bob.bio.database') +@click.option('--annotator', '-a', required=True, cls=ResourceOption, + entry_point_group='bob.bio.annotator') +@click.option('--output-dir', '-o', required=True, cls=ResourceOption) +@click.option('--force', '-f', is_flag=True, cls=ResourceOption) +@click.option('--array', type=click.INT, default=1,) +@verbosity_option(cls=ResourceOption) +def annotate(database, annotator, output_dir, force, array, **kwargs): + """Annotates a database. + The annotations are written in text file (json) format which can be read + back using :any:`bob.db.base.read_annotation_file` (annotation_type='json') + + \b + Parameters + ---------- + database : :any:`bob.bio.database` + The database that you want to annotate. Can be a ``bob.bio.database`` + entry point or a path to a Python file which contains a variable + named `database`. + annotator : callable + A function that takes the database and a sample (biofile) of the + database and returns the annotations in a dictionary. Can be a + ``bob.bio.annotator`` entry point or a path to a Python file which + contains a variable named `annotator`. + output_dir : str + The directory to save the annotations. + force : bool, optional + Whether to overwrite existing annotations. + array : int, optional + Use this option alongside gridtk to submit this script as an array job. + verbose : int, optional + Increases verbosity (see help for --verbose). + + \b + [CONFIG]... Configuration files. It is possible to pass one or + several Python files (or names of ``bob.bio.config`` + entry points) which contain the parameters listed + above as Python variables. The options through the + command-line (see below) will override the values of + configuration files. + """ + logger.debug('database: %s', database) + logger.debug('annotator: %s', annotator) + logger.debug('force: %s', force) + logger.debug('output_dir: %s', output_dir) + logger.debug('array: %s', array) + logger.debug('kwargs: %s', kwargs) + + biofiles = database.objects(groups=None, protocol=database.protocol) + biofiles = sorted(biofiles) + + if array > 1: + start, end = indices(biofiles, array) + biofiles = biofiles[start:end] + + total = len(biofiles) + logger.info("Saving annotations in %s", output_dir) + logger.info("Annotating %d samples ...", total) + + for i, biofile in enumerate(biofiles): + outpath = biofile.make_path(output_dir, '.json') + if isfile(outpath): + if force: + logger.debug("Overwriting the annotations file `%s'", outpath) + else: + logger.debug("The annotation `%s' already exists", outpath) + continue + + logger.info( + "Extracting annotations for sample %d out of %d: %s", i + 1, total, + outpath) + data = annotator.read_original_data( + biofile, database.original_directory, database.original_extension) + annot = annotator(data) + + create_directories_safe(dirname(outpath)) + with open(outpath, 'w') as f: + json.dump(annot, f, indent=1, allow_nan=False) diff --git a/bob/bio/base/script/bio.py b/bob/bio/base/script/bio.py new file mode 100644 index 0000000000000000000000000000000000000000..34022948039bb7e6f2aeab072899231c1885a6df --- /dev/null +++ b/bob/bio/base/script/bio.py @@ -0,0 +1,12 @@ +"""The main entry for bob.bio (click-based) scripts. +""" +import click +import pkg_resources +from click_plugins import with_plugins + + +@with_plugins(pkg_resources.iter_entry_points('bob.bio.cli')) +@click.group() +def bio(): + """Entry for bob.bio commands.""" + pass diff --git a/bob/bio/base/script/evaluate.py b/bob/bio/base/script/evaluate.py index f9c89c8c2c804ce8555508011749bfedfe6642c0..21c27ee876fac1eb0c6f9748b79993dd73f10c71 100644 --- a/bob/bio/base/script/evaluate.py +++ b/bob/bio/base/script/evaluate.py @@ -109,6 +109,7 @@ def command_line_arguments(command_line_parameters): def _add_far_labels(min_far): # compute and apply tick marks + assert min_far > 0 ticks = [min_far] while ticks[-1] < 1.: ticks.append(ticks[-1] * 10.) pyplot.xticks(ticks) @@ -116,7 +117,7 @@ def _add_far_labels(min_far): -def _plot_roc(frrs, colors, labels, title, fontsize=10, position=None, farfrrs=None): +def _plot_roc(frrs, colors, labels, title, fontsize=10, position=None, farfrrs=None, min_far=None): if position is None: position = 'lower right' figure = pyplot.figure() @@ -133,7 +134,7 @@ def _plot_roc(frrs, colors, labels, title, fontsize=10, position=None, farfrrs=N else: pyplot.plot([x[0] for x in farfrrs], [(1.-x[1]) for x in farfrrs], '--', color='black') - _add_far_labels(frrs[0][0][0]) + _add_far_labels(min_far) # set label, legend and title pyplot.xlabel('FMR') @@ -366,7 +367,7 @@ def main(command_line_parameters=None): # create a multi-page PDF for the ROC curve pdf = PdfPages(args.roc) # create a separate figure for dev and eval - pdf.savefig(_plot_roc(frrs_dev, colors, args.legends, args.title[0] if args.title is not None else "ROC for development set", args.legend_font_size, args.legend_position, args.far_line_at), bbox_inches='tight') + pdf.savefig(_plot_roc(frrs_dev, colors, args.legends, args.title[0] if args.title is not None else "ROC for development set", args.legend_font_size, args.legend_position, args.far_line_at, min_far=args.min_far_value), bbox_inches='tight') del frrs_dev if args.eval_files: if args.far_line_at is not None: @@ -376,7 +377,7 @@ def main(command_line_parameters=None): farfrrs.append(bob.measure.farfrr(scores_eval[i][0], scores_eval[i][1], threshold)) else: farfrrs = None - pdf.savefig(_plot_roc(frrs_eval, colors, args.legends, args.title[1] if args.title is not None else "ROC for evaluation set", args.legend_font_size, args.legend_position, farfrrs), bbox_inches='tight') + pdf.savefig(_plot_roc(frrs_eval, colors, args.legends, args.title[1] if args.title is not None else "ROC for evaluation set", args.legend_font_size, args.legend_position, farfrrs, min_far=args.min_far_value), bbox_inches='tight') del frrs_eval pdf.close() except RuntimeError as e: diff --git a/bob/bio/base/test/dummy/annotator.py b/bob/bio/base/test/dummy/annotator.py new file mode 100644 index 0000000000000000000000000000000000000000..e58f6c62d3cc8c030100db4301de85ef70f3b113 --- /dev/null +++ b/bob/bio/base/test/dummy/annotator.py @@ -0,0 +1,19 @@ +from bob.bio.base.annotator import FailSafe, Callable + + +def simple_annotator(image, **kwargs): + return { + 'topleft': (0, 0), + 'bottomright': image.shape, + } + + +def fail_annotator(image, **kwargs): + return {} + + +annotator = FailSafe( + [Callable(fail_annotator), + Callable(simple_annotator)], + required_keys=['topleft', 'bottomright'], +) diff --git a/bob/bio/base/test/test_annotators.py b/bob/bio/base/test/test_annotators.py new file mode 100644 index 0000000000000000000000000000000000000000..ee4cdc77f1872fffadc2a602c6f8f95b76909320 --- /dev/null +++ b/bob/bio/base/test/test_annotators.py @@ -0,0 +1,27 @@ +import tempfile +import os +import shutil +from click.testing import CliRunner +from bob.bio.base.script.annotate import annotate +from bob.db.base import read_annotation_file + + +def test_annotate(): + + try: + tmp_dir = tempfile.mkdtemp(prefix="bobtest_") + runner = CliRunner() + result = runner.invoke(annotate, args=( + '-d', 'dummy', '-a', 'dummy', '-o', tmp_dir)) + assert result.exit_code == 0, result.output + + # test if annotations exist + for dirpath, dirnames, filenames in os.walk(tmp_dir): + for filename in filenames: + path = os.path.join(dirpath, filename) + annot = read_annotation_file(path, 'json') + assert annot['topleft'] == [0, 0] + # size of atnt images + assert annot['bottomright'] == [112, 92] + finally: + shutil.rmtree(tmp_dir) diff --git a/bob/bio/base/utils/resources.py b/bob/bio/base/utils/resources.py index e2c5816d326830619c5978b33ccb47f7528bf3ec..5f6f4c138a11eba79d7defba80aa8b6ced3a21df 100644 --- a/bob/bio/base/utils/resources.py +++ b/bob/bio/base/utils/resources.py @@ -20,7 +20,7 @@ logger = logging.getLogger("bob.bio.base") #: Keywords for which resources are defined. -valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config') +valid_keywords = ('database', 'preprocessor', 'extractor', 'algorithm', 'grid', 'config', 'annotator') def _collect_config(paths): @@ -132,7 +132,7 @@ def load_resource(resource, keyword, imports = ['bob.bio.base'], package_prefix= return read_config_file([resource], keyword) if keyword not in valid_keywords: - raise ValueError("The given keyword '%s' is not valid. Please use one of %s!" % (str(keyword), str(valid_keywords))) + logger.warning("The given keyword '%s' is not valid. Please use one of %s!", keyword, valid_keywords) # now, we check if the resource is registered as an entry point in the resource files entry_points = [entry_point for entry_point in _get_entry_points(keyword, package_prefix=package_prefix) if entry_point.name == resource] diff --git a/doc/annotations.rst b/doc/annotations.rst new file mode 100644 index 0000000000000000000000000000000000000000..851a2f2fe48911d18a376f7a7350384962ca8363 --- /dev/null +++ b/doc/annotations.rst @@ -0,0 +1,29 @@ +.. _bob.bio.base.annotations: + +============================== +Annotating biometric databases +============================== + +It is often required to annotate the biometric samples before running +experiments. This often happens in face biometrics where each face is detected +and location of landmarks on the face is saved prior to running experiments. + +To facilitate the process of annotating a new database, this package provides +a command-line script: + +.. code-block:: sh + + $ bob bio annotate --help + +This script accepts two main parameters a database object that inherits from +:any:`bob.bio.base.database.BioDatabase` and an annotator object that inherits +from :any:`bob.bio.base.annotator.Annotator`. Please see the help message of +the script for more information. + +The script can also be run in parallel using :ref:`gridtk`: + +.. code-block:: sh + + $ jman submit --array 64 -- bob bio annotate /path/to/config.py --array 64 + +The number that is given to the ``--array`` options should match. diff --git a/doc/extra-intersphinx.txt b/doc/extra-intersphinx.txt index 86d9ac680e99acbbc500e9fd7475a6fad46b5484..c087b9d736f5a34bc72de90874ca8c78bd13883d 100644 --- a/doc/extra-intersphinx.txt +++ b/doc/extra-intersphinx.txt @@ -1,5 +1,4 @@ python -numpy bob.bio.face bob.bio.gmm bob.bio.video diff --git a/doc/implemented.rst b/doc/implemented.rst index 84b358bf4c913d9f72b54a080b0bae3adf02af72..052e56deb9b1f7c0bb0f8fb12aafc59215c1cb82 100644 --- a/doc/implemented.rst +++ b/doc/implemented.rst @@ -15,6 +15,7 @@ Base Classes bob.bio.base.extractor.Extractor bob.bio.base.algorithm.Algorithm bob.bio.base.grid.Grid + bob.bio.base.annotator.Annotator Implementations @@ -38,7 +39,8 @@ Implementations bob.bio.base.database.BioDatabase bob.bio.base.database.ZTBioDatabase bob.bio.base.database.FileListBioDatabase - + bob.bio.base.annotator.FailSafe + bob.bio.base.annotator.Callable Preprocessors ------------- @@ -72,5 +74,10 @@ Grid Configuration .. adapted from http://stackoverflow.com/a/29789910/3301902 to ge a nice dictionary content view +Annotators +---------- + +.. automodule:: bob.bio.base.annotator + .. include:: links.rst diff --git a/doc/index.rst b/doc/index.rst index d4141fc693bece4455423b4517e268af0e7aa38c..3ccf7ff508c80f07a649ece07465705974209018 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -64,7 +64,6 @@ If you run biometric recognition experiments using the bob.bio framework, please } -=========== Users Guide =========== @@ -77,8 +76,9 @@ Users Guide implementation filelist-guide more + annotations + -================ Reference Manual ================ @@ -89,7 +89,7 @@ Reference Manual py_api -========== + References ========== @@ -101,7 +101,6 @@ References .. [GW09] *M. Günther and R.P. Würtz*. **Face detection and recognition using maximum likelihood classifiers on Gabor graphs**. International Journal of Pattern Recognition and Artificial Intelligence, 23(3):433-461, 2009. -========= ToDo-List ========= @@ -111,7 +110,7 @@ Here is a list of things that needs to be done: .. todolist:: -================== + Indices and tables ================== diff --git a/requirements.txt b/requirements.txt index 5f863c468a4d0b8e4e819b893f7a5baf3c46b72e..bc8552c051bfbe99ce18336104afbc26fc222734 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,8 @@ bob.learn.linear bob.math bob.measure bob.sp +click +click-plugins +numpy scipy setuptools diff --git a/setup.py b/setup.py index 5b500cbd01d0d2edd86879c1c8633bb6418149ad..e93fd72f55d01a62b57200f3fb94bdc5b808e3b3 100644 --- a/setup.py +++ b/setup.py @@ -123,10 +123,25 @@ setup( 'demanding = bob.bio.base.config.grid.demanding:grid', 'gpu = bob.bio.base.config.grid.gpu:grid', ], + # declare database to bob 'bob.db': [ 'bio_filelist = bob.bio.base.database.filelist.driver:Interface', ], + # main entry for bob bio cli + 'bob.cli': [ + 'bio = bob.bio.base.script.bio:bio', + ], + + # bob bio scripts + 'bob.bio.cli': [ + 'annotate = bob.bio.base.script.annotate:annotate', + ], + + # annotators + 'bob.bio.annotator': [ + 'dummy = bob.bio.base.test.dummy.annotator:annotator', + ], }, # Classifiers are important if you plan to distribute this package through