From 870a416b081fd5fef2723dab86e0610349597099 Mon Sep 17 00:00:00 2001
From: Amir MOHAMMADI <amir.mohammadi@idiap.ch>
Date: Fri, 2 Feb 2018 15:29:33 +0100
Subject: [PATCH] Add annotator support

---
 bob/bio/base/__init__.py             |  1 +
 bob/bio/base/annotator/Base.py       | 41 ++++++++++++
 bob/bio/base/annotator/Callable.py   | 13 ++++
 bob/bio/base/annotator/FailSafe.py   | 45 ++++++++++++++
 bob/bio/base/annotator/__init__.py   | 31 ++++++++++
 bob/bio/base/script/annotate.py      | 93 ++++++++++++++++++++++++++++
 bob/bio/base/test/dummy/annotator.py | 19 ++++++
 bob/bio/base/test/test_annotators.py | 27 ++++++++
 doc/annotations.rst                  | 28 +++++++++
 doc/extra-intersphinx.txt            |  1 -
 doc/implemented.rst                  | 10 ++-
 doc/index.rst                        | 13 ++--
 doc/py_api.rst                       |  1 -
 requirements.txt                     |  3 +
 setup.py                             | 13 +++-
 15 files changed, 327 insertions(+), 12 deletions(-)
 create mode 100644 bob/bio/base/annotator/Base.py
 create mode 100644 bob/bio/base/annotator/Callable.py
 create mode 100644 bob/bio/base/annotator/FailSafe.py
 create mode 100644 bob/bio/base/annotator/__init__.py
 create mode 100644 bob/bio/base/script/annotate.py
 create mode 100644 bob/bio/base/test/dummy/annotator.py
 create mode 100644 bob/bio/base/test/test_annotators.py
 create mode 100644 doc/annotations.rst

diff --git a/bob/bio/base/__init__.py b/bob/bio/base/__init__.py
index 5b2461a9..9ef81253 100644
--- a/bob/bio/base/__init__.py
+++ b/bob/bio/base/__init__.py
@@ -5,6 +5,7 @@ from . import extractor
 from . import algorithm
 from . import tools
 from . import grid # only one file, not complete directory
+from . import annotator
 
 from . import script
 from . import test
diff --git a/bob/bio/base/annotator/Base.py b/bob/bio/base/annotator/Base.py
new file mode 100644
index 00000000..c40e028a
--- /dev/null
+++ b/bob/bio/base/annotator/Base.py
@@ -0,0 +1,41 @@
+from bob.bio.base import read_original_data as base_read
+import numpy  # for documentation
+
+
+class Base(object):
+    """Base class for all annotators. This class is meant to be used in
+    conjunction with the bob bio annotate script.
+
+    Attributes
+    ----------
+    read_original_data : callable
+        A function that loads the samples, with the same signature as
+        :any:`bob.bio.base.read_original_data` (which is the default).
+    """
+
+    def __init__(self, read_original_data=None, **kwargs):
+        super(Base, self).__init__(**kwargs)
+        self.read_original_data = read_original_data or base_read
+
+    def annotate(self, sample, **kwargs):
+        """Annotates a sample and returns annotations in a dictionary.
+
+        Parameters
+        ----------
+        sample : numpy.ndarray
+            The sample that is being annotated.
+        **kwargs
+            The extra arguments that may be passed.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the annotations of the biometric sample. If
+            the program fails to annotate the sample, it should return an empty
+            dictionary.
+        """
+        raise NotImplementedError
+
+    # Alias of annotate: makes annotator instances directly callable
+    def __call__(self, sample, **kwargs):
+        return self.annotate(sample, **kwargs)
diff --git a/bob/bio/base/annotator/Callable.py b/bob/bio/base/annotator/Callable.py
new file mode 100644
index 00000000..b4736d2c
--- /dev/null
+++ b/bob/bio/base/annotator/Callable.py
@@ -0,0 +1,13 @@
+from .Base import Base
+
+
+class Callable(Base):
+    """Wraps a plain callable as an annotator: ``annotate`` forwards the sample
+    and any keyword arguments to the callable and returns its result."""
+
+    def __init__(self, callable, **kwargs):
+        super(Callable, self).__init__(**kwargs)
+        self.callable = callable
+
+    def annotate(self, sample, **kwargs):
+        return self.callable(sample, **kwargs)
diff --git a/bob/bio/base/annotator/FailSafe.py b/bob/bio/base/annotator/FailSafe.py
new file mode 100644
index 00000000..6b9cec69
--- /dev/null
+++ b/bob/bio/base/annotator/FailSafe.py
@@ -0,0 +1,45 @@
+import logging
+from . import Base
+
+logger = logging.getLogger(__name__)
+
+
+class FailSafe(Base):
+    """A fail-safe annotator.
+    This annotator takes a list of annotators and tries them in order until all
+    the required annotations are available.
+    The annotations of the previous annotators are passed to the next one.
+
+    Attributes
+    ----------
+    annotators : list
+        A list of annotators to try
+    required_keys : list
+        A list of keys that should be available in annotations to stop trying
+        different annotators.
+    """
+
+    def __init__(self, annotators, required_keys, **kwargs):
+        super(FailSafe, self).__init__(**kwargs)
+        self.annotators = list(annotators)
+        self.required_keys = list(required_keys)
+
+    def annotate(self, sample, **kwargs):
+        """Runs the annotators in order and returns the merged annotations."""
+        if 'annotations' not in kwargs or kwargs['annotations'] is None:
+            kwargs['annotations'] = {}
+        for annotator in self.annotators:
+            try:
+                annotations = annotator(sample, **kwargs)
+            except Exception:
+                logger.debug("The annotator `%s' failed to annotate!",
+                             annotator, exc_info=True)
+                annotations = {}
+            if not annotations:
+                logger.debug(
+                    "Annotator `%s' returned empty annotations.", annotator)
+            kwargs['annotations'].update(annotations or {})  # None -> {}
+            # check if we have all the required annotations
+            if all(key in kwargs['annotations'] for key in self.required_keys):
+                break
+        return kwargs['annotations']
diff --git a/bob/bio/base/annotator/__init__.py b/bob/bio/base/annotator/__init__.py
new file mode 100644
index 00000000..e63b7dcc
--- /dev/null
+++ b/bob/bio/base/annotator/__init__.py
@@ -0,0 +1,31 @@
+from .Base import Base
+from .FailSafe import FailSafe
+from .Callable import Callable
+
+
+# gets sphinx autodoc done right - don't remove it
+def __appropriate__(*args):
+    """Says object was actually declared here, and not in the import module.
+    Fixing sphinx warnings of not being able to find classes, when path is
+    shortened.
+
+    Parameters
+    ----------
+    *args
+        The objects whose ``__module__`` is set to the name of this module
+
+    Resolves `Sphinx referencing issues
+    <https://github.com/sphinx-doc/sphinx/issues/3048>`_
+    """
+
+    for obj in args:
+        obj.__module__ = __name__
+
+
+__appropriate__(
+    Base,
+    FailSafe,
+    Callable,
+)
+
+__all__ = [_ for _ in dir() if not _.startswith('_')]
diff --git a/bob/bio/base/script/annotate.py b/bob/bio/base/script/annotate.py
new file mode 100644
index 00000000..35f3f16d
--- /dev/null
+++ b/bob/bio/base/script/annotate.py
@@ -0,0 +1,93 @@
+"""A script to help annotate databases.
+"""
+import logging
+import json
+import click
+from os.path import dirname, isfile
+from bob.extension.scripts.click_helper import (
+    verbosity_option, Command, Option)
+from bob.io.base import create_directories_safe
+from bob.bio.base.tools.grid import indices
+
+logger = logging.getLogger(__name__)
+
+
+@click.command(entry_point_group='bob.bio.config', cls=Command)
+@click.option('--database', '-d', required=True, cls=Option,
+              entry_point_group='bob.bio.database')
+@click.option('--annotator', '-a', required=True, cls=Option,
+              entry_point_group='bob.bio.annotator')
+@click.option('--output-dir', '-o', required=True, cls=Option)
+@click.option('--force', '-f', is_flag=True, cls=Option)
+@click.option('--array', type=click.INT, default=1,)
+@verbosity_option(cls=Option)
+def annotate(database, annotator, output_dir, force, array, **kwargs):
+    """Annotates a database.
+    The annotations are written in text file (json) format which can be read
+    back using :any:`bob.db.base.read_annotation_file` (annotation_type='json')
+
+    \b
+    Parameters
+    ----------
+    database : :any:`bob.bio.database`
+        The database that you want to annotate. Can be a ``bob.bio.database``
+        entry point or a path to a Python file which contains a variable
+        named `database`.
+    annotator : callable
+        An annotator object: its ``read_original_data`` loads each sample and
+        calling it on the loaded data returns the annotations in a dictionary.
+        Can be a ``bob.bio.annotator`` entry point or a path to a Python file
+        which contains a variable named `annotator`.
+    output_dir : str
+        The directory to save the annotations.
+    force : bool, optional
+        Whether to overwrite existing annotations.
+    array : int, optional
+        Use this option alongside gridtk to submit this script as an array job.
+    verbose : int, optional
+        Increases verbosity (see help for --verbose).
+
+    \b
+    [CONFIG]...            Configuration files. It is possible to pass one or
+                           several Python files (or names of ``bob.bio.config``
+                           entry points) which contain the parameters listed
+                           above as Python variables. The options through the
+                           command-line (see below) will override the values of
+                           configuration files.
+    """
+    logger.debug('database: %s', database)
+    logger.debug('annotator: %s', annotator)
+    logger.debug('force: %s', force)
+    logger.debug('output_dir: %s', output_dir)
+    logger.debug('array: %s', array)
+    logger.debug('kwargs: %s', kwargs)
+
+    biofiles = database.objects(groups=None, protocol=database.protocol)
+    biofiles = sorted(biofiles)
+
+    if array > 1:
+        start, end = indices(biofiles, array)
+        biofiles = biofiles[start:end]
+
+    total = len(biofiles)
+    logger.info("Saving annotations in %s", output_dir)
+    logger.info("Annotating %d samples ...", total)
+
+    for i, biofile in enumerate(biofiles):
+        outpath = biofile.make_path(output_dir, '.json')
+        if isfile(outpath):
+            if force:
+                logger.debug("Overwriting the annotations file `%s'", outpath)
+            else:
+                logger.debug("The annotation `%s' already exists", outpath)
+                continue
+
+        logger.info(
+            "Extracting annotations for sample %d out of %d", i + 1, total)
+        data = annotator.read_original_data(
+            biofile, database.original_directory, database.original_extension)
+        # serialize first: a failure must not leave a truncated file behind
+        text = json.dumps(annotator(data), indent=1, allow_nan=False)
+        create_directories_safe(dirname(outpath))
+        with open(outpath, 'w') as f:
+            f.write(text)
diff --git a/bob/bio/base/test/dummy/annotator.py b/bob/bio/base/test/dummy/annotator.py
new file mode 100644
index 00000000..e58f6c62
--- /dev/null
+++ b/bob/bio/base/test/dummy/annotator.py
@@ -0,0 +1,19 @@
+from bob.bio.base.annotator import FailSafe, Callable
+
+
+def simple_annotator(image, **kwargs):
+    """Returns a box that spans the whole image, from (0, 0) to its shape."""
+    annotations = dict(topleft=(0, 0))
+    annotations['bottomright'] = image.shape
+    return annotations
+
+
+def fail_annotator(image, **kwargs):
+    return dict()  # always "fails": an empty result means no annotations
+
+
+annotator = FailSafe(
+    [Callable(fail_annotator),
+     Callable(simple_annotator)],
+    required_keys=['topleft', 'bottomright'],
+)
diff --git a/bob/bio/base/test/test_annotators.py b/bob/bio/base/test/test_annotators.py
new file mode 100644
index 00000000..ee4cdc77
--- /dev/null
+++ b/bob/bio/base/test/test_annotators.py
@@ -0,0 +1,27 @@
+import tempfile
+import os
+import shutil
+from click.testing import CliRunner
+from bob.bio.base.script.annotate import annotate
+from bob.db.base import read_annotation_file
+
+
+def test_annotate():
+    # create the directory before ``try`` so that ``finally`` can always see it
+    tmp_dir = tempfile.mkdtemp(prefix="bobtest_")
+    try:
+        runner = CliRunner()
+        result = runner.invoke(annotate, args=(
+            '-d', 'dummy', '-a', 'dummy', '-o', tmp_dir))
+        assert result.exit_code == 0, result.output
+
+        # check the content of every annotation file that was written
+        for dirpath, dirnames, filenames in os.walk(tmp_dir):
+            for filename in filenames:
+                path = os.path.join(dirpath, filename)
+                annot = read_annotation_file(path, 'json')
+                assert annot['topleft'] == [0, 0]
+                # size of atnt images
+                assert annot['bottomright'] == [112, 92]
+    finally:
+        shutil.rmtree(tmp_dir)
diff --git a/doc/annotations.rst b/doc/annotations.rst
new file mode 100644
index 00000000..1bd5bfa1
--- /dev/null
+++ b/doc/annotations.rst
@@ -0,0 +1,28 @@
+.. _bob.bio.base.annotations:
+
+Annotating biometric databases
+==============================
+
+It is often required to annotate the biometric samples before running
+experiments. This often happens in face biometrics, where each face is detected
+and the locations of its landmarks are saved prior to running experiments.
+
+To facilitate the process of annotating a new database, this package provides
+a command-line script:
+
+.. code-block:: sh
+
+    $ bob bio annotate --help
+
+This script accepts two main parameters: a database object that inherits from
+:any:`bob.bio.base.database.BioDatabase` and an annotator object that inherits
+from :any:`bob.bio.base.annotator.Base`. Please see the help message of the
+script for more information.
+
+The script can also be run in parallel using :ref:`gridtk`:
+
+.. code-block:: sh
+
+    $ jman submit --array 64 -- bob bio annotate /path/to/config.py --array 64
+
+The numbers given to the two ``--array`` options should match.
diff --git a/doc/extra-intersphinx.txt b/doc/extra-intersphinx.txt
index 86d9ac68..c087b9d7 100644
--- a/doc/extra-intersphinx.txt
+++ b/doc/extra-intersphinx.txt
@@ -1,5 +1,4 @@
 python
-numpy
 bob.bio.face
 bob.bio.gmm
 bob.bio.video
diff --git a/doc/implemented.rst b/doc/implemented.rst
index 84b358bf..e0f68d0d 100644
--- a/doc/implemented.rst
+++ b/doc/implemented.rst
@@ -1,6 +1,5 @@
 .. _bob.bio.base.implemented:
 
-=================================
 Tools implemented in bob.bio.base
 =================================
 
@@ -15,6 +14,7 @@ Base Classes
    bob.bio.base.extractor.Extractor
    bob.bio.base.algorithm.Algorithm
    bob.bio.base.grid.Grid
+   bob.bio.base.annotator.Base
 
 
 Implementations
@@ -38,7 +38,8 @@ Implementations
    bob.bio.base.database.BioDatabase
    bob.bio.base.database.ZTBioDatabase
    bob.bio.base.database.FileListBioDatabase
-
+   bob.bio.base.annotator.FailSafe
+   bob.bio.base.annotator.Callable
 
 Preprocessors
 -------------
@@ -72,5 +73,10 @@ Grid Configuration
 
    .. adapted from http://stackoverflow.com/a/29789910/3301902 to ge a nice dictionary content view
 
+Annotators
+----------
+
+.. automodule:: bob.bio.base.annotator
+
 
 .. include:: links.rst
diff --git a/doc/index.rst b/doc/index.rst
index d4141fc6..01d80cae 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -4,9 +4,9 @@
 
 .. _bob.bio.base:
 
-===========================================
+*******************************************
  Running Biometric Recognition Experiments
-===========================================
+*******************************************
 
 The ``bob.bio`` packages provide open source tools to run comparable and reproducible biometric recognition experiments.
 To design a biometric recognition experiment, you must choose:
@@ -64,7 +64,6 @@ If you run biometric recognition experiments using the bob.bio framework, please
   }
 
 
-===========
 Users Guide
 ===========
 
@@ -77,8 +76,9 @@ Users Guide
    implementation
    filelist-guide
    more
+   annotations
+
 
-================
 Reference Manual
 ================
 
@@ -89,7 +89,7 @@ Reference Manual
    py_api
 
 
-==========
+
 References
 ==========
 
@@ -101,7 +101,6 @@ References
 .. [GW09]    *M. Günther and R.P. Würtz*. **Face detection and recognition using maximum likelihood classifiers on Gabor graphs**. International Journal of Pattern Recognition and Artificial Intelligence, 23(3):433-461, 2009.
 
 
-=========
 ToDo-List
 =========
 
@@ -111,7 +110,7 @@ Here is a list of things that needs to be done:
 .. todolist::
 
 
-==================
+
 Indices and tables
 ==================
 
diff --git a/doc/py_api.rst b/doc/py_api.rst
index 13daaa8f..6483e2ca 100644
--- a/doc/py_api.rst
+++ b/doc/py_api.rst
@@ -1,5 +1,4 @@
 
-===========================
 Python API for bob.bio.base
 ===========================
 
diff --git a/requirements.txt b/requirements.txt
index 5f863c46..bc8552c0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,5 +9,8 @@ bob.learn.linear
 bob.math
 bob.measure
 bob.sp
+click
+click-plugins
+numpy
 scipy
 setuptools
diff --git a/setup.py b/setup.py
index 95a397db..e93fd72f 100644
--- a/setup.py
+++ b/setup.py
@@ -123,14 +123,25 @@ setup(
         'demanding         = bob.bio.base.config.grid.demanding:grid',
         'gpu               = bob.bio.base.config.grid.gpu:grid',
       ],
+
       # declare database to bob
       'bob.db': [
         'bio_filelist      = bob.bio.base.database.filelist.driver:Interface',
       ],
-      # main entry for bob cli
+      # main entry for bob bio cli
       'bob.cli': [
         'bio               = bob.bio.base.script.bio:bio',
       ],
+
+      # bob bio scripts
+      'bob.bio.cli': [
+        'annotate          = bob.bio.base.script.annotate:annotate',
+      ],
+
+      # annotators
+      'bob.bio.annotator': [
+        'dummy             = bob.bio.base.test.dummy.annotator:annotator',
+      ],
    },
 
     # Classifiers are important if you plan to distribute this package through
-- 
GitLab