...
  View open merge request
Commits (1)
  • Amir MOHAMMADI's avatar
    Add bob bio annotate-samples command · 769f5bc3
    Amir MOHAMMADI authored
    This command works very similarly to bob bio annotate,
    except that it works without a db interface. Instead,
    it requires a list of samples and two functions to
    do the job.
    769f5bc3
......@@ -3,45 +3,87 @@
import logging
import json
import click
import functools
from os.path import dirname, isfile, expanduser
from bob.extension.scripts.click_helper import (
verbosity_option, ConfigCommand, ResourceOption, log_parameters)
verbosity_option,
ConfigCommand,
ResourceOption,
log_parameters,
)
from bob.io.base import create_directories_safe
from bob.bio.base.tools.grid import indices
logger = logging.getLogger(__name__)
ANNOTATE_EPILOG = '''\b
def annotate_common_options(func):
    """Attach the click options shared by the annotation commands to ``func``.

    The returned callable forwards every call to ``func`` unchanged and carries
    the ``--annotator``/``-a``, ``--output-dir``/``-o``, ``--force``/``-f`` and
    ``--array`` options, so it can be stacked with further click decorators.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwds):
        return func(*args, **kwds)

    # Listed innermost first: this is the order in which a decorator stack
    # applies its entries (bottom-up), so the options end up registered in the
    # same order as if they had been stacked with ``@`` syntax.
    shared_options = (
        click.option(
            "--array",
            type=click.INT,
            default=1,
            cls=ResourceOption,
            help="Use this option alongside gridtk to submit this script as an array job.",
        ),
        click.option(
            "--force",
            "-f",
            is_flag=True,
            cls=ResourceOption,
            help="Whether to overwrite existing annotations.",
        ),
        click.option(
            "--output-dir",
            "-o",
            required=True,
            cls=ResourceOption,
            help="The directory to save the annotations.",
        ),
        click.option(
            "--annotator",
            "-a",
            required=True,
            cls=ResourceOption,
            entry_point_group="bob.bio.annotator",
            help="A callable that takes the database and a sample (biofile) "
            "of the database and returns the annotations in a dictionary.",
        ),
    )

    decorated = wrapper
    for attach_option in shared_options:
        decorated = attach_option(decorated)
    return decorated
@click.command(
entry_point_group="bob.bio.config",
cls=ConfigCommand,
epilog="""\b
Examples:
$ bob bio annotate -vvv -d <database> -a <annotator> -o /tmp/annotations
$ jman submit --array 64 -- bob bio annotate ... --array 64
'''
@click.command(entry_point_group='bob.bio.config', cls=ConfigCommand,
epilog=ANNOTATE_EPILOG)
@click.option('--database', '-d', required=True, cls=ResourceOption,
entry_point_group='bob.bio.database',
help='''The database that you want to annotate.''')
@click.option('--annotator', '-a', required=True, cls=ResourceOption,
entry_point_group='bob.bio.annotator',
help='A callable that takes the database and a sample (biofile) '
'of the database and returns the annotations in a dictionary.')
@click.option('--output-dir', '-o', required=True, cls=ResourceOption,
help='The directory to save the annotations.')
@click.option('--force', '-f', is_flag=True, cls=ResourceOption,
help='Whether to overwrite existing annotations.')
@click.option('--array', type=click.INT, default=1, cls=ResourceOption,
help='Use this option alongside gridtk to submit this script as '
'an array job.')
@click.option('--database-directories-file', cls=ResourceOption,
default=expanduser('~/.bob_bio_databases.txt'),
help='(Deprecated) To support loading of old databases.')
""",
)
@click.option(
"--database",
"-d",
required=True,
cls=ResourceOption,
entry_point_group="bob.bio.database",
help="""The database that you want to annotate.""",
)
@annotate_common_options
@click.option(
"--database-directories-file",
cls=ResourceOption,
default=expanduser("~/.bob_bio_databases.txt"),
help="(Deprecated) To support loading of old databases.",
)
@verbosity_option(cls=ResourceOption)
def annotate(database, annotator, output_dir, force, array,
database_directories_file, **kwargs):
def annotate(
database, annotator, output_dir, force, array, database_directories_file, **kwargs
):
"""Annotates a database.
The annotations are written in text file (json) format which can be read
......@@ -53,18 +95,103 @@ def annotate(database, annotator, output_dir, force, array,
database.replace_directories(database_directories_file)
biofiles = database.objects(groups=None, protocol=database.protocol)
biofiles = sorted(biofiles)
samples = sorted(biofiles)
def reader(biofile):
return annotator.read_original_data(
biofile, database.original_directory, database.original_extension
)
def make_path(biofile, output_dir):
return biofile.make_path(output_dir, ".json")
return annotate_generic(
samples, reader, make_path, annotator, output_dir, force, array
)
@click.command(
    entry_point_group="bob.bio.config",
    cls=ConfigCommand,
    epilog="""\b
Examples:
$ bob bio annotate-samples -vvv config.py -a <annotator> -o /tmp/annotations
$ jman submit --array 64 -- bob bio annotate-samples ... --array 64
You have to define samples, reader, and make_path in a python file (config.py) as in
examples.
""",
)
@click.option(
    "--samples",
    required=True,
    cls=ResourceOption,
    help="A list of all samples that you want to annotate. The list must be sorted or "
    "deterministic in consequent calls. This is needed so that this script works "
    "correctly on the grid.",
)
@click.option(
    "--reader",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``data = reader(sample)`` which takes a "
    "sample and returns the loaded data. The data is given to the annotator.",
)
@click.option(
    "--make-path",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``path = make_path(sample, output_dir)`` "
    "which takes a sample and output_dir and returns the unique path for that sample "
    "to be saved in output_dir. The extension of the path must be '.json'.",
)
@annotate_common_options
@verbosity_option(cls=ResourceOption)
def annotate_samples(
    samples, reader, make_path, annotator, output_dir, force, array, **kwargs
):
    """Annotates a list of samples.
    This command is very similar to ``bob bio annotate`` except that it works without a
    database interface. You only need to provide a list of **sorted** samples to be
    annotated and two functions::
        def reader(sample):
            # load data from sample here
            # for example:
            data = bob.io.base.load(sample)
            # data will be given to the annotator
            return data
        def make_path(sample, output_dir):
            # create a unique path for this sample in the output_dir
            # for example:
            return os.path.join(output_dir, str(sample) + ".json")
    Please note that your samples must be a list and must be sorted!
    """
    # NOTE: click shows the docstring above as this command's help text, so its
    # wording is user-facing.

    # ``samples`` is left out of the parameter dump (presumably because the
    # list can be long); only its length is reported, at debug level.
    log_parameters(logger, ignore=("samples",))
    sample_count = len(samples)
    logger.debug("len(samples): %d", sample_count)

    # All the actual work is shared with ``bob bio annotate``.
    return annotate_generic(
        samples=samples,
        reader=reader,
        make_path=make_path,
        annotator=annotator,
        output_dir=output_dir,
        force=force,
        array=array,
    )
def annotate_generic(samples, reader, make_path, annotator, output_dir, force, array):
if array > 1:
start, end = indices(biofiles, array)
biofiles = biofiles[start:end]
start, end = indices(samples, array)
samples = samples[start:end]
total = len(biofiles)
total = len(samples)
logger.info("Saving annotations in %s", output_dir)
logger.info("Annotating %d samples ...", total)
for i, biofile in enumerate(biofiles):
outpath = biofile.make_path(output_dir, '.json')
for i, sample in enumerate(samples):
outpath = make_path(sample, output_dir)
if not outpath.endswith(".json"):
outpath += ".json"
if isfile(outpath):
if force:
logger.info("Overwriting the annotations file `%s'", outpath)
......@@ -73,12 +200,11 @@ def annotate(database, annotator, output_dir, force, array,
continue
logger.info(
"Extracting annotations for sample %d out of %d: %s", i + 1, total,
outpath)
data = annotator.read_original_data(
biofile, database.original_directory, database.original_extension)
"Extracting annotations for sample %d out of %d: %s", i + 1, total, outpath
)
data = reader(sample)
annot = annotator(data)
create_directories_safe(dirname(outpath))
with open(outpath, 'w') as f:
with open(outpath, "w") as f:
json.dump(annot, f, indent=1, allow_nan=False)
......@@ -138,6 +138,7 @@ setup(
# bob bio scripts
'bob.bio.cli': [
'annotate = bob.bio.base.script.annotate:annotate',
'annotate-samples = bob.bio.base.script.annotate:annotate_samples',
'metrics = bob.bio.base.script.commands:metrics',
'multi-metrics = bob.bio.base.script.commands:multi_metrics',
'roc = bob.bio.base.script.commands:roc',
......