Skip to content
Snippets Groups Projects
Commit 8c353346 authored by Amir MOHAMMADI
Browse files

Merge branch 'annotate-samples' into 'master'

Add bob bio annotate-samples command

See merge request !176
parents b203ba40 6a8f94c6
No related branches found
No related tags found
1 merge request: !176 Add bob bio annotate-samples command
Pipeline #36810 passed
...@@ -3,45 +3,87 @@ ...@@ -3,45 +3,87 @@
import logging import logging
import json import json
import click import click
import functools
from os.path import dirname, isfile, expanduser from os.path import dirname, isfile, expanduser
from bob.extension.scripts.click_helper import ( from bob.extension.scripts.click_helper import (
verbosity_option, ConfigCommand, ResourceOption, log_parameters) verbosity_option,
ConfigCommand,
ResourceOption,
log_parameters,
)
from bob.io.base import create_directories_safe from bob.io.base import create_directories_safe
from bob.bio.base.tools.grid import indices from bob.bio.base.tools.grid import indices
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def annotate_common_options(func):
    """Decorate ``func`` with the click options shared by the annotate commands.

    The returned callable forwards every positional and keyword argument to
    ``func`` unchanged and carries the ``--annotator``, ``--output-dir``,
    ``--force`` and ``--array`` options.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwds):
        return func(*args, **kwds)

    # Declared top-to-bottom, in the order they would be written as a stack of
    # decorators above ``wrapper``.
    shared_options = (
        click.option(
            "--annotator",
            "-a",
            required=True,
            cls=ResourceOption,
            entry_point_group="bob.bio.annotator",
            help="A callable that takes the database and a sample (biofile) "
            "of the database and returns the annotations in a dictionary.",
        ),
        click.option(
            "--output-dir",
            "-o",
            required=True,
            cls=ResourceOption,
            help="The directory to save the annotations.",
        ),
        click.option(
            "--force",
            "-f",
            is_flag=True,
            cls=ResourceOption,
            help="Whether to overwrite existing annotations.",
        ),
        click.option(
            "--array",
            type=click.INT,
            default=1,
            cls=ResourceOption,
            help="Use this option alongside gridtk to submit this script as an array job.",
        ),
    )

    # A decorator stack is applied bottom-up, so walk the tuple backwards to
    # attach the options in that same sequence.
    decorated = wrapper
    for attach_option in reversed(shared_options):
        decorated = attach_option(decorated)
    return decorated
@click.command(
entry_point_group="bob.bio.config",
cls=ConfigCommand,
epilog="""\b
Examples: Examples:
$ bob bio annotate -vvv -d <database> -a <annotator> -o /tmp/annotations $ bob bio annotate -vvv -d <database> -a <annotator> -o /tmp/annotations
$ jman submit --array 64 -- bob bio annotate ... --array 64 $ jman submit --array 64 -- bob bio annotate ... --array 64
''' """,
)
@click.option(
@click.command(entry_point_group='bob.bio.config', cls=ConfigCommand, "--database",
epilog=ANNOTATE_EPILOG) "-d",
@click.option('--database', '-d', required=True, cls=ResourceOption, required=True,
entry_point_group='bob.bio.database', cls=ResourceOption,
help='''The database that you want to annotate.''') entry_point_group="bob.bio.database",
@click.option('--annotator', '-a', required=True, cls=ResourceOption, help="""The database that you want to annotate.""",
entry_point_group='bob.bio.annotator', )
help='A callable that takes the database and a sample (biofile) ' @annotate_common_options
'of the database and returns the annotations in a dictionary.') @click.option(
@click.option('--output-dir', '-o', required=True, cls=ResourceOption, "--database-directories-file",
help='The directory to save the annotations.') cls=ResourceOption,
@click.option('--force', '-f', is_flag=True, cls=ResourceOption, default=expanduser("~/.bob_bio_databases.txt"),
help='Whether to overwrite existing annotations.') help="(Deprecated) To support loading of old databases.",
@click.option('--array', type=click.INT, default=1, cls=ResourceOption, )
help='Use this option alongside gridtk to submit this script as '
'an array job.')
@click.option('--database-directories-file', cls=ResourceOption,
default=expanduser('~/.bob_bio_databases.txt'),
help='(Deprecated) To support loading of old databases.')
@verbosity_option(cls=ResourceOption) @verbosity_option(cls=ResourceOption)
def annotate(database, annotator, output_dir, force, array, def annotate(
database_directories_file, **kwargs): database, annotator, output_dir, force, array, database_directories_file, **kwargs
):
"""Annotates a database. """Annotates a database.
The annotations are written in text file (json) format which can be read The annotations are written in text file (json) format which can be read
...@@ -53,18 +95,103 @@ def annotate(database, annotator, output_dir, force, array, ...@@ -53,18 +95,103 @@ def annotate(database, annotator, output_dir, force, array,
database.replace_directories(database_directories_file) database.replace_directories(database_directories_file)
biofiles = database.objects(groups=None, protocol=database.protocol) biofiles = database.objects(groups=None, protocol=database.protocol)
biofiles = sorted(biofiles) samples = sorted(biofiles)
def reader(biofile):
return annotator.read_original_data(
biofile, database.original_directory, database.original_extension
)
def make_path(biofile, output_dir):
return biofile.make_path(output_dir, ".json")
return annotate_generic(
samples, reader, make_path, annotator, output_dir, force, array
)
@click.command(
    entry_point_group="bob.bio.config",
    cls=ConfigCommand,
    epilog="""\b
Examples:
$ bob bio annotate-samples -vvv config.py -a <annotator> -o /tmp/annotations
$ jman submit --array 64 -- bob bio annotate-samples ... --array 64
You have to define samples, reader, and make_path in a python file (config.py) as in
examples.
""",
)
@click.option(
    "--samples",
    required=True,
    cls=ResourceOption,
    help="A list of all samples that you want to annotate. The list must be sorted or "
    "deterministic in consequent calls. This is needed so that this script works "
    "correctly on the grid.",
)
@click.option(
    "--reader",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``data = reader(sample)`` which takes a "
    "sample and returns the loaded data. The data is given to the annotator.",
)
@click.option(
    "--make-path",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``path = make_path(sample, output_dir)`` "
    "which takes a sample and output_dir and returns the unique path for that sample "
    "to be saved in output_dir. The extension of the path must be '.json'.",
)
@annotate_common_options
@verbosity_option(cls=ResourceOption)
def annotate_samples(
    samples, reader, make_path, annotator, output_dir, force, array, **kwargs
):
    """Annotates a list of samples.

    This command is very similar to ``bob bio annotate`` except that it works without a
    database interface. You only need to provide a list of **sorted** samples to be
    annotated and two functions::

        def reader(sample):
            # load data from sample here
            # for example:
            data = bob.io.base.load(sample)
            # data will be given to the annotator
            return data

        def make_path(sample, output_dir):
            # create a unique path for this sample in the output_dir
            # for example:
            return os.path.join(output_dir, str(sample) + ".json")

    Please note that your samples must be a list and must be sorted!
    """
    # ``samples`` is excluded from the parameter log; only its length is
    # reported, at debug level.
    log_parameters(logger, ignore=("samples",))
    num_samples = len(samples)
    logger.debug("len(samples): %d", num_samples)

    # The annotation loop itself lives in ``annotate_generic``; this command
    # only forwards what it received from the command line / config file.
    return annotate_generic(
        samples=samples,
        reader=reader,
        make_path=make_path,
        annotator=annotator,
        output_dir=output_dir,
        force=force,
        array=array,
    )
def annotate_generic(samples, reader, make_path, annotator, output_dir, force, array):
if array > 1: if array > 1:
start, end = indices(biofiles, array) start, end = indices(samples, array)
biofiles = biofiles[start:end] samples = samples[start:end]
total = len(biofiles) total = len(samples)
logger.info("Saving annotations in %s", output_dir) logger.info("Saving annotations in %s", output_dir)
logger.info("Annotating %d samples ...", total) logger.info("Annotating %d samples ...", total)
for i, biofile in enumerate(biofiles): for i, sample in enumerate(samples):
outpath = biofile.make_path(output_dir, '.json') outpath = make_path(sample, output_dir)
if not outpath.endswith(".json"):
outpath += ".json"
if isfile(outpath): if isfile(outpath):
if force: if force:
logger.info("Overwriting the annotations file `%s'", outpath) logger.info("Overwriting the annotations file `%s'", outpath)
...@@ -73,12 +200,11 @@ def annotate(database, annotator, output_dir, force, array, ...@@ -73,12 +200,11 @@ def annotate(database, annotator, output_dir, force, array,
continue continue
logger.info( logger.info(
"Extracting annotations for sample %d out of %d: %s", i + 1, total, "Extracting annotations for sample %d out of %d: %s", i + 1, total, outpath
outpath) )
data = annotator.read_original_data( data = reader(sample)
biofile, database.original_directory, database.original_extension)
annot = annotator(data) annot = annotator(data)
create_directories_safe(dirname(outpath)) create_directories_safe(dirname(outpath))
with open(outpath, 'w') as f: with open(outpath, "w") as f:
json.dump(annot, f, indent=1, allow_nan=False) json.dump(annot, f, indent=1, allow_nan=False)
...@@ -138,6 +138,7 @@ setup( ...@@ -138,6 +138,7 @@ setup(
# bob bio scripts # bob bio scripts
'bob.bio.cli': [ 'bob.bio.cli': [
'annotate = bob.bio.base.script.annotate:annotate', 'annotate = bob.bio.base.script.annotate:annotate',
'annotate-samples = bob.bio.base.script.annotate:annotate_samples',
'metrics = bob.bio.base.script.commands:metrics', 'metrics = bob.bio.base.script.commands:metrics',
'multi-metrics = bob.bio.base.script.commands:multi_metrics', 'multi-metrics = bob.bio.base.script.commands:multi_metrics',
'roc = bob.bio.base.script.commands:roc', 'roc = bob.bio.base.script.commands:roc',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment