annotate.py 6.87 KB
Newer Older
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
1
2
"""A script to help annotate databases.
"""
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
3
4
import functools
import json
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
5
import logging
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
6

Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
7
import click
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
8

Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
9
from bob.extension.scripts.click_helper import (
10
11
12
    ConfigCommand,
    ResourceOption,
    log_parameters,
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
13
    verbosity_option,
14
)
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
15
from bob.pipelines import DelayedSample, ToDaskBag, wrap
Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
16

Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
17
18
# Module-level logger named after this module; used by the commands below.
logger = logging.getLogger(__name__)

Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
19

20
21
def save_json(data, path):
    """
    Saves a dictionary ``data`` in a json file at ``path``.

    :param data: The object to serialize; it must be serializable by
        :py:func:`json.dump` (otherwise ``TypeError`` is raised).
    :param path: The file to write. It is created, or truncated if it already
        exists.
    """
    # Pin the encoding instead of relying on the platform/locale default so
    # that the written files are identical on every system.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f)

Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
27

28
29
def load_json(path):
    """
    Returns a dictionary from a json file at ``path``.

    :param path: The json file to read.
    :return: The decoded content of the file (whatever :py:func:`json.load`
        produces for it).
    """
    # JSON text is UTF-8; decoding with the locale's default encoding can fail
    # or corrupt non-ASCII characters on systems that are not UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

Amir MOHAMMADI's avatar
Amir MOHAMMADI committed
35

36
37
38
39
40
41
42
def annotate_common_options(func):
    """Decorator that adds the options shared by the annotate commands.

    The returned function forwards every call to ``func`` unchanged and
    carries the ``--annotator``, ``--output-dir`` and ``--dask-client`` click
    options.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwds):
        return func(*args, **kwds)

    # Listed in the order they would be written as stacked decorators.
    shared_options = (
        click.option(
            "--annotator",
            "-a",
            required=True,
            cls=ResourceOption,
            entry_point_group="bob.bio.annotator",
            help="An annotator (instance of class inheriting from "
            "bob.bio.base.Annotator) or an annotator resource name.",
        ),
        click.option(
            "--output-dir",
            "-o",
            required=True,
            cls=ResourceOption,
            help="The directory to save the annotations.",
        ),
        click.option(
            "--dask-client",
            "-l",
            "dask_client",
            entry_point_group="dask.client",
            help="Dask client for the execution of the pipeline. If not specified, "
            "uses a single threaded, local Dask Client.",
            cls=ResourceOption,
        ),
    )

    # Stacked decorators are applied bottom-up, hence the reversed iteration.
    decorated = wrapper
    for add_option in reversed(shared_options):
        decorated = add_option(decorated)

    return decorated


@click.command(
    entry_point_group="bob.bio.config",
    cls=ConfigCommand,
    epilog="""\b
Examples:

  $ bob bio annotate -vvv -d <database> -a <annotator> -o /tmp/annotations
""",
)
@click.option(
    "--database",
    "-d",
    required=True,
    cls=ResourceOption,
    entry_point_group="bob.bio.database",
    help="Biometric Database (class that implements the methods: "
    "`background_model_samples`, `references` and `probes`).",
)
@click.option(
    "--groups",
    "-g",
    multiple=True,
    # The trailing space matters: the two literals are concatenated as-is.
    help="Biometric Database group that will be annotated. Can be added "
    "multiple times for different groups. [Default: All groups]",
)
@annotate_common_options
@verbosity_option(cls=ResourceOption)
def annotate(database, groups, annotator, output_dir, dask_client, **kwargs):
    """Annotates a database.

    The annotations are written in text file (json) format which can be read
    back using :any:`read_annotation_file` (annotation_type='json')
    """
    # NOTE: the docstring above is also the command's ``--help`` text, so the
    # parameters are documented here instead:
    #   database    -- object providing ``all_samples(groups)``.
    #   groups      -- values of the repeatable ``--groups`` option.
    #   annotator   -- annotator to run on every sample.
    #   output_dir  -- directory passed to the checkpoint wrapper as
    #                  ``features_dir``; one ``.json`` file per sample.
    #   dask_client -- scheduler given to ``compute``; ``None`` selects the
    #                  single-threaded scheduler.
    #   kwargs      -- remaining click parameters (e.g. verbosity), unused here.
    log_parameters(logger)

    # Allows passing of Sample objects as parameters
    annotator = wrap(["sample"], annotator, output_attribute="annotations")

    # Will save the annotations in the `data` fields to a json file
    annotator = wrap(
        ["checkpoint"],
        annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    annotator = wrap(["dask"], annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    logger.debug("Retrieving samples from database.")
    # NOTE(review): click passes an empty tuple when ``--groups`` is not given;
    # confirm that ``all_samples`` treats it as "all groups" as the help says.
    samples = database.all_samples(groups)

    # Sets the scheduler to local if no dask_client is specified
    scheduler = dask_client if dask_client is not None else "single-threaded"

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples)

    logger.info(f"Saving annotations in {output_dir}.")
    logger.info(f"Annotating {len(samples)} samples...")
    # Nothing runs until ``compute`` is called; the checkpoint wrapper writes
    # the json files as a side effect of the computation.
    annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")
141
142
143
144
145
146
147
148
149


@click.command(
    entry_point_group="bob.bio.config",
    cls=ConfigCommand,
    epilog="""\b
Examples:

  $ bob bio annotate-samples -vvv config.py -a <annotator> -o /tmp/annotations

You have to define ``samples``, ``reader``, and ``make_key`` in python files
(config.py) as in examples.
""",
)
@click.option(
    "--samples",
    entry_point_group="bob.bio.config",
    required=True,
    cls=ResourceOption,
    help="A list of all samples that you want to annotate. They will be passed "
    "as is to the ``reader`` and ``make-key`` functions.",
)
@click.option(
    "--reader",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``data = reader(sample)`` which "
    "takes a sample and returns the loaded data. The returned data is given to "
    "the annotator.",
)
@click.option(
    "--make-key",
    required=True,
    cls=ResourceOption,
    help="A function with the signature of ``key = make_key(sample)`` which "
    "takes a sample and returns a unique str identifier for that sample that "
    "will be used to save it in output_dir. ``key`` generally is the relative "
    "path to a sample's file from the dataset's root directory.",
)
@annotate_common_options
@verbosity_option(cls=ResourceOption)
def annotate_samples(
    samples, reader, make_key, annotator, output_dir, dask_client, **kwargs
):
    """Annotates a list of samples.

    This command is very similar to ``bob bio annotate`` except that it works
    without a database interface. You must provide a list of samples as well as
    two functions:

    \b
        def reader(sample):
            # Loads data from a sample.
            # for example:
            data = bob.io.base.load(sample)
            # data will be given to the annotator
            return data

    \b
        def make_key(sample):
            # Creates a unique str identifier for this sample.
            # for example:
            return str(sample)
    """
    # NOTE: the docstring above is also the command's ``--help`` text. The
    # ``\b`` lines (same marker as in the epilog) stop click from re-wrapping
    # the code examples into a single paragraph.
    # Parameters:
    #   samples     -- iterable of user-defined sample descriptions.
    #   reader      -- callable ``reader(sample)``; called lazily through the
    #                  ``load`` argument of each DelayedSample.
    #   make_key    -- callable ``make_key(sample)``; called eagerly below.
    #   annotator   -- annotator to run on every sample.
    #   output_dir  -- directory passed to the checkpoint wrapper as
    #                  ``features_dir``; one ``.json`` file per sample.
    #   dask_client -- scheduler given to ``compute``; ``None`` selects the
    #                  single-threaded scheduler.
    #   kwargs      -- remaining click parameters (e.g. verbosity), unused here.
    log_parameters(logger, ignore=("samples",))

    # Allows passing of Sample objects as parameters
    annotator = wrap(["sample"], annotator, output_attribute="annotations")

    # Will save the annotations in the `data` fields to a json file
    annotator = wrap(
        bases=["checkpoint"],
        estimator=annotator,
        features_dir=output_dir,
        extension=".json",
        save_func=save_json,
        load_func=load_json,
        sample_attribute="annotations",
    )

    # Allows reception of Dask Bags
    annotator = wrap(["dask"], annotator)

    # Transformer that splits the samples into several Dask Bags
    to_dask_bags = ToDaskBag(npartitions=50)

    # Sets the scheduler to local if no dask_client is specified
    scheduler = dask_client if dask_client is not None else "single-threaded"

    # Converts samples into a list of DelayedSample objects
    samples_obj = [
        DelayedSample(
            load=functools.partial(reader, s),
            key=make_key(s),
        )
        for s in samples
    ]

    # Splits the samples list into bags
    dask_bags = to_dask_bags.transform(samples_obj)

    logger.info(f"Saving annotations in {output_dir}")
    logger.info(f"Annotating {len(samples_obj)} samples...")
    # Nothing runs until ``compute`` is called; the checkpoint wrapper writes
    # the json files as a side effect of the computation.
    annotator.transform(dask_bags).compute(scheduler=scheduler)

    logger.info("All annotations written.")