Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
bob
bob.bio.base
Commits
854eb190
Commit
854eb190
authored
Nov 09, 2020
by
Yannick DAYER
Browse files
annotate-samples now uses custom 'read' function.
parent
c21b1b03
Pipeline
#45107
failed with stage
in 1 minute
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
bob/bio/base/script/annotate.py
View file @
854eb190
...
...
@@ -10,7 +10,7 @@ from bob.extension.scripts.click_helper import (
ResourceOption
,
log_parameters
,
)
from
bob.pipelines
import
wrap
,
ToDaskBag
from
bob.pipelines
import
wrap
,
ToDaskBag
,
DelayedSample
logger
=
logging
.
getLogger
(
__name__
)
def
save_json
(
data
,
path
):
...
...
@@ -44,13 +44,6 @@ def annotate_common_options(func):
cls
=
ResourceOption
,
help
=
"The directory to save the annotations."
,
)
@
click
.
option
(
"--force"
,
"-f"
,
is_flag
=
True
,
cls
=
ResourceOption
,
help
=
"Whether to overwrite existing annotations."
,
)
@
click
.
option
(
"--dask-client"
,
"-l"
,
...
...
@@ -96,7 +89,7 @@ Examples:
@
annotate_common_options
@
verbosity_option
(
cls
=
ResourceOption
)
def
annotate
(
database
,
groups
,
annotator
,
output_dir
,
force
,
dask_client
,
**
kwargs
database
,
groups
,
annotator
,
output_dir
,
dask_client
,
**
kwargs
):
"""Annotates a database.
...
...
@@ -123,7 +116,7 @@ def annotate(
annotator
=
wrap
([
"dask"
],
annotator
)
# Transformer that splits the samples into several Dask Bags
to_dask_bags
=
ToDaskBag
()
to_dask_bags
=
ToDaskBag
(
npartitions
=
50
)
logger
.
debug
(
"Retrieving background model samples from database."
)
...
...
@@ -138,8 +131,16 @@ def annotate(
# Unravels all samples in one list (no SampleSets)
samples
=
background_model_samples
samples
.
extend
([
sample
for
r
in
references_samplesets
for
sample
in
r
.
samples
])
samples
.
extend
([
sample
for
p
in
probes_samplesets
for
sample
in
p
.
samples
])
samples
.
extend
([
sample
for
r
in
references_samplesets
for
sample
in
r
.
samples
])
samples
.
extend
([
sample
for
p
in
probes_samplesets
for
sample
in
p
.
samples
])
# Sets the scheduler to local if no dask_client is specified
if
dask_client
is
not
None
:
...
...
@@ -166,7 +167,8 @@ Examples:
$ bob bio annotate-samples -vvv config.py -a <annotator> -o /tmp/annotations
You have to define ``samples`` in a python file (config.py) as in examples.
You have to define ``samples``, ``reader``, and ``make_key`` in python files
(config.py) as in examples.
"""
,
)
@
click
.
option
(
...
...
@@ -174,17 +176,48 @@ You have to define ``samples`` in a python file (config.py) as in examples.
entry_point_group
=
"bob.bio.config"
,
required
=
True
,
cls
=
ResourceOption
,
help
=
"A list of all samples that you want to annotate."
,
help
=
"A list of all samples that you want to annotate. They will be passed "
"as is to the ``reader`` and ``make-key`` functions."
,
)
@
click
.
option
(
"--reader"
,
required
=
True
,
cls
=
ResourceOption
,
help
=
"A function with the signature of ``data = reader(sample)`` which "
"takes a sample and returns the loaded data. The returned data is given to "
"the annotator."
,
)
@
click
.
option
(
"--make-key"
,
required
=
True
,
cls
=
ResourceOption
,
help
=
"A function with the signature of ``key = make_key(sample)`` which "
"takes a sample and returns a unique str identifier for that sample that "
"will be use to save it in output_dir. ``key`` generally is the relative "
"path to a sample's file from the dataset's root directory."
,
)
@
annotate_common_options
@
verbosity_option
(
cls
=
ResourceOption
)
def
annotate_samples
(
samples
,
annotator
,
output_dir
,
force
,
dask_client
,
**
kwargs
samples
,
reader
,
make_key
,
annotator
,
output_dir
,
dask_client
,
**
kwargs
):
"""Annotates a list of samples.
This command is very similar to ``bob bio annotate`` except that it works
without a database interface.
without a database interface. You must provide a list of samples as well as
two functions:
def reader(sample):
# Loads data from a sample.
# for example:
data = bob.io.base.load(sample)
# data will be given to the annotator
return data
def make_key(sample):
# Creates a unique str identifier for this sample.
# for example:
return str(sample)
"""
log_parameters
(
logger
,
ignore
=
(
"samples"
,))
...
...
@@ -206,16 +239,26 @@ def annotate_samples(
annotator
=
wrap
([
"dask"
],
annotator
)
# Transformer that splits the samples into several Dask Bags
to_dask_bags
=
ToDaskBag
()
to_dask_bags
=
ToDaskBag
(
npartitions
=
50
)
if
dask_client
is
not
None
:
scheduler
=
dask_client
else
:
scheduler
=
"single-threaded"
# Converts samples into a list of DelayedSample objects
samples_obj
=
[
DelayedSample
(
load
=
functools
.
partial
(
reader
,
s
),
key
=
make_key
(
s
),
)
for
s
in
samples
]
# Splits the samples list into bags
dask_bags
=
to_dask_bags
.
transform
(
samples_obj
)
logger
.
info
(
f
"Saving annotations in
{
output_dir
}
"
)
logger
.
info
(
f
"Annotating
{
len
(
samples
)
}
samples..."
)
dask_bags
=
to_dask_bags
.
transform
(
samples
)
logger
.
info
(
f
"Annotating
{
len
(
samples_obj
)
}
samples..."
)
annotator
.
transform
(
dask_bags
).
compute
(
scheduler
=
scheduler
)
if
dask_client
is
not
None
:
...
...
bob/bio/base/test/dummy/samples_list.py
View file @
854eb190
import
os
from
bob.bio.base.test.dummy.database
import
database
samples
=
database
.
background_model_samples
()
\ No newline at end of file
import
bob.io.image
# Creates a list of unique str
samples
=
[
s
.
key
for
s
in
database
.
background_model_samples
()]
def
reader
(
sample
):
data
=
bob
.
io
.
image
.
load
(
os
.
path
.
join
(
database
.
database
.
original_directory
,
sample
+
database
.
database
.
original_extension
)
)
return
data
def
make_key
(
sample
):
return
sample
\ No newline at end of file
bob/bio/base/test/test_annotators.py
View file @
854eb190
...
...
@@ -18,7 +18,7 @@ def test_annotate():
'Command exited with this output: `{}
\'
\n
'
'If the output is empty, you can run this script locally to see '
'what is wrong:
\n
'
'bin/bob bio annotate -vvv
--force
-d dummy -g dev -a dummy -o /tmp/temp_annotations'
'bin/bob bio annotate -vvv -d dummy -g dev -a dummy -o /tmp/temp_annotations'
''
.
format
(
result
.
output
))
assert
result
.
exit_code
==
0
,
assertion_error_message
...
...
@@ -39,12 +39,12 @@ def test_annotate_samples():
tmp_dir
=
tempfile
.
mkdtemp
(
prefix
=
"bobtest_"
)
runner
=
CliRunner
()
result
=
runner
.
invoke
(
annotate_samples
,
args
=
(
'--samples'
,
'dummy_samples'
,
'-a'
,
'dummy'
,
'-o'
,
tmp_dir
))
'dummy_samples'
,
'-a'
,
'dummy'
,
'-o'
,
tmp_dir
))
assertion_error_message
=
(
'Command exited with this output: `{}
\'
\n
'
'If the output is empty, you can run this script locally to see '
'what is wrong:
\n
'
'bin/bob bio annotate-samples -vvv
--force --
samples
dummy
-a dummy -o /tmp/temp_annotations'
'bin/bob bio annotate-samples -vvv
dummy_
samples -a dummy -o /tmp/temp_annotations'
''
.
format
(
result
.
output
))
assert
result
.
exit_code
==
0
,
assertion_error_message
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment