Skip to content
Snippets Groups Projects
Commit 8c198bc2 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[dataset] New CLI script to list and check datasets

parent 7d1ab909
No related branches found
No related tags found
1 merge request: !12 "Streamlining"
from .binsegdataset import BinSegDataset """Data manipulation and raw dataset definitions"""
#!/usr/bin/env python #!/usr/bin/env python
# coding=utf-8 # coding=utf-8
"""DRIVE dataset for Vessel Segmentation
The DRIVE database has been established to enable comparative studies on
segmentation of blood vessels in retinal images.
* Reference: [DRIVE-2004]_
* Original resolution (height x width): 584 x 565
* Split reference: [DRIVE-2004]_
* Protocol ``default``:
* Training samples: 20 (including labels and masks)
* Test samples: 20 (including labels from annotator 1 and masks)
* Protocol ``second-annotation``:
* Test samples: 20 (including labels from annotator 2 and masks)
"""
import os import os
import pkg_resources import pkg_resources
...@@ -14,7 +33,7 @@ _protocols = [ ...@@ -14,7 +33,7 @@ _protocols = [
pkg_resources.resource_filename(__name__, "second-annotation.json"), pkg_resources.resource_filename(__name__, "second-annotation.json"),
] ]
_root_path = bob.extension.rc.get('bob.db.drive.datadir', _root_path = bob.extension.rc.get('bob.ip.binseg.drive.datadir',
os.path.realpath(os.curdir)) os.path.realpath(os.curdir))
def _loader(s): def _loader(s):
...@@ -25,14 +44,4 @@ def _loader(s): ...@@ -25,14 +44,4 @@ def _loader(s):
) )
dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader)
"""DRIVE dataset for Vessel Segmentation """DRIVE dataset object"""
The DRIVE database has been established to enable comparative studies on
segmentation of blood vessels in retinal images.
* Reference: [DRIVE-2004]_
* Original resolution (height x width): 584 x 565
* Training samples: 20 (including labels and masks)
* Test samples: 20 (including labels from 2 annotators and masks)
* Split reference: [DRIVE-2004]_
"""
#!/usr/bin/env python #!/usr/bin/env python
# coding=utf-8 # coding=utf-8
"""STARE dataset for Vessel Segmentation
A subset of the original STARE dataset contains 20 annotated eye fundus images
with a resolution of 700 x 605 (width x height). Two sets of ground-truth
vessel annotations are available. The first set by Adam Hoover ("ah") is
commonly used for training and testing. The second set by Valentina Kouznetsova
("vk") is typically used as a “human” baseline.
* Reference: [STARE-2000]_
* Original resolution (width x height): 700 x 605
* Split reference: [MANINIS-2016]_
* Protocol ``default``:
* Training samples: 10 (including labels from annotator "ah")
* Test samples: 10 (including labels from annotator "ah")
* Protocol ``second-annotation``:
* Training samples: 10 (including labels from annotator "vk")
* Test samples: 10 (including labels from annotator "vk")
"""
import os import os
import pkg_resources import pkg_resources
...@@ -14,7 +37,7 @@ _protocols = [ ...@@ -14,7 +37,7 @@ _protocols = [
pkg_resources.resource_filename(__name__, "second-annotation.json"), pkg_resources.resource_filename(__name__, "second-annotation.json"),
] ]
_root_path = bob.extension.rc.get('bob.db.stare.datadir', _root_path = bob.extension.rc.get('bob.ip.binseg.stare.datadir',
os.path.realpath(os.curdir)) os.path.realpath(os.curdir))
def _loader(s): def _loader(s):
...@@ -24,19 +47,4 @@ def _loader(s): ...@@ -24,19 +47,4 @@ def _loader(s):
) )
dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader)
"""STARE (training set) for Vessel Segmentation """STARE dataset object"""
A subset of the original STARE dataset contains 20 annotated eye fundus images
with a resolution of 700 x 605 (width x height). Two sets of ground-truth
vessel annotations are available. The first set by Adam Hoover is commonly used
for training and testing. The second set by Valentina Kouznetsova acts as a
“human” baseline.
* Reference: [STARE-2000]_
* Original resolution (width x height): 700 x 605
* Training samples: 10
* Test samples: 10
* Samples include labels from 2 annotators (AH, default and VK, seconda
annotator)
* Split reference: [MANINIS-2016]_
"""
#!/usr/bin/env python
# coding=utf-8
import importlib
import click
from bob.extension.scripts.click_helper import (
verbosity_option,
AliasedGroup,
)
import logging
logger = logging.getLogger(__name__)
def _get_installed_datasets():
    """Returns a list of regular-expression matches for installed datasets

    Scans the bob configuration for keys of the form
    ``bob.ip.binseg.<name>.datadir`` and returns one ``re.Match`` object per
    matching key:

    * ``group(0)``: the name of the key for the dataset directory
    * ``group("name")``: the short name for the dataset
    """

    import re

    from bob.extension import rc

    dataset_re = re.compile(r'^bob\.ip\.binseg\.(?P<name>[^\.]+)\.datadir$')
    # match each key exactly once (the original matched twice: once in the
    # expression and once in the filter), keeping only successful matches
    return [m for m in (dataset_re.match(k) for k in rc.keys()) if m]
@click.group(cls=AliasedGroup)
def dataset():
    """Commands for listing and verifying raw datasets"""
    # NOTE: the previous docstring ("Commands for listing, describing and
    # copying configuration resources") was copy-pasted from the ``config``
    # command group and misdescribed this group in ``--help`` output.
    pass
@dataset.command(
    epilog="""Examples:

\b
    1. To install a dataset, set up its data directory ("datadir").  For
       example, to setup access to DRIVE files you downloaded locally at
       the directory "/path/to/drive/files", do the following:
\b
       $ bob config set "bob.ip.binseg.drive.datadir" "/path/to/drive/files"

       Notice this setting is **NOT** case-insensitive.

    2. List all raw datasets available (and configured):

       $ bob binseg dataset list -vv
""",
)
@verbosity_option()
def list(**kwargs):
    """Lists all installed datasets"""
    # NOTE: this command intentionally shadows the ``list`` builtin -- click
    # derives the CLI sub-command name from the function name, so renaming it
    # would change the user-facing interface.

    # Bug fix: the original read ``bob.extension.rc`` directly, but ``bob``
    # is never imported at module level (``from ... import`` does not bind
    # the top-level package name), causing a NameError at runtime.
    from bob.extension import rc

    installed = _get_installed_datasets()
    if installed:
        click.echo("Configured datasets:")
        for k in installed:
            value = rc.get(k.group(0))
            click.echo(f"- {k.group('name')}: {k.group(0)} = \"{value}\"")
    else:
        click.echo("No configured datasets")
        click.echo("Try --help to get help in configuring a dataset")
@dataset.command(
    epilog="""Examples:

    1. Check if all files of the DRIVE dataset can be loaded:

       $ bob binseg dataset check -vv drive

    2. Check if all files of multiple installed datasets can be loaded:

       $ bob binseg dataset check -vv drive stare

    3. Check if all files of all installed datasets can be loaded:

       $ bob binseg dataset check
""",
)
@click.argument(
    'dataset',
    nargs=-1,
)
@verbosity_option()
def check(dataset, **kwargs):
    """Checks file access on one or more datasets"""

    to_check = _get_installed_datasets()

    if dataset:  # user requested a subset -- keep only the named datasets
        to_check = [k for k in to_check if k.group("name") in dataset]

    # Bug fix: the original tested ``if not dataset``, which printed the
    # error whenever the user passed NO names -- but per the epilog
    # (example 3), no names means "check every installed dataset".  The
    # right condition is an empty *filtered* list.
    if not to_check:
        click.echo("No configured datasets matching specifications")
        click.echo("Try bob binseg dataset list --help to get help in "
                "configuring a dataset")
    else:
        for k in to_check:
            click.echo(f"Checking \"{k.group('name')}\" dataset...")
            # resolve e.g. "...data.drive" relative to this script's package
            module = importlib.import_module(f"...data.{k.group('name')}",
                    __name__)
            module.dataset.check()
...@@ -58,6 +58,11 @@ test: ...@@ -58,6 +58,11 @@ test:
- bob binseg config describe drive -v - bob binseg config describe drive -v
- bob binseg config copy --help - bob binseg config copy --help
- bob binseg config copy drive /tmp/test.py - bob binseg config copy drive /tmp/test.py
- bob binseg dataset --help
- bob binseg dataset list --help
- bob binseg dataset list
- bob binseg dataset check --help
- bob binseg dataset check
- bob binseg train --help - bob binseg train --help
- bob binseg predict --help - bob binseg predict --help
- bob binseg evaluate --help - bob binseg evaluate --help
......
...@@ -32,6 +32,7 @@ setup( ...@@ -32,6 +32,7 @@ setup(
# bob binseg sub-commands # bob binseg sub-commands
"bob.ip.binseg.cli": [ "bob.ip.binseg.cli": [
"config = bob.ip.binseg.script.config:config", "config = bob.ip.binseg.script.config:config",
"dataset = bob.ip.binseg.script.dataset:dataset",
"train = bob.ip.binseg.script.train:train", "train = bob.ip.binseg.script.train:train",
"predict = bob.ip.binseg.script.predict:predict", "predict = bob.ip.binseg.script.predict:predict",
"evaluate = bob.ip.binseg.script.evaluate:evaluate", "evaluate = bob.ip.binseg.script.evaluate:evaluate",
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.