From 8c198bc2cbd33c68f1524ba6ac873f5a5c7843cc Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Fri, 10 Apr 2020 09:35:10 +0200 Subject: [PATCH] [dataset] New CLI script to list and check datasets --- bob/ip/binseg/data/__init__.py | 2 +- bob/ip/binseg/data/drive/__init__.py | 33 +++++--- bob/ip/binseg/data/stare/__init__.py | 42 ++++++----- bob/ip/binseg/script/dataset.py | 108 +++++++++++++++++++++++++++ conda/meta.yaml | 5 ++ setup.py | 1 + 6 files changed, 161 insertions(+), 30 deletions(-) create mode 100644 bob/ip/binseg/script/dataset.py diff --git a/bob/ip/binseg/data/__init__.py b/bob/ip/binseg/data/__init__.py index d9854dc8..93e77e17 100644 --- a/bob/ip/binseg/data/__init__.py +++ b/bob/ip/binseg/data/__init__.py @@ -1 +1 @@ -from .binsegdataset import BinSegDataset +"""Data manipulation and raw dataset definitions""" diff --git a/bob/ip/binseg/data/drive/__init__.py b/bob/ip/binseg/data/drive/__init__.py index 2e6e8aef..f5531603 100644 --- a/bob/ip/binseg/data/drive/__init__.py +++ b/bob/ip/binseg/data/drive/__init__.py @@ -1,6 +1,25 @@ #!/usr/bin/env python # coding=utf-8 +"""DRIVE dataset for Vessel Segmentation + +The DRIVE database has been established to enable comparative studies on +segmentation of blood vessels in retinal images. 
+ +* Reference: [DRIVE-2004]_ +* Original resolution (height x width): 584 x 565 +* Split reference: [DRIVE-2004]_ +* Protocol ``default``: + + * Training samples: 20 (including labels and masks) + * Test samples: 20 (including labels from annotator 1 and masks) + +* Protocol ``second-annotation``: + + * Test samples: 20 (including labels from annotator 2 and masks) + +""" + import os import pkg_resources @@ -14,7 +33,7 @@ _protocols = [ pkg_resources.resource_filename(__name__, "second-annotation.json"), ] -_root_path = bob.extension.rc.get('bob.db.drive.datadir', +_root_path = bob.extension.rc.get('bob.ip.binseg.drive.datadir', os.path.realpath(os.curdir)) def _loader(s): @@ -25,14 +44,4 @@ def _loader(s): ) dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) -"""DRIVE dataset for Vessel Segmentation - -The DRIVE database has been established to enable comparative studies on -segmentation of blood vessels in retinal images. - -* Reference: [DRIVE-2004]_ -* Original resolution (height x width): 584 x 565 -* Training samples: 20 (including labels and masks) -* Test samples: 20 (including labels from 2 annotators and masks) -* Split reference: [DRIVE-2004]_ -""" +"""DRIVE dataset object""" diff --git a/bob/ip/binseg/data/stare/__init__.py b/bob/ip/binseg/data/stare/__init__.py index 62354bb9..3aa38840 100644 --- a/bob/ip/binseg/data/stare/__init__.py +++ b/bob/ip/binseg/data/stare/__init__.py @@ -1,6 +1,29 @@ #!/usr/bin/env python # coding=utf-8 +"""STARE dataset for Vessel Segmentation + +A subset of the original STARE dataset contains 20 annotated eye fundus images +with a resolution of 700 x 605 (width x height). Two sets of ground-truth +vessel annotations are available. The first set by Adam Hoover ("ah") is +commonly used for training and testing. The second set by Valentina Kouznetsova +("vk") is typically used as a “human” baseline. 
+ +* Reference: [STARE-2000]_ +* Original resolution (width x height): 700 x 605 +* Split reference: [MANINIS-2016]_ +* Protocol ``default``: + + * Training samples: 10 (including labels from annotator "ah") + * Test samples: 10 (including labels from annotator "ah") + +* Protocol ``second-annotation``: + + * Training samples: 10 (including labels from annotator "vk") + * Test samples: 10 (including labels from annotator "vk") + +""" + import os import pkg_resources @@ -14,7 +37,7 @@ _protocols = [ pkg_resources.resource_filename(__name__, "second-annotation.json"), ] -_root_path = bob.extension.rc.get('bob.db.stare.datadir', +_root_path = bob.extension.rc.get('bob.ip.binseg.stare.datadir', os.path.realpath(os.curdir)) def _loader(s): @@ -24,19 +47,4 @@ def _loader(s): ) dataset = JSONDataset(protocols=_protocols, root_path=_root_path, loader=_loader) -"""STARE (training set) for Vessel Segmentation - -A subset of the original STARE dataset contains 20 annotated eye fundus images -with a resolution of 700 x 605 (width x height). Two sets of ground-truth -vessel annotations are available. The first set by Adam Hoover is commonly used -for training and testing. The second set by Valentina Kouznetsova acts as a -“human” baseline. 
- -* Reference: [STARE-2000]_ -* Original resolution (width x height): 700 x 605 -* Training samples: 10 -* Test samples: 10 -* Samples include labels from 2 annotators (AH, default and VK, seconda - annotator) -* Split reference: [MANINIS-2016]_ -""" +"""STARE dataset object""" diff --git a/bob/ip/binseg/script/dataset.py b/bob/ip/binseg/script/dataset.py new file mode 100644 index 00000000..ecbdc05f --- /dev/null +++ b/bob/ip/binseg/script/dataset.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python +# coding=utf-8 + +import importlib +import click + +from bob.extension.scripts.click_helper import ( + verbosity_option, + AliasedGroup, +) + + +import logging +logger = logging.getLogger(__name__) + + +def _get_installed_datasets(): + """Returns a list of regular expression matches, one per installed dataset + + * group(0): the name of the key for the dataset directory + * group("name"): the short name for the dataset + + """ + + import re + from bob.extension import rc + dataset_re = re.compile(r'^bob\.ip\.binseg\.(?P<name>[^\.]+)\.datadir$') + return [m for m in map(dataset_re.match, rc.keys()) if m] # match each key once + + +@click.group(cls=AliasedGroup) +def dataset(): + """Commands for listing and checking installed raw datasets""" + pass + + +@dataset.command( + epilog="""Examples: + +\b + 1. To install a dataset, set up its data directory ("datadir"). For + example, to setup access to DRIVE files you downloaded locally at + the directory "/path/to/drive/files", do the following: +\b + $ bob config set "bob.ip.binseg.drive.datadir" "/path/to/drive/files" + + Notice this setting is **NOT** case-insensitive. + + 2. 
List all raw datasets available (and configured): + + $ bob binseg dataset list -vv + +""", +) +@verbosity_option() +def list(**kwargs): + """Lists all installed datasets""" + from bob.extension import rc # local import: the name ``bob`` is not bound in this module + installed = _get_installed_datasets() + if installed: + click.echo("Configured datasets:") + for k in installed: + value = rc.get(k.group(0)) + click.echo(f"- {k.group('name')}: {k.group(0)} = \"{value}\"") + else: + click.echo("No configured datasets") + click.echo("Try --help to get help in configuring a dataset") + + +@dataset.command( + epilog="""Examples: + + 1. Check if all files of the DRIVE dataset can be loaded: + + $ bob binseg dataset check -vv drive + + 2. Check if all files of multiple installed datasets can be loaded: + + $ bob binseg dataset check -vv drive stare + + 3. Check if all files of all installed datasets can be loaded: + + $ bob binseg dataset check +""", +) +@click.argument( + 'dataset', + nargs=-1, + ) +@verbosity_option() +def check(dataset, **kwargs): + """Checks file access on one or more datasets""" + + to_check = _get_installed_datasets() + + if dataset: # check only the datasets named on the command line + to_check = [k for k in to_check if k.group("name") in dataset] + + if not to_check: # nothing installed, or nothing matched the request + click.echo("No configured datasets matching specifications") + click.echo("Try bob binseg dataset list --help to get help in " + "configuring a dataset") + else: + for k in to_check: + click.echo(f"Checking \"{k.group('name')}\" dataset...") + module = importlib.import_module(f"...data.{k.group('name')}", + __name__) + module.dataset.check() diff --git a/conda/meta.yaml b/conda/meta.yaml index 7b69b581..133f7c29 100644 --- a/conda/meta.yaml +++ b/conda/meta.yaml @@ -58,6 +58,11 @@ test: - bob binseg config describe drive -v - bob binseg config copy --help - bob binseg config copy drive /tmp/test.py + - bob binseg dataset --help + - bob binseg dataset list --help + - bob binseg dataset list + - bob binseg dataset check --help + - bob binseg dataset check - bob binseg train --help - bob binseg 
predict --help - bob binseg evaluate --help diff --git a/setup.py b/setup.py index 5a234f7c..8eb42600 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ setup( # bob binseg sub-commands "bob.ip.binseg.cli": [ "config = bob.ip.binseg.script.config:config", + "dataset = bob.ip.binseg.script.dataset:dataset", "train = bob.ip.binseg.script.train:train", "predict = bob.ip.binseg.script.predict:predict", "evaluate = bob.ip.binseg.script.evaluate:evaluate", -- GitLab