diff --git a/bob/ip/binseg/script/dataset.py b/bob/ip/binseg/script/dataset.py index ecbdc05f27d7d9b451e261f715d4ef458e280dff..24d9df702bb3abae6a2929b467e70026278ecb19 100644 --- a/bob/ip/binseg/script/dataset.py +++ b/bob/ip/binseg/script/dataset.py @@ -1,9 +1,12 @@ #!/usr/bin/env python # coding=utf-8 +import os +import pkg_resources import importlib import click +from bob.extension import rc from bob.extension.scripts.click_helper import ( verbosity_option, AliasedGroup, @@ -14,6 +17,20 @@ import logging logger = logging.getLogger(__name__) +def _get_supported_datasets(): + """Returns a list of supported dataset names + """ + + basedir = pkg_resources.resource_filename(__name__, '') + basedir = os.path.join(os.path.dirname(basedir), 'data') + + retval = [] + for k in os.listdir(basedir): + candidate = os.path.join(basedir, k) + if os.path.isdir(candidate) and 'test.py' in os.listdir(candidate): + retval.append(k) + return retval + def _get_installed_datasets(): """Returns a list of installed datasets as regular expressions @@ -23,7 +40,6 @@ def _get_installed_datasets(): """ import re - from bob.extension import rc dataset_re = re.compile(r'^bob\.ip\.binseg\.(?P<name>[^\.]+)\.datadir$') return [dataset_re.match(k) for k in rc.keys() if dataset_re.match(k)] @@ -44,27 +60,28 @@ def dataset(): \b $ bob config set "bob.ip.binseg.drive.datadir" "/path/to/drive/files" - Notice this setting is **NOT** case-insensitive. + Notice this setting **is** case-sensitive. - 2. List all raw datasets available (and configured): + 2. 
List all raw datasets supported (and configured): - $ bob binseg dataset list -vv + $ bob binseg dataset list """, ) @verbosity_option() def list(**kwargs): - """Lists all installed datasets""" + """Lists all supported and configured datasets""" + supported = _get_supported_datasets() installed = _get_installed_datasets() - if installed: - click.echo("Configured datasets:") - for k in installed: - value = bob.extension.rc.get(k.group(0)) - click.echo(f"- {k.group('name')}: {k.group(0)} = \"{value}\"") - else: - click.echo("No configured datasets") - click.echo("Try --help to get help in configuring a dataset") + installed = dict((k.group("name"), k.group(0)) for k in installed) + + click.echo("Supported datasets:") + for k in supported: + if k in installed: + click.echo(f"- {k}: {installed[k]} = \"{rc.get(installed[k])}\"") + else: + click.echo(f"* {k}: bob.ip.binseg.{k}.datadir (not set)") @dataset.command( diff --git a/doc/cli.rst b/doc/cli.rst index 03f7c2dc7c692c1aba580d3f05393396f585f4e1..9315447d778ef1917a1f9404d677cd8cbb2e70d4 100644 --- a/doc/cli.rst +++ b/doc/cli.rst @@ -13,6 +13,37 @@ applications can be retrieved using: .. command-output:: bob binseg --help +Setup +----- + +A CLI application to list and check installed (raw) datasets. + +.. _bob.ip.binseg.cli.dataset: + +.. command-output:: bob binseg dataset --help + + +List available datasets +======================= + +Lists supported and configured raw datasets. + +.. _bob.ip.binseg.cli.dataset.list: + +.. command-output:: bob binseg dataset list --help + + +Check available datasets +======================== + +Checks if we can load all files listed for a given dataset (all subsets in all +protocols). + +.. _bob.ip.binseg.cli.dataset.check: + +.. 
command-output:: bob binseg dataset check --help + + Preset Configuration Resources ------------------------------ diff --git a/doc/datasets.rst b/doc/datasets.rst index 92f74bf99bef44db877146b3e7328e28c4e1ea83..c8931e3a83f071fa6a7d34b6619bce5bc41b21ba 100644 --- a/doc/datasets.rst +++ b/doc/datasets.rst @@ -16,7 +16,6 @@ to generate iterators for training and testing. * - Dataset - Reference - - ``bob.db`` package - H x W - Samples - Mask @@ -28,7 +27,6 @@ to generate iterators for training and testing. - Test * - DRIVE_ - [DRIVE-2004]_ - - ``bob.db.drive`` - 584 x 565 - 40 - x @@ -40,7 +38,6 @@ to generate iterators for training and testing. - 20 * - STARE_ - [STARE-2000]_ - - ``bob.db.stare`` - 605 x 700 - 20 - @@ -64,7 +61,6 @@ to generate iterators for training and testing. - 20 * - HRF_ - [HRF-2013]_ - - ``bob.db.hrf`` - 2336 x 3504 - 45 - x @@ -76,7 +72,6 @@ to generate iterators for training and testing. - 30 * - IOSTAR_ - [IOSTAR-2016]_ - - ``bob.db.iostar`` - 1024 x 1024 - 30 - x @@ -88,7 +83,6 @@ to generate iterators for training and testing. - 10 * - DRIONS-DB_ - [DRIONSDB-2008]_ - - ``bob.db.drionsdb`` - 400 x 600 - 110 - @@ -100,7 +94,6 @@ to generate iterators for training and testing. - 50 * - `RIM-ONE r3`_ - [RIMONER3-2015]_ - - ``bob.db.rimoner3`` - 1424 x 1072 - 159 - @@ -112,7 +105,6 @@ to generate iterators for training and testing. - 60 * - Drishti-GS1_ - [DRISHTIGS1-2014]_ - - ``bob.db.drishtigs1`` - varying - 101 - @@ -124,7 +116,6 @@ to generate iterators for training and testing. - 51 * - REFUGE_ (train) - [REFUGE-2018]_ - - ``bob.db.refuge`` - 2056 x 2124 - 400 - @@ -136,7 +127,6 @@ to generate iterators for training and testing. 
- * - REFUGE_ (val) - [REFUGE-2018]_ - - ``bob.db.refuge`` - 1634 x 1634 - 400 - diff --git a/doc/setup.rst b/doc/setup.rst index 5eaa6653d555d4fe36bcbba5ac92cb7a77e94635..0955a905bc559e98a74106f1e6415425ebf7cc74 100644 --- a/doc/setup.rst +++ b/doc/setup.rst @@ -37,48 +37,36 @@ To setup a dataset, do the following: you unpack them in their **pristine** state. Changing the location of files within a dataset distribution will likely cause execution errors. -3. For each dataset that you are planning to use, set the ``datadir`` to the +2. For each dataset that you are planning to use, set the ``datadir`` to the root path where it is stored. E.g.: .. code-block:: sh - (<myenv>) $ bob config set bob.db.drive.datadir "/path/to/drivedataset/" + (<myenv>) $ bob config set bob.ip.binseg.drive.datadir "/path/to/drive" - To check your current setup, do the following: + To check supported raw datasets and your current setup, do the following: .. code-block:: sh - (<myenv>) $ bob config show - { - "bob.db.chasedb1.datadir": "/path/to/chasedb1/", - "bob.db.drionsdb.datadir": "/path/to/drionsdb", - "bob.db.drive.datadir": "/path/to/drive", - "bob.db.hrf.datadir": "/path/to/hrf", - } - - This command will show the set location for each configured dataset. These - paths are automatically used by the dataset iterators provided by the - ``bob.db`` packages to find the raw datafiles. - -4. To check whether the downloaded version is consistent with the structure - that is expected by our ``bob.db`` packages, run ``bob_dbmanage.py - <dataset> checkfiles``, where ``<dataset>`` should be replaced by the + (<myenv>) $ bob binseg dataset list + Supported datasets: + - drive: bob.ip.binseg.drive.datadir = "/path/to/drive" + * stare: bob.ip.binseg.stare.datadir (not set) + + This command will show the set location for each configured dataset, and + the variable names for each supported dataset which has not yet been set up. + +3. 
To check whether the downloaded version is consistent with the structure + that is expected by this package, run ``bob binseg dataset check + <dataset>``, where ``<dataset>`` should be replaced by the dataset programmatic name. E.g., to check DRIVE files, use: .. code-block:: sh - (<myenv>) $ bob_dbmanage.py drive checkfiles - > checkfiles completed sucessfully + (<myenv>) $ bob binseg dataset check drive + ... If there are problems on the current file organisation, this procedure - should detect and highlight which files are missing. - - .. tip:: - - The programmatic name of datasets follow the ``bob.db.<dataset>`` - nomenclature. For example, the programmatic name of CHASE-DB1 is - ``chasedb1``, because the package name implementing iterators to its - files is ``bob.db.chasedb1``. - + should detect and highlight which files are missing (cannot be loaded). .. include:: links.rst