[doc] Revamp setup guide to use new strategy

eb193423 · André Anjos · 8c198bc2 · eb193423 · eb193423 · eb193423
Commit eb193423 authored 5 years ago by André Anjos
--- a/bob/ip/binseg/script/dataset.py
+++ b/bob/ip/binseg/script/dataset.py
 #!/usr/bin/env python
 # coding=utf-8
+import os
+import pkg_resources
 import importlib
 import click
+from bob.extension import rc
 from bob.extension.scripts.click_helper import (
    verbosity_option,
    AliasedGroup,
@@ -14,6 +17,20 @@ import logging
 logger = logging.getLogger(__name__)
+def _get_supported_datasets():
+    """Returns a list of supported dataset names
+
+    Scans the ``data`` directory that sits next to the location returned by
+    ``pkg_resources.resource_filename(__name__, '')`` and keeps every
+    sub-directory that contains an entry named ``test.py``.
+
+    Returns
+    -------
+    list of str
+        Names of the matching sub-directories, in the order ``os.listdir``
+        yields them (no sorting is applied here).
+    """
+    # NOTE(review): presumably resolves to the directory holding this module
+    # - confirm against the installed package layout
+    basedir = pkg_resources.resource_filename(__name__, '')
+    # the candidates live in the sibling directory named "data"
+    basedir = os.path.join(os.path.dirname(basedir), 'data')
+    retval = []
+    for k in os.listdir(basedir):
+        candidate = os.path.join(basedir, k)
+        # only directories holding an entry called "test.py" are reported
+        if os.path.isdir(candidate) and 'test.py' in os.listdir(candidate):
+            retval.append(k)
+    return retval
 def _get_installed_datasets():
    """Returns a list of installed datasets as regular expressions
@@ -23,7 +40,6 @@ def _get_installed_datasets():
    """
    import re
-    from bob.extension import rc
    dataset_re = re.compile(r'^bob\.ip\.binseg\.(?P<name>[^\.]+)\.datadir$')
    return [dataset_re.match(k) for k in rc.keys() if dataset_re.match(k)]
@@ -44,27 +60,28 @@ def dataset():
 \b
       $ bob config set "bob.ip.binseg.drive.datadir" "/path/to/drive/files"
-       Notice this setting is **NOT** case-insensitive.
+       Notice this setting **is** case-sensitive.
-    2. List all raw datasets available (and configured):
+    2. List all raw datasets supported (and configured):
-       $ bob binseg dataset list -vv
+       $ bob binseg dataset list
 """,
 )
 @verbosity_option()
 def list(**kwargs):
-    """Lists all installed datasets"""
+    """Lists all supported and configured datasets"""
+    supported = _get_supported_datasets()
    installed = _get_installed_datasets()
-    if installed:
+    installed = dict((k.group("name"), k.group(0)) for k in installed)
-        click.echo("Configured datasets:")
-        for k in installed:
+    click.echo("Supported datasets:")
-            value = bob.extension.rc.get(k.group(0))
+    for k in supported:
-            click.echo(f"- {k.group('name')}: {k.group(0)} = \"{value}\"")
+        if k in installed:
-    else:
+            click.echo(f"- {k}: {installed[k]} = \"{rc.get(installed[k])}\"")
-        click.echo("No configured datasets")
+        else:
-        click.echo("Try --help to get help in configuring a dataset")
+            click.echo(f"* {k}: bob.ip.binseg.{k}.datadir (not set)")
 @dataset.command(

--- a/doc/cli.rst
+++ b/doc/cli.rst
@@ -13,6 +13,37 @@ applications can be retrieved using:
 .. command-output:: bob binseg --help
+Setup
+-----
+A CLI application to list and check installed (raw) datasets.
+.. _bob.ip.binseg.cli.dataset:
+.. command-output:: bob binseg dataset --help
+List available datasets
+=======================
+Lists supported and configured raw datasets.
+.. _bob.ip.binseg.cli.dataset.list:
+.. command-output:: bob binseg dataset list --help
+Check available datasets
+========================
+Checks if we can load all files listed for a given dataset (all subsets in all
+protocols).
+.. _bob.ip.binseg.cli.dataset.check:
+.. command-output:: bob binseg dataset check --help
 Preset Configuration Resources
 ------------------------------

--- a/doc/datasets.rst
+++ b/doc/datasets.rst
@@ -16,7 +16,6 @@ to generate iterators for training and testing.
   * - Dataset
     - Reference
-     - ``bob.db`` package
     - H x W
     - Samples
     - Mask
@@ -28,7 +27,6 @@ to generate iterators for training and testing.
     - Test
   * - DRIVE_
     - [DRIVE-2004]_
-     - ``bob.db.drive``
     - 584 x 565
     - 40
     - x
@@ -40,7 +38,6 @@ to generate iterators for training and testing.
     - 20
   * - STARE_
     - [STARE-2000]_
-     - ``bob.db.stare``
     - 605 x 700
     - 20
     -
@@ -64,7 +61,6 @@ to generate iterators for training and testing.
     - 20
   * - HRF_
     - [HRF-2013]_
-     - ``bob.db.hrf``
     - 2336 x 3504
     - 45
     - x
@@ -76,7 +72,6 @@ to generate iterators for training and testing.
     - 30
   * - IOSTAR_
     - [IOSTAR-2016]_
-     - ``bob.db.iostar``
     - 1024 x 1024
     - 30
     - x
@@ -88,7 +83,6 @@ to generate iterators for training and testing.
     - 10
   * - DRIONS-DB_
     - [DRIONSDB-2008]_
-     - ``bob.db.drionsdb``
     - 400 x 600
     - 110
     -
@@ -100,7 +94,6 @@ to generate iterators for training and testing.
     - 50
   * - `RIM-ONE r3`_
     - [RIMONER3-2015]_
-     - ``bob.db.rimoner3``
     - 1424 x 1072
     - 159
     -
@@ -112,7 +105,6 @@ to generate iterators for training and testing.
     - 60
   * - Drishti-GS1_
     - [DRISHTIGS1-2014]_
-     - ``bob.db.drishtigs1``
     - varying
     - 101
     -
@@ -124,7 +116,6 @@ to generate iterators for training and testing.
     - 51
   * - REFUGE_ (train)
     - [REFUGE-2018]_
-     - ``bob.db.refuge``
     - 2056 x 2124
     - 400
     -
@@ -136,7 +127,6 @@ to generate iterators for training and testing.
     -
   * - REFUGE_ (val)
     - [REFUGE-2018]_
-     - ``bob.db.refuge``
     - 1634 x 1634
     - 400
     -

--- a/doc/setup.rst
+++ b/doc/setup.rst
@@ -37,48 +37,36 @@ To setup a dataset, do the following:
      you unpack them in their **pristine** state.  Changing the location of
      files within a dataset distribution will likely cause execution errors.
-3.  For each dataset that you are planning to use, set the ``datadir`` to the
+2.  For each dataset that you are planning to use, set the ``datadir`` to the
    root path where it is stored.  E.g.:
    .. code-block:: sh
-       (<myenv>) $ bob config set bob.db.drive.datadir "/path/to/drivedataset/"
+       (<myenv>) $ bob config set bob.ip.binseg.drive.datadir "/path/to/drive"
-    To check your current setup, do the following:
+    To check supported raw datasets and your current setup, do the following:
    .. code-block:: sh
-       (<myenv>) $ bob config show
+       (<myenv>) $ bob binseg dataset list
-       {
+       Supported datasets:
-           "bob.db.chasedb1.datadir": "/path/to/chasedb1/",
+       - drive: bob.ip.binseg.drive.datadir = "/Users/andre/work/bob/dbs/drive"
-           "bob.db.drionsdb.datadir": "/path/to/drionsdb",
+       * stare: bob.ip.binseg.stare.datadir (not set)
-           "bob.db.drive.datadir": "/path/to/drive",
-           "bob.db.hrf.datadir": "/path/to/hrf",
+    This command will show the set location for each configured dataset, and
-       }
+    the variable names for each supported dataset which has not yet been set up.
-    This command will show the set location for each configured dataset.  These
+3. To check whether the downloaded version is consistent with the structure
-    paths are automatically used by the dataset iterators provided by the
+   that is expected by this package, run ``bob binseg dataset check
-    ``bob.db`` packages to find the raw datafiles.
+   <dataset>``, where ``<dataset>`` should be replaced by the
-4. To check whether the downloaded version is consistent with the structure
-   that is expected by our ``bob.db`` packages, run ``bob_dbmanage.py
-   <dataset> checkfiles``, where ``<dataset>`` should be replaced by the
   dataset programmatic name. E.g., to check DRIVE files, use:
   .. code-block:: sh
-      (<myenv>) $ bob_dbmanage.py drive checkfiles
+      (<myenv>) $ bob binseg dataset check drive
-      > checkfiles completed sucessfully
+      ...
   If there are problems with the current file organisation, this procedure
-   should detect and highlight which files are missing.
+   should detect and highlight which files are missing (cannot be loaded).
-   .. tip::
-      The programmatic name of datasets follow the ``bob.db.<dataset>``
-      nomenclature.  For example, the programmatic name of CHASE-DB1 is
-      ``chasedb1``, because the package name implementing iterators to its
-      files is ``bob.db.chasedb1``.
 .. include:: links.rst