diff --git a/doc/index.rst b/doc/index.rst
index bfaedab50625befd386de5e030f56d25ab6a73b7..aff3c441cc86760317c136da84ab6ab5a22df7f5 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -74,7 +74,7 @@ User Guide
    baselines
    data-model
    databases/index
-   models/index
+   models
    references
    cli
    api
diff --git a/doc/install.rst b/doc/install.rst
index ff05a1fce0362b3578bfb48836072811a1ed03aa..c2478b781b025130d3818bd1148c9fc626a2cbba 100644
--- a/doc/install.rst
+++ b/doc/install.rst
@@ -194,375 +194,4 @@ A list of out-of-the-box supported data modules for :ref:`classification
 is available in this guide.
 
 
-The following databases contain only the tuberculosis final diagnosis (0 or 1).
-In addition to the splits presented in the following table, 10 folds
-(for cross-validation) randomly generated are available for these databases.
-
-.. list-table::
-
-   * - Database
-     - Reference
-     - H x W
-     - Samples
-     - Training
-     - Validation
-     - Test
-   * - Montgomery_
-     - [MONTGOMERY-SHENZHEN-2014]_
-     - 4020 x 4892
-     - 138
-     - 88
-     - 22
-     - 28
-   * - Shenzhen_
-     - [MONTGOMERY-SHENZHEN-2014]_
-     - Varying
-     - 662
-     - 422
-     - 107
-     - 133
-   * - Indian_
-     - [INDIAN-2013]_
-     - Varying
-     - 155
-     - 83
-     - 20
-     - 52
-
-
-.. _mednet.setup.databases.tb+signs:
-
-Tuberculosis multilabel databases
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The following databases contain the labels healthy, sick & non-TB, active TB,
-and latent TB. The implemented tbx11k database in this package is based on
-the simplified version, which is just a more compact version of the original.
-In addition to the splits presented in the following table, 10 folds
-(for cross-validation) randomly generated are available for these databases.
-
-.. list-table::
-
-   * - Database
-     - Reference
-     - H x W
-     - Samples
-     - Training
-     - Validation
-     - Test
-   * - TBX11K_
-     - [TBX11K-2020]_
-     - 512 x 512
-     - 11'200
-     - 6600
-     - 1800
-     - 2800
-   * - TBX11K_SIMPLIFIED_
-     - [TBX11K-SIMPLIFIED-2020]_
-     - 512 x 512
-     - 11'200
-     - 6600
-     - 1800
-     - 2800
-
-
-.. _mednet.setup.databases.tbmultilabel+signs:
-
-Tuberculosis + radiological findings databases
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The following databases contain both the tuberculosis final diagnosis (0 or 1)
-and radiological findings.
-
-.. list-table::
-
-   * - Database
-     - Reference
-     - H x W
-     - Samples
-     - Train
-     - Test
-   * - PadChest_
-     - [PADCHEST-2019]_
-     - Varying
-     - 160'861
-     - 160'861
-     - 0
-
-
-.. _mednet.setup.databases.signs:
-
-Radiological findings databases
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The following database contains only the radiological findings without any
-information about tuberculosis.
-
-.. note::
-
-   NIH CXR14 labels for training and validation sets are the relabeled
-   versions done by the author of the CheXNeXt study [CHEXNEXT-2018]_.
-
-.. list-table::
-
-   * - Database
-     - Reference
-     - H x W
-     - Samples
-     - Training
-     - Validation
-     - Test
-   * - NIH_CXR14_re_
-     - [NIH-CXR14-2017]_
-     - 1024 x 1024
-     - 109'041
-     - 98'637
-     - 6'350
-     - 4'054
-
-
-.. _mednet.setup.databases.hiv-tb:
-
-HIV-Tuberculosis databases
-~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The following databases contain only the tuberculosis final diagnosis (0 or 1)
-and come from HIV infected patients. 10 folds (for cross-validation) randomly
-generated are available for these databases.
-
-Please contact the authors of these databases to have access to the data.
-
-.. list-table::
-
-   * - Database
-     - Reference
-     - H x W
-     - Samples
-   * - TB POC
-     - [TB-POC-2018]_
-     - 2048 x 2500
-     - 407
-   * - HIV TB
-     - [HIV-TB-2019]_
-     - 2048 x 2500
-     - 243
-
-
-.. _mednet.setup.databases.retinography:
-
-Retinography
-------------
-
-
-.. list-table:: Supported Retinography Datasets (``*``: provided within this package)
-
-   * - Dataset
-     - Reference
-     - H x W
-     - Samples
-     - Mask
-     - Vessel
-     - OD
-     - Cup
-     - Split Reference
-     - Train
-     - Test
-   * - DRIVE_
-     - [DRIVE-2004]_
-     - 584 x 565
-     - 40
-     - ``x``
-     - ``x``
-     -
-     -
-     - [DRIVE-2004]_
-     - 20
-     - 20
-   * - STARE_
-     - [STARE-2000]_
-     - 605 x 700
-     - 20
-     - ``*``
-     - ``x``
-     -
-     -
-     - [MANINIS-2016]_
-     - 10
-     - 10
-   * - CHASE-DB1_
-     - [CHASEDB1-2012]_
-     - 960 x 999
-     - 28
-     - ``*``
-     - ``x``
-     -
-     -
-     - [CHASEDB1-2012]_
-     - 8
-     - 20
-   * - HRF_
-     - [HRF-2013]_
-     - 2336 x 3504
-     - 45
-     - ``x``
-     - ``x``
-     -
-     -
-     - [ORLANDO-2017]_
-     - 15
-     - 30
-   * - IOSTAR_
-     - [IOSTAR-2016]_
-     - 1024 x 1024
-     - 30
-     - ``x``
-     - ``x``
-     - ``x``
-     -
-     - [MEYER-2017]_
-     - 20
-     - 10
-   * - DRIONS-DB_
-     - [DRIONSDB-2008]_
-     - 400 x 600
-     - 110
-     -
-     -
-     - ``x``
-     -
-     - [MANINIS-2016]_
-     - 60
-     - 50
-   * - `RIM-ONE r3`_
-     - [RIMONER3-2015]_
-     - 1424 x 1072
-     - 159
-     -
-     -
-     - ``x``
-     - ``x``
-     - [MANINIS-2016]_
-     - 99
-     - 60
-   * - Drishti-GS1_
-     - [DRISHTIGS1-2014]_
-     - varying
-     - 101
-     -
-     -
-     - ``x``
-     - ``x``
-     - [DRISHTIGS1-2014]_
-     - 50
-     - 51
-   * - REFUGE_
-     - [REFUGE-2018]_
-     - 2056 x 2124 (1634 x 1634)
-     - 1200
-     -
-     -
-     - ``x``
-     - ``x``
-     - [REFUGE-2018]_
-     - 400 (+400)
-     - 400
-   * - DRHAGIS_
-     - [DRHAGIS-2017]_
-     - Varying
-     - 39
-     - ``x``
-     - ``x``
-     -
-     -
-     - [DRHAGIS-2017]_
-     - 19
-     - 20
-
-.. warning:: **REFUGE Dataset Support**
-
-  The original directory ``Training400/AMD`` in REFUGE is considered to be
-  replaced by an updated version provided by the `AMD Grand-Challenge`_ (with
-  matching names).
-
-  The changes concerns images ``A0012.jpg``, which was corrupted in REFUGE, and
-  ``A0013.jpg``, which only exists in the AMD Grand-Challenge version.
-
-
-.. _mednet.setup.databases.xray:
-
-X-Ray
------
-
-.. list-table:: Supported X-Ray Datasets
-
-   * - Dataset
-     - Reference
-     - H x W
-     - Radiography Type
-     - Samples
-     - Mask
-     - Split Reference
-     - Train
-     - Test
-   * - `Montgomery County`_
-     - [MC-2014]_
-     - 4020 x 4892, or 4892 x 4020
-     - Digital Radiography (DR)
-     - 138
-     - ``*``
-     - [GAAL-2020]_
-     - 96 (+14)
-     - 28
-   * - JSRT_
-     - [JSRT-2000]_
-     - 2048 x 2048
-     - Digitized Radiography (laser digitizer)
-     - 247
-     - ``*``
-     - [GAAL-2020]_
-     - 172 (+25)
-     - 50
-   * - Shenzhen_
-     - [SHENZHEN-2014]_
-     - Varying
-     - Computed Radiography (CR)
-     - 662
-     - ``*``
-     - [GAAL-2020]_
-     - 396 (+56)
-     - 114
-   * - CXR8_
-     - [CXR8-2017]_
-     - 1024 x 1024
-     - Digital Radiography
-     - 112120
-     - ``x``
-     - [GAAL-2020]_
-     - 78484 (+11212)
-     - 22424
-
-.. warning:: **SHENZHEN/JSRT/CXR8 Dataset Support**
-
-  For some datasets (in which the annotations/masks are downloaded separately
-  from the dataset with the original images), both the original images and
-  annotations must be downloaded and placed inside the same directory, to match
-  the dataset reference dictionary's path.
-
-  * The Shenzhen_ root directory should then contain at least these two
-    subdirectories:
-
-    - ``CXR_png/`` (directory containing the CXR images)
-    - ``mask/`` (contains masks downloaded from `Shenzhen Annotations`_)
-
-  * The CXR8_ root directory:
-
-    - ``images/`` (directory containing the CXR images)
-    - ``segmentations/`` (contains masks downloaded from `CXR8 Annotations`_)
-
-  * The JSRT_ root directory:
-
-    - ``All247images/`` (directory containing the CXR images, in raw format)
-    - ``scratch/`` (contains masks downloaded from `JSRT Annotations`_)
-
-
 .. include:: links.rst
diff --git a/doc/links.rst b/doc/links.rst
index 4e4585991ed0a0965b1a45dac4629cdeaced0188..0899e0da0484c057f29c85453f1c34b39e3504c2 100644
--- a/doc/links.rst
+++ b/doc/links.rst
@@ -27,7 +27,6 @@
 .. _NIH_CXR14_re: https://nihcc.app.box.com/v/ChestXray-NIHCC
 .. _PadChest: https://bimcv.cipf.es/bimcv-projects/padchest/
 .. _TBX11K: https://mmcheng.net/tb/
-.. _TBX11K_simplified: https://www.kaggle.com/datasets/vbookshelf/tbx11k-simplified
 
 .. _drive: https://github.com/wfdubowen/Retina-Unet/tree/master/DRIVE/
 .. _stare: http://cecas.clemson.edu/~ahoover/stare/
@@ -47,8 +46,8 @@
 
 .. Annotation data websites
 .. _shenzhen annotations: https://www.kaggle.com/yoctoman/shcxr-lung-mask
-.. _cxr8 annotations: https://github.com/lucasmansilla/NIH_chest_xray14_segmentations
-.. _jsrt annotations: https://www.isi.uu.nl/Research/Databases/SCR/download.php
+.. _cxr8-annotations: https://github.com/lucasmansilla/NIH_chest_xray14_segmentations
+.. _jsrt-annotations: https://www.isi.uu.nl/Research/Databases/SCR/download.php
 
 .. models
 .. _imagenet: https://www.image-net.org
diff --git a/doc/models/classify.rst b/doc/models.rst
similarity index 50%
rename from doc/models/classify.rst
rename to doc/models.rst
index a06c4c7617ca9e9e21cb3c129346e12bb4fde17e..c075e115aa162cb8e35b065cbf70942fc5a32f85 100644
--- a/doc/models/classify.rst
+++ b/doc/models.rst
@@ -2,11 +2,19 @@
 ..
 .. SPDX-License-Identifier: GPL-3.0-or-later
 
+.. _mednet.models:
+
+=====================
+ Model Architectures
+=====================
+
+Deep neural network models are categorized by task.
+
+
 .. _mednet.models.classify:
 
-================
- Classification
-================
+Classification
+--------------
 
 Pre-configured models supporting classification tasks.
 
@@ -41,4 +49,42 @@ Pre-configured models supporting classification tasks.
      - :py:class:`.models.classify.pasa.Pasa`
 
 
-.. include:: ../links.rst
+.. _mednet.models.segment:
+
+Semantic Segmentation
+---------------------
+
+Pre-configured models supporting semantic segmentation tasks.
+
+.. list-table:: Pre-configured models
+
+   * - Config. key
+     - Module
+     - Base type
+   * - ``driu``
+     - :py:mod:`.config.segment.models.driu`
+     - :py:class:`.models.segment.driu.DRIU`
+   * - ``driu-bn``
+     - :py:mod:`.config.segment.models.driu_bn`
+     - :py:class:`.models.segment.driu_bn.DRIUBN`
+   * - ``driu-od``
+     - :py:mod:`.config.segment.models.driu_od`
+     - :py:class:`.models.segment.driu_od.DRIUOD`
+   * - ``driu-pix``
+     - :py:mod:`.config.segment.models.driu_pix`
+     - :py:class:`.models.segment.driu_pix.DRIUPix`
+   * - ``hed``
+     - :py:mod:`.config.segment.models.hed`
+     - :py:class:`.models.segment.hed.HED`
+   * - ``lwnet``
+     - :py:mod:`.config.segment.models.lwnet`
+     - :py:class:`.models.segment.lwnet.LittleWNet`
+   * - ``m2unet``
+     - :py:mod:`.config.segment.models.m2unet`
+     - :py:class:`.models.segment.m2unet.M2Unet`
+   * - ``unet``
+     - :py:mod:`.config.segment.models.unet`
+     - :py:class:`.models.segment.unet.Unet`
+
+
+.. include:: links.rst
diff --git a/doc/models/index.rst b/doc/models/index.rst
deleted file mode 100644
index 86ab17786d93695448daf5caf53201b9ce8dc75f..0000000000000000000000000000000000000000
--- a/doc/models/index.rst
+++ /dev/null
@@ -1,17 +0,0 @@
-.. SPDX-FileCopyrightText: Copyright © 2024 Idiap Research Institute <contact@idiap.ch>
-..
-.. SPDX-License-Identifier: GPL-3.0-or-later
-
-.. _mednet.models:
-
-========
- Models
-========
-
-Deep-neural network models are categorized by tasks.
-
-.. toctree::
-   :maxdepth: 2
-
-   classify
-   segment
diff --git a/doc/models/segment.rst b/doc/models/segment.rst
deleted file mode 100644
index 55f7b3a4b31977696b7bf5241c1d23f6bf193788..0000000000000000000000000000000000000000
--- a/doc/models/segment.rst
+++ /dev/null
@@ -1,44 +0,0 @@
-.. SPDX-FileCopyrightText: Copyright © 2024 Idiap Research Institute <contact@idiap.ch>
-..
-.. SPDX-License-Identifier: GPL-3.0-or-later
-
-.. _mednet.models.segment:
-
-=======================
- Semantic Segmentation
-=======================
-
-Pre-configured models supporting semantic segmentation tasks.
-
-.. list-table:: Pre-configured models
-
-   * - Config. key
-     - Module
-     - Base type
-   * - ``driu``
-     - :py:mod:`.config.segment.models.driu`
-     - :py:class:`.models.segment.driu.DRIU`
-   * - ``driu-bn``
-     - :py:mod:`.config.segment.models.driu_bn`
-     - :py:class:`.models.segment.driu_bn.DRIUBN`
-   * - ``driu-od``
-     - :py:mod:`.config.segment.models.driu_od`
-     - :py:class:`.models.segment.driu_od.DRIUOD`
-   * - ``driu-pix``
-     - :py:mod:`.config.segment.models.driu_pix`
-     - :py:class:`.models.segment.driu_pix.DRIUPix`
-   * - ``hed``
-     - :py:mod:`.config.segment.models.hed`
-     - :py:class:`.models.segment.hed.HED`
-   * - ``lwnet``
-     - :py:mod:`.config.segment.models.lwnet`
-     - :py:class:`.models.segment.lwnet.LittleWNet`
-   * - ``m2unet``
-     - :py:mod:`.config.segment.models.m2unet`
-     - :py:class:`.models.segment.m2unet.M2Unet`
-   * - ``unet``
-     - :py:mod:`.config.segment.models.unet`
-     - :py:class:`.models.segment.unet.Unet`
-
-
-.. include:: ../links.rst
diff --git a/doc/references.rst b/doc/references.rst
index eeade046664c935410ff5c0dde39f7b3af879b0e..60b7812ad6f31d22e268a008977b8e7eaed32263 100644
--- a/doc/references.rst
+++ b/doc/references.rst
@@ -61,11 +61,6 @@
    In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern
    Recognition, pages 2646–2655.
 
-.. [TBX11K-SIMPLIFIED-2020] *Liu, Y., Wu, Y.-H., Ban, Y., Wang, H., and Cheng, M.-*,
-   **Rethinking computer-aided tuberculosis diagnosis**,
-   In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern
-   Recognition, pages 2646–2655.
-
 .. [SCORECAM-2020] *H. Wang et al.*, **Score-CAM: Score-Weighted Visual
    Explanations for Convolutional Neural Networks** 2020 IEEE/CVF Conference on
    Computer Vision and Pattern Recognition Workshops (CVPRW), Seattle, WA, USA,
@@ -164,11 +159,6 @@
    Computer Graphics, Visualization and Computer Vision, 2015.
    https://dspace5.zcu.cz/bitstream/11025/29670/1/Fumero.pdf
 
-.. [SHENZHEN-2014] *S. Jaeger, S. Candemir, S. Antani, Y. X. Wáng, P. X. Lu, G.
-   Thoma*, **Two public chest X-ray datasets for computer-aided screening of
-   pulmonary diseases.**, Quantitative imaging in medicine and surgery. 2014.
-   https://doi:10.3978/j.issn.2223-4292.2014.11.20
-
 .. [STARE-2000] *A. D. Hoover, V. Kouznetsova and M. Goldbaum*, **Locating blood
    vessels in retinal images by piecewise threshold probing of a matched filter
    response**, in IEEE Transactions on Medical Imaging, vol. 19, no. 3, pp.
@@ -187,11 +177,6 @@
    Surface Vessels**, SPIE Journal of Medical Imaging, 2017.
    https://doi.org/10.1117/1.jmi.4.1.014503
 
-.. [MC-2014] *S. Jaeger, S. Candemir, S. Antani, Y. X. Wáng, P. X. Lu, G.
-   Thoma*, **Two public chest X-ray datasets for computer-aided screening of
-   pulmonary diseases.**, Quantitative imaging in medicine and surgery. 2014.
-   https://doi.org/10.3978/j.issn.2223-4292.2014.11.20
-
 .. [VISCERAL-2016] *O. Jimenez-del-Toro et al.*, **Cloud-Based Evaluation of
    Anatomical Structure Segmentation and Landmark Detection Algorithms:
    VISCERAL Anatomy Benchmarks**, IEEE Transactions on Medical Imaging, vol.
diff --git a/src/mednet/data/classify/hivtb.py b/src/mednet/data/classify/hivtb.py
index 79d41475a6f2b17ce915aa984291ae80b1450943..1c3f4b9cb411274fffb1582d18b9bbe405c72a2e 100644
--- a/src/mednet/data/classify/hivtb.py
+++ b/src/mednet/data/classify/hivtb.py
@@ -3,30 +3,39 @@
 # SPDX-License-Identifier: GPL-3.0-or-later
 """HIV-TB dataset for computer-aided diagnosis (only BMP files).
 
+This database contains only the tuberculosis final diagnosis (0 or 1) and
+comes from HIV-infected patients.
+
 * Database reference: [HIV-TB-2019]_
-* Original resolution, varying with most images being 2048 x 2500 pixels
-  or 2500 x 2048 pixels, but not all.
+
+.. important:: **Raw data organization**
+
+    The HIV-TB base datadir, which you should configure following the
+    :ref:`mednet.setup` instructions, must contain at least the directory
+    ``HIV-TB/HIV-TB_Algorithm_study_X-rays`` with all BMP and JPEG images.
 
 Data specifications:
 
 * Raw data input (on disk):
 
   * BMP (BMP3) and JPEG grayscale images encoded as 8-bit RGB, with
-    varying resolution
+    varying resolution (most images being 2048 x 2500 pixels or 2500 x 2048
+    pixels, but not all).
+  * Total samples: 243
 
 * Output image:
 
   * Transforms:
 
-    * Load raw BMP or JPEG with :py:mod:`PIL`
+    * Load raw BMP or JPEG with :py:mod:`PIL`, with auto-conversion to
+      grayscale
     * Remove black borders
     * Convert to torch tensor
-    * Torch center cropping to get square image
 
 * Final specifications
 
-  * Grayscale, encoded as a single plane tensor, 32-bit floats,
-    square at 2048 x 2048 pixels
+  * Grayscale, encoded as a single plane tensor, 32-bit floats, with varying
+    resolution depending on input.
   * Labels: 0 (healthy), 1 (active tuberculosis)
 
 This module contains the base declaration of common data modules and raw-data
diff --git a/src/mednet/data/classify/indian.py b/src/mednet/data/classify/indian.py
index 74d57ab703fcda63483cb124af8a712d0e520464..880492535c81012dc987131acf8031fa44d0fd1f 100644
--- a/src/mednet/data/classify/indian.py
+++ b/src/mednet/data/classify/indian.py
@@ -9,31 +9,28 @@ pulmonary tuberculosis (TB).  This database is also known as the "Database
 A/Database B" database.
 
 * Database reference: [INDIAN-2013]_
-* Original images PNG, 8-bit grayscale, 1024 x 1024 pixels
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation
+* Split reference: [INDIAN-2013]_ with 20% of train set for the validation
   set
 
-Data specifications:
-
-* Raw data input (on disk):
+.. important:: **Raw data organization**
 
-  * PNG RGB 8-bit depth images with "inverted" grayscale scale
-  * Variable width and height
+    The Indian_ base datadir, which you should configure following the
+    :ref:`mednet.setup` instructions, must contain at least these two
+    subdirectories:
 
-* Output image:
+    - ``DatasetA/`` (directory containing the dataset A images in JPG format)
+    - ``DatasetB/`` (directory containing the dataset B images in DICOM format)
 
-  * Transforms:
+Data specifications:
 
-    * Load raw PNG with :py:mod:`PIL`
-    * Remove black borders
-    * Convert to torch tensor
-    * Torch center cropping to get square image
+* Raw data input (on disk):
 
-  * Final specifications:
+  * JPG RGB 8-bit depth images with "inverted" grayscale scale, with varying
+    resolution of at least 1024 x 1024 pixels per sample
+  * Samples: 156 images and associated labels
 
-    * Grayscale, encoded as a single plane tensor, 32-bit floats,
-      square, with varying resolutions, depending on the input raw image
-    * Labels: 0 (healthy), 1 (active tuberculosis)
+* Output image:  Use the same transforms and specifications as for
+  :py:mod:`.classify.shenzhen`
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/data/classify/montgomery.py b/src/mednet/data/classify/montgomery.py
index 04c8362daba9ffd2df943505d2c822d4887faf30..a7bd1c1a5e67bc07b99128bdbfe2063631349e7f 100644
--- a/src/mednet/data/classify/montgomery.py
+++ b/src/mednet/data/classify/montgomery.py
@@ -7,16 +7,16 @@ The standard digital image database for Tuberculosis was created by the
 National Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3
 People’s Hospital, Guangdong Medical College, Shenzhen, China.
 
-* Database reference: [MONTGOMERY-SHENZHEN-2014]_
-* Original resolution (height x width or width x height): 4020x4892 px or
-  4892x4020 px
+* Database reference: [MONTGOMERY-SHENZHEN-2014]_
 
 Data specifications:
 
 * Raw data input (on disk):
 
-  * PNG images 8 bit grayscale
-  * resolution: fixed to one of the cases above
+  * PNG images 8 bit grayscale issued from digital radiography machines
+  * Original resolution (height x width or width x height): 4020x4892 px or
+    4892x4020 px
+  * Samples: 138 images and associated labels
 
 * Output image:
 
@@ -29,7 +29,7 @@ Data specifications:
   * Final specifications
 
     * Grayscale, encoded as a single plane tensor, 32-bit floats,
-      square at 4020 x 4020 pixels
+      square, at most 4020 x 4020 pixels
     * Labels: 0 (healthy), 1 (active tuberculosis)
 
 This module contains the base declaration of common data modules and raw-data
diff --git a/src/mednet/data/classify/nih_cxr14.py b/src/mednet/data/classify/nih_cxr14.py
index df91662f360769e94ec75f8bac720e7f3b41b235..5a1981a86b5d3e921e1634e30d60153c4a88cf02 100644
--- a/src/mednet/data/classify/nih_cxr14.py
+++ b/src/mednet/data/classify/nih_cxr14.py
@@ -10,19 +10,35 @@ cardiomegaly, emphysema, effusion, hernia, infiltration, mass, nodule,
 atelectasis, pneumothorax, pleural thickening, pneumonia, fibrosis, edema and
 consolidation. This is the relabeled version created in the CheXNeXt study.
 
-* Reference: [NIH-CXR14-2017]_
+* Database references:
+
+  * Original data: [NIH-CXR14-2017]_
+  * Labels and split references: [CHEXNEXT-2018]_
+
+.. important:: **Raw data organization**
+
+    The NIH_CXR14_re_ base datadir, which you should configure following the
+    :ref:`mednet.setup` instructions, must contain at least the directory
+    ``images/`` with all the images of the database.
+
+    The labels from [CHEXNEXT-2018]_ are already incorporated in this library
+    and do **not** need to be re-downloaded.
+
+    The flag ``idiap_folder_structure`` makes the loader search for files
+    named, e.g. ``images/00030621_006.png``, as
+    ``images/00030/00030621_006.png``.
+
 * Raw data input (on disk):
 
   * PNG RGB 8-bit depth images
   * Resolution: 1024 x 1024 pixels
+  * Total samples available: 109'041
 
-* Labels: [CHEXNEXT-2018]_
-* Split reference: [CHEXNEXT-2018]_
 * Output image:
 
   * Transforms:
 
-    * Load raw PNG with :py:mod:`PIL`
+    * Load raw PNG with :py:mod:`PIL`, with auto-conversion to grayscale
     * Convert to torch tensor
 
   * Final specifications:
diff --git a/src/mednet/data/classify/shenzhen.py b/src/mednet/data/classify/shenzhen.py
index 5a13789d86a5622f8147652450d0899ad69f907f..239be7600e7609f4e474b8c9f7b8c2b30288a31b 100644
--- a/src/mednet/data/classify/shenzhen.py
+++ b/src/mednet/data/classify/shenzhen.py
@@ -11,21 +11,31 @@ using Philips DR Digital Diagnose systems.
 
 * Database reference: [MONTGOMERY-SHENZHEN-2014]_
 
+.. important:: **Raw data organization**
+
+    The Shenzhen_ base datadir, which you should configure following the
+    :ref:`mednet.setup` instructions, must contain at least this subdirectory:
+
+    - ``CXR_png/`` (directory containing the CXR images)
+
 Data specifications:
 
 * Raw data input (on disk):
 
-  * PNG 8-bit RGB images (grayscale, but encoded as RGB images with
-    "inverted" grayscale scale requiring special treatment).
-  * Variable width and height of 3000 x 3000 pixels or less
+  * PNG 8-bit RGB images issued from digital radiography machines (grayscale,
+    but encoded as RGB images with "inverted" grayscale scale requiring special
+    treatment).
+  * Original resolution: variable width and height of 3000 x 3000 pixels or
+    less
+  * Samples: 662 images and associated labels
 
 * Output image:
 
   * Transforms:
 
-    * Load raw PNG with :py:mod:`PIL`
-    * Remove black borders
-    * Torch center cropping to get square image
+    * Load raw data with :py:mod:`PIL` with auto-conversion to grayscale
+    * Remove (completely) black borders
+    * Convert to torch tensor
 
   * Final specifications:
 
diff --git a/src/mednet/data/classify/tbpoc.py b/src/mednet/data/classify/tbpoc.py
index bb25f2ac20d75fee01a21f5f55d9c6eb37640c1b..5f4c5d4bb7d981c912e741af19c05173ffdbe6cb 100644
--- a/src/mednet/data/classify/tbpoc.py
+++ b/src/mednet/data/classify/tbpoc.py
@@ -3,16 +3,25 @@
 # SPDX-License-Identifier: GPL-3.0-or-later
 """TB-POC dataset for computer-aided diagnosis.
 
+This database contains only the tuberculosis final diagnosis (0 or 1) and
+comes from HIV-infected patients.
+
 * Database reference: [TB-POC-2018]_
-* Original resolution (height x width or width x height): 2048 x 2500 pixels
-  or 2500 x 2048 pixels
+
+.. important:: **Raw data organization**
+
+    The TB-POC base datadir, which you should configure following the
+    :ref:`mednet.setup` instructions, must contain at least the directory
+    ``TBPOC_CXR`` with all JPEG images.
 
 Data specifications:
 
 * Raw data input (on disk):
 
   * JPEG 8-bit Grayscale images
-  * resolution: fixed to one of the cases above
+  * Original resolution (height x width or width x height): 2048 x 2500 pixels
+    or 2500 x 2048 pixels
+  * Total samples: 407
 
 * Output image:
 
diff --git a/src/mednet/data/classify/tbx11k.py b/src/mednet/data/classify/tbx11k.py
index 2752a53c5964f617a0e9f1091f677a474ef11733..58516d73d521f07c5e5916c868b60cb0919c6b7e 100644
--- a/src/mednet/data/classify/tbx11k.py
+++ b/src/mednet/data/classify/tbx11k.py
@@ -81,6 +81,15 @@
     The selection of samples is stratified (see comments through our split
     code, which is shipped alongside this file.)
 
+.. important:: **Raw data organization**
+
+    The TBX11k_ base datadir, which you should configure following the
+    :ref:`mednet.setup` instructions, must contain at least these two
+    subdirectories:
+
+    - ``imgs/`` (directory containing sub-directories and images in PNG format)
+    - ``annotations/`` (directory containing labels in JSON and XML format)
+
 Data specifications:
 
 * Raw data input (on disk): PNG images 8 bits RGB, 512 x 512 pixels
@@ -90,6 +99,7 @@ Data specifications:
   * Transforms:
 
     - Load raw PNG with :py:mod:`PIL`
+    - Convert to torch tensor
 
   * Final specifications:
 
diff --git a/src/mednet/data/segment/chasedb1.py b/src/mednet/data/segment/chasedb1.py
index da4c0fba1b2fbdd70da36cb6f6395b6f78e36cfe..e55def08aa409dc829bf4cb2adeab400b647d1ca 100644
--- a/src/mednet/data/segment/chasedb1.py
+++ b/src/mednet/data/segment/chasedb1.py
@@ -17,17 +17,28 @@ blood vessels as compared with the background and wider arteriolars that have a
 bright strip running down the centre known as the central vessel reflex.
 
 * Reference: [CHASEDB1-2012]_
-* Original resolution (height x width): 960 x 999
-* Split reference: [CHASEDB1-2012]_
-* Protocol ``first-annotator``:
 
-  * Training samples: 8 (including labels from annotator "1stHO")
-  * Test samples: 20 (including labels from annotator "1stHO")
+Data specifications:
 
-* Protocol ``second-annotator``:
+* Raw data input (on disk):
 
-  * Training samples: 8 (including labels from annotator "2ndHO")
-  * Test samples: 20 (including labels from annotator "2ndHO")
+  * RGB images encoded in JPG format with resolution (HxW) = 960 x 999 pixels.
+  * Vessel annotations are encoded as PNG images with the same resolution as
+    input samples.
+  * Masks for the eye fundus are provided by this package.
+  * Total samples: 28
+
+* Output sample:
+
+    * Image: Load raw JPG images with :py:mod:`PIL`, with auto-conversion to RGB.
+    * Vessel annotations: Load annotations with :py:mod:`PIL`, with
+      auto-conversion to mode ``1`` with no dithering.
+    * Eye fundus mask: Load mask with :py:mod:`PIL`, with
+      auto-conversion to mode ``1`` with no dithering.
+
+Split ``first-annotator`` contains 8 training samples and 20 test samples
+annotated by expert 1.  Split ``second-annotator`` contains the same samples
+as in ``first-annotator``, but annotated by expert 2.
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/data/segment/cxr8.py b/src/mednet/data/segment/cxr8.py
index 6adefe4d83668d31ac393f117db808d239a59040..570c2e59008d32ad7d68e4c35ea1a4d280f2eb5f 100644
--- a/src/mednet/data/segment/cxr8.py
+++ b/src/mednet/data/segment/cxr8.py
@@ -3,18 +3,46 @@
 # SPDX-License-Identifier: GPL-3.0-or-later
 """ChestX-ray8: Hospital-scale Chest X-ray Database.
 
-The database contains a total of 112120 images. Image size for each X-ray is
-1024 x 1024. One set of mask annotations is available for all images.
-
-* Reference: [CXR8-2017]_
-* Original resolution (height x width): 1024 x 1024
-* Configuration resolution: 256 x 256 (after rescaling)
-* Split reference: [GAAL-2020]_
-* Protocol ``default``:
-
-  * Training samples: 78484 (including labels)
-  * Validation samples: 11212 (including labels)
-  * Test samples: 22424 (including labels)
+The database contains a total of 112'120 images. Image size for each X-ray is
+1024 x 1024. One set of automatically generated mask annotations is available
+for all images.
+
+* Database references:
+
+  * Original data: [CXR8-2017]_
+  * Split reference: [GAAL-2020]_
+
+.. important:: **Raw data organization**
+
+    The CXR8_ base datadir, which you should configure following the
+    :ref:`mednet.setup` instructions, must contain at least the following
+    directories:
+
+    - ``images/`` (directory containing the CXR images, in PNG format)
+    - ``segmentations/`` (must contain masks downloaded from `CXR8-Annotations`_)
+
+    The flag ``idiap_folder_structure`` makes the loader search for files
+    named, e.g. ``images/00030621_006.png``, as
+    ``images/00030/00030621_006.png`` (this is valid for both images and
+    segmentation masks).
+
+* Raw data input (on disk):
+
+  * PNG RGB 8-bit depth images
+  * Resolution: 1024 x 1024 pixels
+  * Total samples available: 112'120
+
+* Output image:
+
+  * Transforms:
+
+    * Load raw PNG with :py:mod:`PIL`, with auto-conversion to RGB, convert to
+      tensor
+    * Labels for each of the lungs are read from the provided GIF files and
+      merged into a single output image.
+
+The ``default`` split contains 78'484 images for training, 11'212 images for
+validation, and 22'424 images for testing.
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/data/segment/drive.py b/src/mednet/data/segment/drive.py
index 0b704d3a5c56aeeecde09118478f1bb8a24e5c95..995cf1ea73c09b6cdcd4bdd569f155db2edd3ab1 100644
--- a/src/mednet/data/segment/drive.py
+++ b/src/mednet/data/segment/drive.py
@@ -4,19 +4,29 @@
 """DRIVE dataset for vessel segmentation.
 
 The DRIVE database has been established to enable comparative studies on
-segmentation of blood vessels in retinal images.
+segmentation of blood vessels in retinal images.  The database contains
+annotations from 2 different experts (only for the test set).
 
-* Reference: [DRIVE-2004]_
-* Original resolution (height x width): 584 x 565
-* Split reference: [DRIVE-2004]_
-* Protocol ``default``:
+* Database reference: [DRIVE-2004]_
 
-  * Training samples: 20 (including labels and masks)
-  * Test samples: 20 (including labels from annotator 1 and masks)
+Data specifications:
 
-* Protocol ``second-annotator``:
+* Raw data input (on disk):
 
-  * Test samples: 20 (including labels from annotator 2 and masks)
+  * RGB images encoded in TIFF format with resolution (HxW) = 584 x 565 pixels
+  * Total samples: 40
+
+* Output sample:
+
+    * Image: Load raw TIFF images with :py:mod:`PIL`, with auto-conversion to RGB.
+    * Vessel annotations: Load annotations with :py:mod:`PIL`, with
+      auto-conversion to mode ``1`` with no dithering.
+    * Eye fundus mask: Load mask with :py:mod:`PIL`, with
+      auto-conversion to mode ``1`` with no dithering.
+
+Split ``default`` includes 20 images for training and another 20 for
+testing.  Split ``second-annotator`` includes only the 20 test images with
+different vessel annotations (expert 2).
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/data/segment/hrf.py b/src/mednet/data/segment/hrf.py
index 80a7f2c037113a04c8a01f6dd70e067e930d1b43..720a35ae825c444fe41fb23b09693c31727d0f1d 100644
--- a/src/mednet/data/segment/hrf.py
+++ b/src/mednet/data/segment/hrf.py
@@ -8,14 +8,28 @@ glaucomatous eyes.  It contains a total  of 45 eye fundus images with a
 resolution of 3304 x 2336. One set of ground-truth vessel annotations is
 available.
 
-* Reference: [HRF-2013]_
-* Original resolution (height x width): 2336 x 3504
-* Configuration resolution: 1168 x 1648 (after specific cropping and rescaling)
-* Split reference: [ORLANDO-2017]_
-* Protocol ``default``:
-
-* Training samples: 15 (including labels)
-* Test samples: 30 (including labels)
+* Database references:
+
+  * Original data: [HRF-2013]_
+  * Split reference: [ORLANDO-2017]_
+
+Data specifications:
+
+* Raw data input (on disk):
+
+  * Original images encoded in (color) JPG format, with resolution 3504 x 2336
+    pixels (width x height).
+  * Vessel labels: encoded as TIFF files, with the same resolution as original
+    images.
+  * Total samples: 45
+
+* Output sample:
+
+  * Image: Load raw JPG images with :py:mod:`PIL`, with auto-conversion to RGB.
+  * Vessel annotations: Load annotations with :py:mod:`PIL`, with
+    auto-conversion to mode ``1`` with no dithering.
+
+The ``default`` split contains 15 images for training and 30 for testing.
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/data/segment/jsrt.py b/src/mednet/data/segment/jsrt.py
index 5398b5d2ba24a5555122f08ba94aeec2f427b5fc..c0c96e83d7047b829c45c6e7d8a1424a5b17bf10 100644
--- a/src/mednet/data/segment/jsrt.py
+++ b/src/mednet/data/segment/jsrt.py
@@ -4,18 +4,48 @@
 """Japanese Society of Radiological Technology dataset for lung segmentation.
 
 The database includes 154 nodule and 93 non-nodule images.  It contains a total
-of 247 resolution of 2048 x 2048.  One set of ground-truth lung annotations is
+of 247 images with a resolution of 2048 x 2048 pixels, issued from original digitized
+Radiographies (laser scanner). One set of ground-truth lung annotations is
 available.
 
-* Reference: [JSRT-2000]_
-* Original resolution (height x width): 2048 x 2048
-* Configuration resolution: 1024 x 1024 (after rescaling)
-* Split reference: [GAAL-2020]_
-* Protocol ``default``:
+* Database references:
 
-* Training samples: 172 (including labels)
-* Validation samples: 25 (including labels)
-* Test samples: 50 (including labels)
+  * Original data: [JSRT-2000]_
+  * Split: [GAAL-2020]_
+
+.. important:: **Raw data organization**
+
+   The JSRT_ base datadir, which you should configure following the
+   :ref:`mednet.setup` instructions, must contain at least the following
+   directories:
+
+   - ``All247images/`` (directory containing the CXR images, in raw format)
+   - ``scratch/`` (must contain masks downloaded from `JSRT-Annotations`_)
+
+Data specifications:
+
+* Raw data input (on disk):
+
+  * Original images encoded in proprietary 12-bit RAW format.  A PNG-converted
+    set of images is provided at JSRT-Kaggle_ for your reference.  Input
+    resolution is 2048 x 2048 pixels.
+  * Masks: encoded as GIF files with separate portions for left and right
+    lungs, with a resolution of 1024 x 1024 pixels
+  * Total samples: 247
+
+* Output sample:
+
+    * Image: Load raw image from folder ``All247images/`` using
+      :py:func:`numpy.fromfile`, then applies a simple histogram equalization
+      to the 8-bit representation of the image, to obtain something along the
+      lines of the PNG (unofficial) version distributed at JSRT-Kaggle_.
+      Output images have a size of 1024 x 1024 pixels, achieved by resizing the
+      original input with bilinear interpolation.
+    * Labels for each of the lungs are read from the provided GIF files and
+      merged into a single output image.
+
+The ``default`` split contains 172 samples for training, 25 for validation and
+50 for test.
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
@@ -97,7 +127,9 @@ class RawDataLoader(SegmentationRawDataLoader):
             The sample representation.
         """
 
-        image = to_tensor(self.load_pil_raw_12bit_jsrt(self.datadir / sample[0]))
+        image = self.load_pil_raw_12bit_jsrt(self.datadir / sample[0])
+        assert image.size == (2048, 2048)
+        image = to_tensor(image.resize((1024, 1024), PIL.Image.Resampling.BILINEAR))
 
         # Combine left and right lung masks into a single tensor
         assert sample[2] is not None
diff --git a/src/mednet/data/segment/montgomery.py b/src/mednet/data/segment/montgomery.py
index 69a21fcb44866fb245ecb8bd750b9e6c50efbf1b..c14f14d52b24f90a24db37cc3f5df714b244c859 100644
--- a/src/mednet/data/segment/montgomery.py
+++ b/src/mednet/data/segment/montgomery.py
@@ -7,7 +7,7 @@ The standard digital image database for Tuberculosis was created by the National
 Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3 People’s
 Hospital, Guangdong Medical College, Shenzhen, China. The Chest X-rays are from
 
-* Database reference: [MONTGOMERY-SHENZHEN-2014]_
+* Database reference: [MONTGOMERY-SHENZHEN-2014]_, [GAAL-2020]_
 * Original resolution (height x width or width x height): 4020x4892 px or
   4892x4020 px
 
@@ -15,8 +15,10 @@ Data specifications:
 
 * Raw data input (on disk):
 
-  * PNG images 8 bit grayscale
-  * resolution: fixed to one of the cases above
+  * PNG images 8 bit grayscale issued from digital radiography machines
+  * Original resolution (height x width or width x height): 4020x4892 px or
+    4892x4020 px
+  * Samples: 138 images and associated labels
 
 * Output image:
 
@@ -29,9 +31,9 @@ Data specifications:
 
     * image: Grayscale, encoded as a single plane tensor, 32-bit floats,
       original size.
-    * target: A mask containing ones where lungs are in the original image,
-      otherwise, zeroes.
-    * mask: All ones (no specific mask)
+    * target: A binary mask containing ones where lungs are in the original
+      image, otherwise, zeroes.
+    * mask: Binary, with all ones (no specific mask)
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/data/segment/refuge.py b/src/mednet/data/segment/refuge.py
index 6b985189d9168daf05fc037eddd67dcd98c8d0be..1aa6d028632f0192985d27a9a588fda2b9fb4c4e 100644
--- a/src/mednet/data/segment/refuge.py
+++ b/src/mednet/data/segment/refuge.py
@@ -8,29 +8,40 @@ challenge. The goal of the challenge is to evaluate and compare automated
 algorithms for glaucoma detection and optic disc/cup segmentation on a common
 dataset of retinal fundus images.
 
-* Reference (including train/dev/test split): [REFUGE-2018]_
-* Protocols ``optic-disc`` and ``cup``:
+* Database reference (including train/dev/test split): [REFUGE-2018]_
 
-* Training samples:
+.. warning::
 
-  * 400
-  * includes optic-disc and cup labels
-  * includes label: glaucomatous and non-glaucomatous
-  * original resolution: 2056 x 2124
+   The original directory ``Training400/AMD`` in REFUGE is considered to be
+   replaced by an updated version provided by the `AMD Grand-Challenge`_ (with
+   matching names).
 
-* Validation samples:
+   The changes concern images ``A0012.jpg``, which was corrupted in REFUGE,
+   and ``A0013.jpg``, which only exists in the AMD Grand-Challenge version.
 
-  * 400
-  * includes optic-disc and cup labels
-  * includes label: glaucomatous and non-glaucomatous
-  * original resolution: 1634 x 1634
+Data specifications:
 
-* Test samples:
+* Raw data input (on disk):
 
-  * 400
-  * includes optic-disc and cup labels
-  * includes label: glaucomatous and non-glaucomatous
-  * original resolution:
+  * RGB images encoded in JPG format with varying resolution.  Training images
+    are (HxW) 2056 x 2124 pixels; Validation (and test) images are 1634 x 1634
+    pixels.
+  * Optic-disc and cup annotations are encoded as BMP images with the same
+    resolution as input samples.
+  * Masks for the eye fundus are provided by this package.
+  * Total samples: 1200 distributed as 400 (training), 400 (validation) and 400
+    (test).
+
+* Output sample:
+
+    * Image: Load raw JPG images with :py:mod:`PIL`, with auto-conversion to RGB.
+    * Optic-disc/cup annotations: Load annotations with :py:mod:`PIL`, with
+      auto-conversion to mode ``1`` with no dithering.
+    * Eye fundus mask: Load mask with :py:mod:`PIL`, with
+      auto-conversion to mode ``1`` with no dithering.
+
+Splits ``optic-disc`` and ``cup`` contain annotations for optic-disc or cup
+segmentation.
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/data/segment/shenzhen.py b/src/mednet/data/segment/shenzhen.py
index 188d577841cbf8018084bdb6cfaff62618b0bc1b..c90cc7f174ecbbc9ab084e82879e1fa6242c2cec 100644
--- a/src/mednet/data/segment/shenzhen.py
+++ b/src/mednet/data/segment/shenzhen.py
@@ -1,22 +1,57 @@
 # SPDX-FileCopyrightText: Copyright © 2024 Idiap Research Institute <contact@idiap.ch>
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
-"""Shenzhen No.3 People’s Hospital dataset for lung segmentation.
+"""Shenzhen DataModule for computer-aided semantic segmentation of lungs.
+
+The standard digital image database for Tuberculosis was created by the
+National Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3
+People’s Hospital, Guangdong Medical College, Shenzhen, China. The Chest X-rays
+are from out-patient clinics, and were captured as part of the daily routine
+using Philips DR Digital Diagnose systems.
 
 The database includes 336 cases with manifestation of tuberculosis, and 326
-normal cases.  It contains a total  of 662 images. Image size varies for each
+normal cases.  It contains a total  of 662 images.  Image size varies for each
 X-ray. It is approximately 3K x 3K. One set of ground-truth lung annotations is
 available for 566 of the 662 images.
 
-* Reference: [SHENZHEN-2014]_
-* Original resolution (height x width): Approximately 3K x 3K (varies)
-* Configuration resolution: 512 x 512 (after rescaling)
-* Split reference: [GAAL-2020]_
-* Protocol ``default``:
+* Database references:
+
+  * Original data: [MONTGOMERY-SHENZHEN-2014]_
+  * Splits: [GAAL-2020]_
+
+.. important:: **Raw data organization**
+
+    The Shenzhen_ base datadir, which you should configure following the
+    :ref:`mednet.setup` instructions, must contain at least these two
+    subdirectories:
+
+    - ``CXR_png/`` (directory containing the CXR images)
+    - ``mask/`` (contains masks downloaded from `Shenzhen Annotations`_)
+
+Data specifications:
+
+* Raw data input (on disk):
+
+  * PNG 8-bit RGB images issued from digital radiography machines (grayscale,
+    but encoded as RGB images with "inverted" grayscale scale requiring special
+    treatment).
+  * Original resolution: variable width and height of 3000 x 3000 pixels or
+    less
+  * Samples: 566 images and associated labels
+
+* Output image:
+
+  * Transforms:
+
+    * Load raw PNG with :py:mod:`PIL`
+    * Torch center cropping to get square image
+
+  * Final specifications:
 
-* Training samples: 396 (including labels)
-* Validation samples: 56 (including labels)
-* Test samples: 114 (including labels)
+    * Grayscale, encoded as a 3-plane tensor, 32-bit floats,
+      square with varying resolutions, depending on the input image
+    * Labels: Binary mask with annotated lungs (1 where lungs are; 0 otherwise)
+    * Mask: Binary mask with all ones
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/data/segment/stare.py b/src/mednet/data/segment/stare.py
index 1a89c1e7d79c1078fd3c3d5a00a3a545586734e5..8be5936afea3198ff0b1a67df2f7cec6f8876c31 100644
--- a/src/mednet/data/segment/stare.py
+++ b/src/mednet/data/segment/stare.py
@@ -9,18 +9,30 @@ vessel annotations are available. The first set by Adam Hoover ("ah") is
 commonly used for training and testing. The second set by Valentina Kouznetsova
 ("vk") is typically used as a “human” baseline.
 
-* Reference: [STARE-2000]_
-* Original resolution (width x height): 700 x 605
-* Split reference: [MANINIS-2016]_
-* Protocol ``ah`` (default baseline):
+* Database references:
 
-  * Training samples: 10 (including labels from annotator "ah")
-  * Test samples: 10 (including labels from annotator "ah")
+  * Original data: [STARE-2000]_
+  * Split reference: [MANINIS-2016]_
 
-* Protocol ``vk`` (normally used as human comparison):
+Data specifications:
 
-  * Training samples: 10 (including labels from annotator "vk")
-  * Test samples: 10 (including labels from annotator "vk")
+* Raw data input (on disk):
+
+  * RGB images encoded in PPM format with resolution (HxW) = 605 x 700
+  * Total samples: 397 (out of which only 20 are annotated for vessel
+    segmentation)
+
+* Output sample:
+
+    * Image: Load raw PPM images with :py:mod:`PIL`, with auto-conversion to RGB.
+    * Vessel annotations: Load annotations with :py:mod:`PIL`, with
+      auto-conversion to mode ``1`` with no dithering.
+    * Eye fundus mask: Load mask with :py:mod:`PIL`, with
+      auto-conversion to mode ``1`` with no dithering.
+
+Protocol ``ah`` (default baseline, with first, more detailed annotator)
+includes 10 training samples and 10 test samples.  Protocol ``vk`` (second
+annotator) includes the same samples but annotated by a second expert.
 
 This module contains the base declaration of common data modules and raw-data
 loaders for this database. All configured splits inherit from this definition.
diff --git a/src/mednet/scripts/database.py b/src/mednet/scripts/database.py
index a987e30e1051a7548f0b02bf31c12e04aa97262c..50a89a6bc0ba4276683d534fafda36e1bf76d340 100644
--- a/src/mednet/scripts/database.py
+++ b/src/mednet/scripts/database.py
@@ -21,7 +21,7 @@ def _get_raw_databases() -> dict[str, dict[str, str | list]]:
         * ``module``: the full Pythonic module name (e.g.
           ``mednet.data.classify.montgomery``).
         * ``datadir``: points to the user-configured data directory for the
-          current dataset, if set, or ``None`` otherwise.
+          current database, if set, or ``None`` otherwise.
     """
 
     import importlib
@@ -52,7 +52,7 @@ def _get_raw_databases() -> dict[str, dict[str, str | list]]:
 
 
 def _list_raw_databases():
-    """List raw datasets to a string representation."""
+    """List raw databases to a string representation."""
 
     def _echo(left: str, right: str, color: str = "white") -> None:
         s = [
@@ -117,7 +117,7 @@ def list_():
 @database.command(
     epilog="""Examples:
 
-    1. Check if all files from the split 'montgomery-f0' of the Montgomery
+    1. Check if all files from the config split 'montgomery-f0' of the Montgomery
        database can be loaded:
 
        .. code:: sh
@@ -133,25 +133,27 @@ def list_():
 @click.option(
     "--limit",
     "-l",
-    help="Limit check to the first N samples in each split dataset, making the "
-    "check sensibly faster.  Set it to zero (default) to check everything.",
+    help="Limit check to the first N samples in each split in the "
+    "configuration, making the check sensibly faster. Set it to "
+    "zero (default) to check everything.",
     required=True,
     type=click.IntRange(0),
     default=0,
+    show_default=True,
 )
 @verbosity_option(logger=logger, expose_value=False)
 def check(entrypoint, limit):  # numpydoc ignore=PR01
-    """Check file access on one or more DataModules."""
+    """Check file access on a database configuration split."""
     import importlib.metadata
     import sys
 
-    click.secho(f"Checking entrypoint `{entrypoint}`...", fg="yellow")
+    click.secho(f"Checking database split config `{entrypoint}`...", fg="yellow")
     try:
         module = importlib.metadata.entry_points(group="mednet.config")[
             entrypoint
         ].module
     except KeyError:
-        raise Exception(f"Could not find database entrypoint `{entrypoint}`")
+        raise Exception(f"Could not find database split config `{entrypoint}`")
 
     datamodule = importlib.import_module(module).datamodule
 
@@ -165,19 +167,21 @@ def check(entrypoint, limit):  # numpydoc ignore=PR01
     for k, loader in loaders.items():
         if limit == 0:
             click.secho(
-                f"Checking all samples of dataset `{k}` at entrypoint `{entrypoint}`...",
+                f"Checking all {len(loader)} samples of split `{k}` at config "
+                f"`{entrypoint}`...",
                 fg="yellow",
             )
             loader_limit = sys.maxsize
         else:
             click.secho(
                 f"Checking first {limit} samples of dataset "
-                f"`{k}` at entrypoint `{entrypoint}`...",
+                f"`{k}` at config `{entrypoint}`...",
                 fg="yellow",
             )
             loader_limit = limit
-        # the for loop will trigger raw data loading (ie. user code), protect
-        # it
+
+        # the for loop will trigger raw data loading (ie. user code), protect it
+        i = 0
         try:
             for i, batch in enumerate(loader):
                 if loader_limit == 0:
@@ -194,7 +198,7 @@ def check(entrypoint, limit):  # numpydoc ignore=PR01
                     )
                 loader_limit -= 1
         except Exception:
-            logger.exception(f"Unable to load batch {i} in dataset {k}")
+            logger.exception(f"Unable to load sample {i} at split {k}")
             errors += 1
 
     if not errors:
diff --git a/src/mednet/scripts/utils.py b/src/mednet/scripts/utils.py
index 86eedabba4dfacb61a0134ebee0d6351f0ce48a7..bcd246d6be531b3a31f2f423f46d8be92d5075a8 100644
--- a/src/mednet/scripts/utils.py
+++ b/src/mednet/scripts/utils.py
@@ -117,12 +117,13 @@ def execution_metadata() -> dict[str, int | float | str | dict[str, str] | list[
         * ``accelerator``: acceleration devices available (e.g. ``cuda``)
     """
 
+    import datetime
     import importlib.metadata
     import importlib.util
     import os
     import sys
 
-    args = []
+    args: list[str] = []
     for k in sys.argv:
         if " " in k:
             args.append(f"'{k}'")
@@ -130,7 +131,7 @@ def execution_metadata() -> dict[str, int | float | str | dict[str, str] | list[
             args.append(k)
 
     # current date time, in ISO8610 format
-    datetime = __import__("datetime").datetime.now().astimezone().isoformat()
+    current_datetime = datetime.datetime.now().astimezone().isoformat()
 
     # collects dependency information
     package_name = __package__.split(".")[0] if __package__ is not None else "unknown"
@@ -194,7 +195,7 @@ def execution_metadata() -> dict[str, int | float | str | dict[str, str] | list[
     }
 
     return {
-        "datetime": datetime,
+        "datetime": current_datetime,
         "package-name": package_name,
         "package-version": current_version,
         "python": python,