diff --git a/src/ptbench/data/indian/datamodule.py b/src/ptbench/data/indian/datamodule.py
index 34fb4b5a90fc506cc2096a86526da4bacc7dcaba..6316f264e859c091232a61fd22104fc95b1fe2e6 100644
--- a/src/ptbench/data/indian/datamodule.py
+++ b/src/ptbench/data/indian/datamodule.py
@@ -25,14 +25,14 @@ class DataModule(CachingDataModule):
     in computer-aided diagnosis of pulmonary diseases with a special
     focus on pulmonary tuberculosis (TB).
 
-    * Original resolution (height x width or width x height): more than 1024 x 1024
+    * Original images: PNG, 8-bit grayscale, 1024 x 1024 pixels
     * Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
 
     Data specifications:
 
     * Raw data input (on disk):
 
-        * PNG images (grayscale, encoded as RGB images with "inverted" grayscale scale)
+        * PNG RGB 8-bit depth images with "inverted" grayscale scale
         * Variable width and height
 
     * Output image:
@@ -41,12 +41,14 @@ class DataModule(CachingDataModule):
 
             * Load raw PNG with :py:mod:`PIL`
             * Remove black borders
+            * Convert to torch tensor
             * Torch center cropping to get square image
 
         * Final specifications:
 
-            * Grayscale, encoded as a single plane image, 8 bits
-            * Square, with varying resolutions, depending on the input image
+            * Grayscale, encoded as a single plane tensor, 32-bit floats,
+              square, with varying resolutions, depending on the input raw image
+            * Labels: 0 (healthy), 1 (active tuberculosis)
     """
 
     def __init__(self, split_filename: str):
diff --git a/src/ptbench/data/montgomery/datamodule.py b/src/ptbench/data/montgomery/datamodule.py
index e04efebd6b72d205c2ee5c1e2f23559d7a01599d..12ead6e81a8e2673cd8b388d52bf3dd7ef9d7849 100644
--- a/src/ptbench/data/montgomery/datamodule.py
+++ b/src/ptbench/data/montgomery/datamodule.py
@@ -120,12 +120,14 @@ class DataModule(CachingDataModule):
 
             * Load raw PNG with :py:mod:`PIL`
             * Remove black borders
+            * Convert to torch tensor
             * Torch center cropping to get square image
 
         * Final specifications
 
-            * Grayscale, encoded as a single plane image, 8 bits
-            * Square (4020x4020 px)
+            * Grayscale, encoded as a single plane tensor, 32-bit floats,
+              square at 4020 x 4020 pixels
+            * Labels: 0 (healthy), 1 (active tuberculosis)
     """
 
     def __init__(self, split_filename: str):
diff --git a/src/ptbench/data/nih_cxr14/datamodule.py b/src/ptbench/data/nih_cxr14/datamodule.py
index 5452640949ecc998ea4ee026616a894b1a7be7e9..b5bf77a28254a083455a35484df5248655ba34f7 100644
--- a/src/ptbench/data/nih_cxr14/datamodule.py
+++ b/src/ptbench/data/nih_cxr14/datamodule.py
@@ -127,7 +127,11 @@ class DataModule(CachingDataModule):
     CheXNeXt study.
 
     * Reference: [NIH-CXR14-2017]_
-    * Original resolution (height x width): 1024 x 1024
+    * Raw data input (on disk):
+
+      * PNG RGB 8-bit depth images
+      * Resolution: 1024 x 1024 pixels
+
     * Labels: [CHEXNEXT-2018]_
     * Split reference: [CHEXNEXT-2018]_
     * Protocol ``default``:
@@ -141,11 +145,27 @@ class DataModule(CachingDataModule):
         * Transforms:
 
             * Load raw PNG with :py:mod:`PIL`
-
-        * Final specifications
-
-            * RGB, encoded as a 3-plane image, 8 bits
-            * Square (1024x1024 px)
+            * Convert to torch tensor
+
+        * Final specifications:
+
+            * RGB, encoded as a 3-plane tensor, 32-bit floats, square (1024x1024 px)
+            * Labels in order:
+
+              * cardiomegaly
+              * emphysema
+              * effusion
+              * hernia
+              * infiltration
+              * mass
+              * nodule
+              * atelectasis
+              * pneumothorax
+              * pleural thickening
+              * pneumonia
+              * fibrosis
+              * edema
+              * consolidation
     """
 
     def __init__(self, split_filename: str):
diff --git a/src/ptbench/data/shenzhen/datamodule.py b/src/ptbench/data/shenzhen/datamodule.py
index 8307396df435433f220df0e07cc1f3da24b8e853..7cf1833b27ca1e79382cb7e45fa41b5b4ee27292 100644
--- a/src/ptbench/data/shenzhen/datamodule.py
+++ b/src/ptbench/data/shenzhen/datamodule.py
@@ -112,14 +112,14 @@ class DataModule(CachingDataModule):
     Philips DR Digital Diagnose systems.
 
     * Database reference: [MONTGOMERY-SHENZHEN-2014]_
-    * Original resolution (height x width or width x height): 3000 x 3000 or less
 
     Data specifications:
 
     * Raw data input (on disk):
 
-        * PNG images (grayscale, encoded as RGB images with "inverted" grayscale scale)
-        * Variable width and height
+        * PNG 8-bit RGB images (grayscale, but encoded as RGB images with
+          "inverted" grayscale scale requiring special treatment).
+        * Variable width and height of 3000 x 3000 pixels or less
 
     * Output image:
 
@@ -131,8 +131,9 @@ class DataModule(CachingDataModule):
 
         * Final specifications:
 
-            * Grayscale, encoded as a single plane image, 8 bits
-            * Square, with varying resolutions, depending on the input image
+            * Grayscale, encoded as a single plane tensor, 32-bit floats,
+              square with varying resolutions, depending on the input image
+            * Labels: 0 (healthy), 1 (active tuberculosis)
     """
 
     def __init__(self, split_filename: str):
diff --git a/src/ptbench/data/tbx11k/datamodule.py b/src/ptbench/data/tbx11k/datamodule.py
index 3420b6de672a05400330d1575ed3593579666f99..45785ddbd679a241d90c9c48cfdd526315f48fbc 100644
--- a/src/ptbench/data/tbx11k/datamodule.py
+++ b/src/ptbench/data/tbx11k/datamodule.py
@@ -246,17 +246,17 @@ class DataModule(CachingDataModule):
       active TB cases (total samples = 8369):
 
       - ``train`` dataset samples:
-          - Healthy: 4864
+          - Healthy, Sick or Latent TB: 4864
           - Active TB only: 377
           - Total: 5241
 
       - ``validation`` dataset samples:
+          - Healthy, Sick or Latent TB: 1239
           - Active TB only: 96
-          - Healthy: 1239
           - Total: 1335
 
       - ``test`` dataset samples:
-          - Healthy: 1636
+          - Healthy, Sick or Latent TB: 1636
           - Active TB only: 157
           - Total: 1793
 
@@ -265,8 +265,7 @@ class DataModule(CachingDataModule):
 
     * Raw data input (on disk):
 
-        * PNG images 8 bits RGB
-        * Resolution: 512x512 pixels
+        * PNG images, 8-bit RGB, 512 x 512 pixels
 
     * Output image:
 
@@ -276,8 +275,10 @@ class DataModule(CachingDataModule):
 
         * Final specifications:
 
-            * RGB, encoded as a 3-plane image, 8 bits
-            * Square (512x512 px)
+            * RGB, encoded as a 3-plane tensor using 32-bit floats, square
+              (512x512 pixels)
+            * Labels: 0 (healthy, latent TB or sick but no TB, depending on the
+              protocol), 1 (active tuberculosis)
     """
 
     def __init__(self, split_filename: str):