diff --git a/src/ptbench/data/hivtb/datamodule.py b/src/ptbench/data/hivtb/datamodule.py
index 63075c61720e393edd2953a29af67336be954952..b5b84ec434ea4ce4145119bf565d0d8ee8a091d9 100644
--- a/src/ptbench/data/hivtb/datamodule.py
+++ b/src/ptbench/data/hivtb/datamodule.py
@@ -11,7 +11,7 @@ from torchvision.transforms.functional import center_crop, to_tensor
 
 from ...utils.rc import load_rc
 from ..datamodule import CachingDataModule
-from ..image_utils import load_pil_grayscale, remove_black_borders
+from ..image_utils import remove_black_borders
 from ..split import JSONDatabaseSplit
 from ..typing import DatabaseSplit
 from ..typing import RawDataLoader as _BaseRawDataLoader
@@ -54,7 +54,9 @@ class RawDataLoader(_BaseRawDataLoader):
         sample
             The sample representation
         """
-        image = load_pil_grayscale(os.path.join(self.datadir, sample[0]))
+        image = PIL.Image.open(os.path.join(self.datadir, sample[0])).convert(
+            "L"
+        )
         image = remove_black_borders(image)
         tensor = to_tensor(image)
         tensor = center_crop(tensor, min(*tensor.shape[1:]))
@@ -99,21 +101,21 @@ class DataModule(CachingDataModule):
     """HIV-TB dataset for computer-aided diagnosis (only BMP files)
 
     * Database reference: [HIV-TB-2019]_
-    * Original resolution (height x width or width x height): 2048 x 2500 pixels
-      or 2500 x 2048 pixels
-    
+    * Original resolution: varies; most images are 2048 x 2500 pixels or
+      2500 x 2048 pixels, but not all
+
     Data specifications:
 
     * Raw data input (on disk):
 
-        * BMP images 8 bit grayscale
-        * resolution fixed to one of the cases above
+        * BMP (BMP3) and JPEG grayscale images encoded as 8-bit RGB, with
+          varying resolution
 
     * Output image:
 
         * Transforms:
 
-            * Load raw BMP with :py:mod:`PIL`
+            * Load raw BMP or JPEG with :py:mod:`PIL`
             * Remove black borders
             * Convert to torch tensor
             * Torch center cropping to get square image
diff --git a/src/ptbench/data/hivtb/fold_0.py b/src/ptbench/data/hivtb/fold_0.py
index ba9e9150efd11f36b0b846b894e318200f7a2957..57d77952ad0f012f4c7224f38dc293aa58d72dcd 100644
--- a/src/ptbench/data/hivtb/fold_0.py
+++ b/src/ptbench/data/hivtb/fold_0.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 0)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-0.json")
+"""HIV-TB dataset for TB detection (cross validation fold 0).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_1.py b/src/ptbench/data/hivtb/fold_1.py
index 84fb758134f62c4d496aedd35377c17f90876389..c91a968f500204bd1fa30e43e168dbf3e7f0edab 100644
--- a/src/ptbench/data/hivtb/fold_1.py
+++ b/src/ptbench/data/hivtb/fold_1.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 1)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-1.json")
+"""HIV-TB dataset for TB detection (cross validation fold 1).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_2.py b/src/ptbench/data/hivtb/fold_2.py
index a5f5e97a6d66d0938ba26a357786d03efb1716a5..323e80a02a0b44b5691d13abc971679182e2d97f 100644
--- a/src/ptbench/data/hivtb/fold_2.py
+++ b/src/ptbench/data/hivtb/fold_2.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 2)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-2.json")
+"""HIV-TB dataset for TB detection (cross validation fold 2).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_3.py b/src/ptbench/data/hivtb/fold_3.py
index 1b643ae4cf515f3b9219fa75a950bca77297eaa4..1eed4c056648bce88f174ccbce8a71efe69fc136 100644
--- a/src/ptbench/data/hivtb/fold_3.py
+++ b/src/ptbench/data/hivtb/fold_3.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 3)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-3.json")
+"""HIV-TB dataset for TB detection (cross validation fold 3).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_4.py b/src/ptbench/data/hivtb/fold_4.py
index 581eb85ce1023410e2a1cf31128f3b8effa183ed..9cfa6186d6dc7d44f8bcfa56d7c978e7bf346c54 100644
--- a/src/ptbench/data/hivtb/fold_4.py
+++ b/src/ptbench/data/hivtb/fold_4.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 4)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-4.json")
+"""HIV-TB dataset for TB detection (cross validation fold 4).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_5.py b/src/ptbench/data/hivtb/fold_5.py
index 47ae66d13f22764f73ee83c8605f1f57686448b5..591fef3732b522569a92082cb7e3c208c16bf2da 100644
--- a/src/ptbench/data/hivtb/fold_5.py
+++ b/src/ptbench/data/hivtb/fold_5.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 5)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-5.json")
+"""HIV-TB dataset for TB detection (cross validation fold 5).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_6.py b/src/ptbench/data/hivtb/fold_6.py
index c93232f4ef9dd80009541414de1881573b7c88cd..fb5e1614b349779d42771bd165a9a1d96c6cb83d 100644
--- a/src/ptbench/data/hivtb/fold_6.py
+++ b/src/ptbench/data/hivtb/fold_6.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 6)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-6.json")
+"""HIV-TB dataset for TB detection (cross validation fold 6).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_7.py b/src/ptbench/data/hivtb/fold_7.py
index 33d5cc83de84c41d30ed490650eb0ca077be05b8..d64db4837f24058d34b2daf4c8383595aee7be21 100644
--- a/src/ptbench/data/hivtb/fold_7.py
+++ b/src/ptbench/data/hivtb/fold_7.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 7)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-7.json")
+"""HIV-TB dataset for TB detection (cross validation fold 7).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_8.py b/src/ptbench/data/hivtb/fold_8.py
index 91d89557ef60cc116cf6d4f4f8c216fac1b797a5..8a0f87d10c934f08249ed4f0206c09b5bbc6a7a9 100644
--- a/src/ptbench/data/hivtb/fold_8.py
+++ b/src/ptbench/data/hivtb/fold_8.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 8)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-8.json")
+"""HIV-TB dataset for TB detection (cross validation fold 8).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""
diff --git a/src/ptbench/data/hivtb/fold_9.py b/src/ptbench/data/hivtb/fold_9.py
index 0e0063e8b13a997ce6670714836610260aff1952..d92de50e75cd18ea9b99d1bdb010f6f88872b9cc 100644
--- a/src/ptbench/data/hivtb/fold_9.py
+++ b/src/ptbench/data/hivtb/fold_9.py
@@ -2,20 +2,10 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""HIV-TB dataset for TB detection (cross validation fold 9)
-
-* Split reference: none (stratified kfolding)
-
-* Stratified kfold protocol:
-    * Training samples: 72% of TB and healthy CXR (including labels)
-    * Validation samples: 18% of TB and healthy CXR (including labels)
-    * Test samples: 10% of TB and healthy CXR (including labels)
-
-* This configuration resolution: 2048 x 2048 (default)
-* See :py:mod:`ptbench.data.hivtb` for dataset details
-"""
-
 from .datamodule import DataModule
 
 datamodule = DataModule("fold-9.json")
+"""HIV-TB dataset for TB detection (cross validation fold 9).
 
+See :py:class:`ptbench.data.hivtb.datamodule.DataModule` for technical details.
+"""