diff --git a/src/ptbench/data/indian/__init__.py b/src/ptbench/data/indian/__init__.py
index 889cf5132cd7711e90b6509f1d1cf0768b152117..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/src/ptbench/data/indian/__init__.py
+++ b/src/ptbench/data/indian/__init__.py
@@ -1,84 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Indian collection dataset for computer-aided diagnosis.
-
-The Indian collection database has been established to foster research
-in computer-aided diagnosis of pulmonary diseases with a special
-focus on pulmonary tuberculosis (TB).
-
-* Reference: [INDIAN-2013]_
-* Original resolution (height x width or width x height): more than 1024 x 1024
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-"""
-
-import importlib.resources
-import os
-
-from ...utils.rc import load_rc
-from .. import make_dataset
-from ..dataset import JSONDataset
-from ..loader import load_pil_grayscale, make_delayed
-
-_protocols = [
-    importlib.resources.files(__name__).joinpath("default.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_0.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_1.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_2.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_3.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_4.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_5.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_6.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_7.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_8.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_9.json.bz2"),
-]
-
-_datadir = load_rc().get("datadir.indian", os.path.realpath(os.curdir))
-
-
-def _raw_data_loader(sample):
-    return dict(
-        data=load_pil_grayscale(os.path.join(_datadir, sample["data"])),
-        label=sample["label"],
-    )
-
-
-def _loader(context, sample):
-    # "context" is ignored in this case - database is homogeneous
-    # we returned delayed samples to avoid loading all images at once
-    return make_delayed(sample, _raw_data_loader)
-
-
-json_dataset = JSONDataset(
-    protocols=_protocols,
-    fieldnames=("data", "label"),
-    loader=_loader,
-)
-"""Indian dataset object."""
-
-
-def _maker(protocol, resize_size=512, cc_size=512, RGB=False):
-    from torchvision import transforms
-
-    from ..augmentations import ElasticDeformation
-    from ..image_utils import RemoveBlackBorders
-
-    post_transforms = []
-    if RGB:
-        post_transforms = [
-            transforms.Lambda(lambda x: x.convert("RGB")),
-            transforms.ToTensor(),
-        ]
-
-    return make_dataset(
-        [json_dataset.subsets(protocol)],
-        [
-            RemoveBlackBorders(),
-            transforms.Resize(resize_size),
-            transforms.CenterCrop(cc_size),
-        ],
-        [ElasticDeformation(p=0.8)],
-        post_transforms,
-    )
diff --git a/src/ptbench/data/indian/datamodule.py b/src/ptbench/data/indian/datamodule.py
new file mode 100644
index 0000000000000000000000000000000000000000..7042a0c485f048f6d7dbd586abccd06478d0558f
--- /dev/null
+++ b/src/ptbench/data/indian/datamodule.py
@@ -0,0 +1,51 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import importlib.resources
+
+from ..datamodule import CachingDataModule
+from ..shenzhen.datamodule import RawDataLoader
+from ..split import JSONDatabaseSplit
+
+
+class DataModule(CachingDataModule):
+    """Indian collection dataset for computer-aided diagnosis.
+
+    The Indian collection database has been established to foster research
+    in computer-aided diagnosis of pulmonary diseases with a special
+    focus on pulmonary tuberculosis (TB).
+
+    * Original resolution (height x width or width x height): more than 1024 x 1024
+    * Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
+
+    Data specifications:
+
+    * Raw data input (on disk):
+
+        * JPEG images (grayscale, encoded as RGB images with "inverted" grayscale scale)
+        * Variable width and height
+
+    * Output image:
+
+        * Transforms:
+
+            * Load raw image with :py:mod:`PIL`
+            * Remove black borders
+            * Torch center cropping to get square image
+
+        * Final specifications:
+
+            * Grayscale, encoded as a single plane image, 8 bits
+            * Square, with varying resolutions, depending on the input image
+    """
+
+    def __init__(self, split_filename: str):
+        # ``split_filename`` is resolved relative to this package (the parent
+        # of this module), where the JSON split definitions are shipped.
+        package = __name__.rsplit(".", 1)[0]
+        split = importlib.resources.files(package).joinpath(split_filename)
+        super().__init__(
+            database_split=JSONDatabaseSplit(split),
+            raw_data_loader=RawDataLoader(),
+        )
diff --git a/src/ptbench/data/indian/datamodules.py b/src/ptbench/data/indian/datamodules.py
deleted file mode 100644
index fc8d52ff4aea06466a3f177a5ede9f610836c277..0000000000000000000000000000000000000000
--- a/src/ptbench/data/indian/datamodules.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Indian dataset for TB detection (default protocol)
-
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("default")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/indian/default.json b/src/ptbench/data/indian/default.json
new file mode 100644
index 0000000000000000000000000000000000000000..3bc3742a8d338260fc9a99f2bf065884e5174ac6
--- /dev/null
+++ b/src/ptbench/data/indian/default.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Training/px12.jpg", 1]
+  ],
+  "validation": [
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Training/px13.jpg", 1]
+  ],
+  "test": [
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Testing/px38.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/default.json.bz2 b/src/ptbench/data/indian/default.json.bz2
deleted file mode 100644
index d3c3ba1f0393f1006262115f942ccbd4b087b7a5..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/default.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/default.py b/src/ptbench/data/indian/default.py
new file mode 100644
index 0000000000000000000000000000000000000000..7fe993a981c86c0161327d1ddb4498e08a90313c
--- /dev/null
+++ b/src/ptbench/data/indian/default.py
@@ -0,0 +1,7 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+from .datamodule import DataModule
+
+datamodule = DataModule("default.json")  # default train/validation/test split
diff --git a/src/ptbench/data/indian/fold_0.json b/src/ptbench/data/indian/fold_0.json
new file mode 100644
index 0000000000000000000000000000000000000000..2bceb4270c6ca6ae0686b5c6fe66d4059b93ff77
--- /dev/null
+++ b/src/ptbench/data/indian/fold_0.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Testing/nx3.jpg", 0]
+  ],
+  "validation": [
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Training/px24.jpg", 1]
+  ],
+  "test": [
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Testing/px46.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_0.json.bz2 b/src/ptbench/data/indian/fold_0.json.bz2
deleted file mode 100644
index 855c3050b6d37dd55866301aadc01b35d950caee..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_0.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_0.py b/src/ptbench/data/indian/fold_0.py
index ffca634728e97fe74cc1f31ff7b6199386ba1ccd..c810e85cea48c13293f0c0d28587e7df4e28be67 100644
--- a/src/ptbench/data/indian/fold_0.py
+++ b/src/ptbench/data/indian/fold_0.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 0)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_0")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_0.json")  # cross-validation fold 0 split
diff --git a/src/ptbench/data/indian/fold_1.json b/src/ptbench/data/indian/fold_1.json
new file mode 100644
index 0000000000000000000000000000000000000000..8dd669d964c4dd9a4416ffe8f732c6f53d96db4c
--- /dev/null
+++ b/src/ptbench/data/indian/fold_1.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Training/nx24.jpg", 0]
+  ],
+  "validation": [
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Training/nx48.jpg", 0]
+  ],
+  "test": [
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Testing/px38.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_1.json.bz2 b/src/ptbench/data/indian/fold_1.json.bz2
deleted file mode 100644
index 536e8855ecf28eddd09b1ab15acdc706c6dfe0a2..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_1.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_1.py b/src/ptbench/data/indian/fold_1.py
index 6075fb0e3a75bd9332acce7a057e1caab2c08506..736a778dab6b708bceba8a282eadfe45e45a86fe 100644
--- a/src/ptbench/data/indian/fold_1.py
+++ b/src/ptbench/data/indian/fold_1.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 1)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_1")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_1.json")  # cross-validation fold 1 split
diff --git a/src/ptbench/data/indian/fold_2.json b/src/ptbench/data/indian/fold_2.json
new file mode 100644
index 0000000000000000000000000000000000000000..412d64fc7d1bdc1e2545d7f53b6796ca75b67e8f
--- /dev/null
+++ b/src/ptbench/data/indian/fold_2.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/nx1.jpg", 0]
+  ],
+  "validation": [
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/px43.jpg", 1]
+  ],
+  "test": [
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Testing/px42.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_2.json.bz2 b/src/ptbench/data/indian/fold_2.json.bz2
deleted file mode 100644
index 2d2f71f0dae9e86ba6d5990b9afd5803b643a440..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_2.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_2.py b/src/ptbench/data/indian/fold_2.py
index a85141422e0e8ff14600487114b23e3d8d8ad7c5..48df1bfe7c4a562e98a5d87aa66b7b7c6f9f1ec3 100644
--- a/src/ptbench/data/indian/fold_2.py
+++ b/src/ptbench/data/indian/fold_2.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 2)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_2")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_2.json")
diff --git a/src/ptbench/data/indian/fold_3.json b/src/ptbench/data/indian/fold_3.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a587001bca1a8da43fbf2fc4b64f5b6c5dffcfa
--- /dev/null
+++ b/src/ptbench/data/indian/fold_3.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Training/px26.jpg", 1]
+  ],
+  "validation": [
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Testing/nx3.jpg", 0]
+  ],
+  "test": [
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Testing/px37.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_3.json.bz2 b/src/ptbench/data/indian/fold_3.json.bz2
deleted file mode 100644
index 93d5428108b9ed474c3ca44f2c660dd1b9712560..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_3.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_3.py b/src/ptbench/data/indian/fold_3.py
index f0e4b15a3f064abede1b57b90522fbd525f0cb5a..9967e4ea9297fadf97192586d66e185bec997e7e 100644
--- a/src/ptbench/data/indian/fold_3.py
+++ b/src/ptbench/data/indian/fold_3.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 3)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_3")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_3.json")
diff --git a/src/ptbench/data/indian/fold_4.json b/src/ptbench/data/indian/fold_4.json
new file mode 100644
index 0000000000000000000000000000000000000000..0342a015a983630b1d3f4ee01d96094a25261298
--- /dev/null
+++ b/src/ptbench/data/indian/fold_4.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Testing/nx10.jpg", 0]
+  ],
+  "validation": [
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Testing/px49.jpg", 1]
+  ],
+  "test": [
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Testing/px33.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_4.json.bz2 b/src/ptbench/data/indian/fold_4.json.bz2
deleted file mode 100644
index aa45648f410cd3318c8efa892a0031bfe0575e55..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_4.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_4.py b/src/ptbench/data/indian/fold_4.py
index 4a1ca92023ba1e882e1044d1645b6fca8e37ae9b..8630ee093f484d19f3e493f8ab059ddb29314a9a 100644
--- a/src/ptbench/data/indian/fold_4.py
+++ b/src/ptbench/data/indian/fold_4.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 4)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_4")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_4.json")
diff --git a/src/ptbench/data/indian/fold_5.json b/src/ptbench/data/indian/fold_5.json
new file mode 100644
index 0000000000000000000000000000000000000000..f8ffb386e2d390331efe8090378f3149fa621be1
--- /dev/null
+++ b/src/ptbench/data/indian/fold_5.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/nx40.jpg", 0]
+  ],
+  "validation": [
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Training/px7.jpg", 1]
+  ],
+  "test": [
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_5.json.bz2 b/src/ptbench/data/indian/fold_5.json.bz2
deleted file mode 100644
index 9364b3a72199720389fd6f9de7785a870865362b..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_5.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_5.py b/src/ptbench/data/indian/fold_5.py
index 7d411c26a99a54dc8f6a2502d96c79bacf0f0482..0c7504c512426a635eae3cdb63b545651fb43f5f 100644
--- a/src/ptbench/data/indian/fold_5.py
+++ b/src/ptbench/data/indian/fold_5.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 5)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_5")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_5.json")  # split shipped as plain JSON
diff --git a/src/ptbench/data/indian/fold_6.json b/src/ptbench/data/indian/fold_6.json
new file mode 100644
index 0000000000000000000000000000000000000000..721aae0fd45851d54e8255d6fe086b09242372ba
--- /dev/null
+++ b/src/ptbench/data/indian/fold_6.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/px37.jpg", 1]
+  ],
+  "validation": [
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/nx46.jpg", 0]
+  ],
+  "test": [
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Testing/px47.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_6.json.bz2 b/src/ptbench/data/indian/fold_6.json.bz2
deleted file mode 100644
index f41ea98ea4e6c79d6f2abc5e26fd8ffca27e7310..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_6.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_6.py b/src/ptbench/data/indian/fold_6.py
index cf1ba7eec0c8be7287ea8592c2174c5dc4b23a0c..2f8e8e320a4cae35036bdeacc964c996d979e9fb 100644
--- a/src/ptbench/data/indian/fold_6.py
+++ b/src/ptbench/data/indian/fold_6.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 6)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_6")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_6.json")  # split shipped as plain JSON
diff --git a/src/ptbench/data/indian/fold_7.json b/src/ptbench/data/indian/fold_7.json
new file mode 100644
index 0000000000000000000000000000000000000000..6cecc46a22967631dde28968f47035792c3860a9
--- /dev/null
+++ b/src/ptbench/data/indian/fold_7.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Testing/px29.jpg", 1]
+  ],
+  "validation": [
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Testing/px31.jpg", 1]
+  ],
+  "test": [
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Testing/px30.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_7.json.bz2 b/src/ptbench/data/indian/fold_7.json.bz2
deleted file mode 100644
index ea48efb36b52774907d720317cec34ebf1479a5a..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_7.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_7.py b/src/ptbench/data/indian/fold_7.py
index 49a4d8f1232a4eabb38432568f16eb3f7e4894a3..389e7f4e58a621ff777547760a7c834aeb518efc 100644
--- a/src/ptbench/data/indian/fold_7.py
+++ b/src/ptbench/data/indian/fold_7.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 7)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_7")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_7.json")  # split shipped as plain JSON
diff --git a/src/ptbench/data/indian/fold_8.json b/src/ptbench/data/indian/fold_8.json
new file mode 100644
index 0000000000000000000000000000000000000000..5a4d1865e82091adc6d99b3250d3eb4d519ad580
--- /dev/null
+++ b/src/ptbench/data/indian/fold_8.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Testing/nx7.jpg", 0],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Testing/px31.jpg", 1],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Training/nx11.jpg", 0]
+  ],
+  "validation": [
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Training/nx2.jpg", 0]
+  ],
+  "test": [
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Testing/px40.jpg", 1],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Testing/px41.jpg", 1]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_8.json.bz2 b/src/ptbench/data/indian/fold_8.json.bz2
deleted file mode 100644
index 49a6c2341e86184469f963359c4d2102ed5249ed..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_8.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_8.py b/src/ptbench/data/indian/fold_8.py
index 9f6340746cce79c973950316bb73e1f3614a9500..a948035965aacef3c249b44cc07eff9e124aa51c 100644
--- a/src/ptbench/data/indian/fold_8.py
+++ b/src/ptbench/data/indian/fold_8.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 8)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_8")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_8.json")  # split shipped as plain JSON
diff --git a/src/ptbench/data/indian/fold_9.json b/src/ptbench/data/indian/fold_9.json
new file mode 100644
index 0000000000000000000000000000000000000000..bb906259ca87226b90142a59478f5059ac3dd1aa
--- /dev/null
+++ b/src/ptbench/data/indian/fold_9.json
@@ -0,0 +1,163 @@
+{
+  "train": [
+    ["DatasetA/Training/nx35.jpg", 0],
+    ["DatasetA/Testing/px30.jpg", 1],
+    ["DatasetA/Training/px35.jpg", 1],
+    ["DatasetA/Testing/px39.jpg", 1],
+    ["DatasetA/Training/nx44.jpg", 0],
+    ["DatasetA/Training/nx47.jpg", 0],
+    ["DatasetA/Training/nx2.jpg", 0],
+    ["DatasetA/Testing/nx13.jpg", 0],
+    ["DatasetA/Training/nx28.jpg", 0],
+    ["DatasetA/Testing/px41.jpg", 1],
+    ["DatasetA/Training/nx10.jpg", 0],
+    ["DatasetA/Training/nx17.jpg", 0],
+    ["DatasetA/Testing/nx24.jpg", 0],
+    ["DatasetA/Testing/px48.jpg", 1],
+    ["DatasetA/Training/px49.jpg", 1],
+    ["DatasetA/Training/nx37.jpg", 0],
+    ["DatasetA/Training/nx21.jpg", 0],
+    ["DatasetA/Training/px51.jpg", 1],
+    ["DatasetA/Testing/px32.jpg", 1],
+    ["DatasetA/Training/px27.jpg", 1],
+    ["DatasetA/Training/nx12.jpg", 0],
+    ["DatasetA/Training/px12.jpg", 1],
+    ["DatasetA/Training/px46.jpg", 1],
+    ["DatasetA/Training/nx33.jpg", 0],
+    ["DatasetA/Training/px10.jpg", 1],
+    ["DatasetA/Training/px19.jpg", 1],
+    ["DatasetA/Training/px29.jpg", 1],
+    ["DatasetA/Training/nx43.jpg", 0],
+    ["DatasetA/Testing/nx19.jpg", 0],
+    ["DatasetA/Training/nx34.jpg", 0],
+    ["DatasetA/Training/nx45.jpg", 0],
+    ["DatasetA/Training/px30.jpg", 1],
+    ["DatasetA/Testing/nx2.jpg", 0],
+    ["DatasetA/Training/px24.jpg", 1],
+    ["DatasetA/Training/nx49.jpg", 0],
+    ["DatasetA/Training/px21.jpg", 1],
+    ["DatasetA/Training/px43.jpg", 1],
+    ["DatasetA/Training/nx7.jpg", 0],
+    ["DatasetA/Training/px44.jpg", 1],
+    ["DatasetA/Training/px41.jpg", 1],
+    ["DatasetA/Training/px9.jpg", 1],
+    ["DatasetA/Training/px47.jpg", 1],
+    ["DatasetA/Testing/nx21.jpg", 0],
+    ["DatasetA/Training/px32.jpg", 1],
+    ["DatasetA/Training/nx38.jpg", 0],
+    ["DatasetA/Training/px23.jpg", 1],
+    ["DatasetA/Training/nx50.jpg", 0],
+    ["DatasetA/Training/nx51.jpg", 0],
+    ["DatasetA/Training/px34.jpg", 1],
+    ["DatasetA/Testing/px51.jpg", 1],
+    ["DatasetA/Training/nx40.jpg", 0],
+    ["DatasetA/Training/nx11.jpg", 0],
+    ["DatasetA/Training/nx31.jpg", 0],
+    ["DatasetA/Testing/nx5.jpg", 0],
+    ["DatasetA/Testing/px49.jpg", 1],
+    ["DatasetA/Training/px52.jpg", 1],
+    ["DatasetA/Testing/nx10.jpg", 0],
+    ["DatasetA/Testing/px44.jpg", 1],
+    ["DatasetA/Training/px2.jpg", 1],
+    ["DatasetA/Training/nx48.jpg", 0],
+    ["DatasetA/Training/nx18.jpg", 0],
+    ["DatasetA/Training/px4.jpg", 1],
+    ["DatasetA/Training/nx8.jpg", 0],
+    ["DatasetA/Training/px16.jpg", 1],
+    ["DatasetA/Testing/px45.jpg", 1],
+    ["DatasetA/Training/nx1.jpg", 0],
+    ["DatasetA/Testing/px33.jpg", 1],
+    ["DatasetA/Training/nx52.jpg", 0],
+    ["DatasetA/Training/nx19.jpg", 0],
+    ["DatasetA/Training/nx13.jpg", 0],
+    ["DatasetA/Training/nx9.jpg", 0],
+    ["DatasetA/Testing/nx26.jpg", 0],
+    ["DatasetA/Testing/px46.jpg", 1],
+    ["DatasetA/Training/nx22.jpg", 0],
+    ["DatasetA/Testing/nx14.jpg", 0],
+    ["DatasetA/Training/nx25.jpg", 0],
+    ["DatasetA/Testing/px52.jpg", 1],
+    ["DatasetA/Training/px3.jpg", 1],
+    ["DatasetA/Testing/nx22.jpg", 0],
+    ["DatasetA/Testing/nx4.jpg", 0],
+    ["DatasetA/Training/px7.jpg", 1],
+    ["DatasetA/Testing/px34.jpg", 1],
+    ["DatasetA/Training/px11.jpg", 1],
+    ["DatasetA/Testing/nx11.jpg", 0],
+    ["DatasetA/Testing/px43.jpg", 1],
+    ["DatasetA/Testing/nx16.jpg", 0],
+    ["DatasetA/Training/px37.jpg", 1],
+    ["DatasetA/Training/nx5.jpg", 0],
+    ["DatasetA/Training/px1.jpg", 1],
+    ["DatasetA/Training/px40.jpg", 1],
+    ["DatasetA/Training/px26.jpg", 1],
+    ["DatasetA/Training/px38.jpg", 1],
+    ["DatasetA/Testing/px27.jpg", 1],
+    ["DatasetA/Testing/nx6.jpg", 0],
+    ["DatasetA/Training/nx24.jpg", 0],
+    ["DatasetA/Training/nx29.jpg", 0],
+    ["DatasetA/Testing/px50.jpg", 1],
+    ["DatasetA/Training/px14.jpg", 1],
+    ["DatasetA/Testing/nx20.jpg", 0],
+    ["DatasetA/Training/nx30.jpg", 0],
+    ["DatasetA/Training/nx3.jpg", 0],
+    ["DatasetA/Testing/nx9.jpg", 0],
+    ["DatasetA/Testing/nx18.jpg", 0],
+    ["DatasetA/Testing/px36.jpg", 1],
+    ["DatasetA/Training/px13.jpg", 1],
+    ["DatasetA/Training/px5.jpg", 1],
+    ["DatasetA/Testing/px28.jpg", 1],
+    ["DatasetA/Testing/nx3.jpg", 0],
+    ["DatasetA/Testing/nx12.jpg", 0],
+    ["DatasetA/Training/px48.jpg", 1],
+    ["DatasetA/Training/px31.jpg", 1],
+    ["DatasetA/Testing/px31.jpg", 1]
+  ],
+  "validation": [
+    ["DatasetA/Training/nx15.jpg", 0],
+    ["DatasetA/Training/px6.jpg", 1],
+    ["DatasetA/Training/nx4.jpg", 0],
+    ["DatasetA/Training/nx6.jpg", 0],
+    ["DatasetA/Training/px33.jpg", 1],
+    ["DatasetA/Training/nx36.jpg", 0],
+    ["DatasetA/Testing/px42.jpg", 1],
+    ["DatasetA/Testing/px38.jpg", 1],
+    ["DatasetA/Training/nx26.jpg", 0],
+    ["DatasetA/Training/nx14.jpg", 0],
+    ["DatasetA/Training/nx39.jpg", 0],
+    ["DatasetA/Testing/nx17.jpg", 0],
+    ["DatasetA/Testing/nx1.jpg", 0],
+    ["DatasetA/Training/nx42.jpg", 0],
+    ["DatasetA/Training/nx32.jpg", 0],
+    ["DatasetA/Training/px25.jpg", 1],
+    ["DatasetA/Testing/px37.jpg", 1],
+    ["DatasetA/Testing/nx8.jpg", 0],
+    ["DatasetA/Training/px50.jpg", 1],
+    ["DatasetA/Testing/px35.jpg", 1],
+    ["DatasetA/Testing/px47.jpg", 1],
+    ["DatasetA/Training/px45.jpg", 1],
+    ["DatasetA/Training/nx41.jpg", 0],
+    ["DatasetA/Training/nx46.jpg", 0],
+    ["DatasetA/Training/px42.jpg", 1],
+    ["DatasetA/Training/px20.jpg", 1],
+    ["DatasetA/Testing/px29.jpg", 1],
+    ["DatasetA/Testing/px40.jpg", 1]
+  ],
+  "test": [
+    ["DatasetA/Training/nx20.jpg", 0],
+    ["DatasetA/Training/nx16.jpg", 0],
+    ["DatasetA/Training/nx27.jpg", 0],
+    ["DatasetA/Training/px15.jpg", 1],
+    ["DatasetA/Training/px8.jpg", 1],
+    ["DatasetA/Training/px28.jpg", 1],
+    ["DatasetA/Training/px17.jpg", 1],
+    ["DatasetA/Training/px39.jpg", 1],
+    ["DatasetA/Training/px22.jpg", 1],
+    ["DatasetA/Training/px36.jpg", 1],
+    ["DatasetA/Training/px18.jpg", 1],
+    ["DatasetA/Testing/nx25.jpg", 0],
+    ["DatasetA/Testing/nx15.jpg", 0],
+    ["DatasetA/Testing/nx23.jpg", 0],
+    ["DatasetA/Testing/nx7.jpg", 0]
+  ]
+}
diff --git a/src/ptbench/data/indian/fold_9.json.bz2 b/src/ptbench/data/indian/fold_9.json.bz2
deleted file mode 100644
index 27a3fb922ea3f408d707e2314a4ed8bcb58b7079..0000000000000000000000000000000000000000
Binary files a/src/ptbench/data/indian/fold_9.json.bz2 and /dev/null differ
diff --git a/src/ptbench/data/indian/fold_9.py b/src/ptbench/data/indian/fold_9.py
index a406bc17902a2904daf5db13a230f89a32b3f8e4..daa85e030720898578ef65a0318f90701b808abf 100644
--- a/src/ptbench/data/indian/fold_9.py
+++ b/src/ptbench/data/indian/fold_9.py
@@ -2,45 +2,6 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Indian dataset for TB detection (cross validation fold 9)
+from .datamodule import DataModule
 
-* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.indian` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class Fold0Module(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_9")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = Fold0Module
+datamodule = DataModule("fold_9.json")  # split shipped as plain JSON