diff --git a/src/ptbench/data/montgomery/__init__.py b/src/ptbench/data/montgomery/__init__.py
index 65239cbf5d908075346675ad10e7c86569383f77..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/src/ptbench/data/montgomery/__init__.py
+++ b/src/ptbench/data/montgomery/__init__.py
@@ -1,88 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for computer-aided diagnosis.
-
-The Montgomery database has been established to foster research
-in computer-aided diagnosis of pulmonary diseases with a special
-focus on pulmonary tuberculosis (TB).
-
-* Reference: [MONTGOMERY-SHENZHEN-2014]_
-* Original resolution (height x width or width x height): 4020 x 4892
-* Split reference: none
-* Protocol ``default``:
-
-  * Training samples: 64% of TB and healthy CXR (including labels)
-  * Validation samples: 16% of TB and healthy CXR (including labels)
-  * Test samples: 20% of TB and healthy CXR (including labels)
-"""
-
-import importlib.resources
-import os
-
-from ...utils.rc import load_rc
-from .. import make_dataset
-from ..dataset import JSONDataset
-from ..loader import load_pil_baw, make_delayed
-
-_protocols = [
-    importlib.resources.files(__name__).joinpath("default.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_0.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_1.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_2.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_3.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_4.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_5.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_6.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_7.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_8.json.bz2"),
-    importlib.resources.files(__name__).joinpath("fold_9.json.bz2"),
-]
-
-_datadir = load_rc().get("datadir.montgomery", os.path.realpath(os.curdir))
-
-
-def _raw_data_loader(sample):
-    return dict(
-        data=load_pil_baw(os.path.join(_datadir, sample["data"])),  # type: ignore
-        label=sample["label"],
-    )
-
-
-def _loader(context, sample):
-    # "context" is ignored in this case - database is homogeneous
-    # we return delayed samples to avoid loading all images at once
-    return make_delayed(sample, _raw_data_loader)
-
-
-json_dataset = JSONDataset(
-    protocols=_protocols,
-    fieldnames=("data", "label"),
-    loader=_loader,
-)
-"""Montgomery dataset object."""
-
-
-def _maker(protocol, resize_size=512, cc_size=512, RGB=False):
-    from torchvision import transforms
-
-    from ..transforms import ElasticDeformation, RemoveBlackBorders
-
-    post_transforms = []
-    if RGB:
-        post_transforms = [
-            transforms.Lambda(lambda x: x.convert("RGB")),
-            transforms.ToTensor(),
-        ]
-
-    return make_dataset(
-        [json_dataset.subsets(protocol)],
-        [
-            RemoveBlackBorders(),
-            transforms.Resize(resize_size),
-            transforms.CenterCrop(cc_size),
-        ],
-        [ElasticDeformation(p=0.8)],
-        post_transforms,
-    )
diff --git a/src/ptbench/data/montgomery/default.py b/src/ptbench/data/montgomery/default.py
index 1f5c0809869be5f011880e808e160024b3c1c1b0..bc93c593f78c15d3a6b1e267117006ab408af2a4 100644
--- a/src/ptbench/data/montgomery/default.py
+++ b/src/ptbench/data/montgomery/default.py
@@ -2,46 +2,50 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (default protocol)
+"""Montgomery datamodule for TB detection (default protocol)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+    
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
 
-    def setup(self, stage: str):
-        self.dataset = _maker("default")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
 
+Protocol ``default``:
+    
+    * Training samples: first 64% of TB and healthy CXR (including labels)
+    * Validation samples: 16% of TB and healthy CXR (including labels)
+    * Test samples: 20% of TB and healthy CXR (including labels)
+"""
+
+import importlib.resources
 
-datamodule = DefaultModule
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "default.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
diff --git a/src/ptbench/data/montgomery/fold_0.py b/src/ptbench/data/montgomery/fold_0.py
index c60791be50ccd5186ce8e4af263efb7d7513b07a..043769518603766044d804e2c6dd39a2176cb1db 100644
--- a/src/ptbench/data/montgomery/fold_0.py
+++ b/src/ptbench/data/montgomery/fold_0.py
@@ -2,46 +2,44 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 0)
+"""Montgomery datamodule for TB detection (cross validation fold 0)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+    
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_0")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
 
+import importlib.resources
+
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_0.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_0_rgb.py b/src/ptbench/data/montgomery/fold_0_rgb.py
deleted file mode 100644
index 8e8b0c8914b6a63dd9ab854984ff2bc51cb4e255..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_0_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 0, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_0", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_1.py b/src/ptbench/data/montgomery/fold_1.py
index d6627e673978bcf960b8fb5f72add7cb4a13a141..8500456bca2c2d2330d56085986b69ed91ab9d91 100644
--- a/src/ptbench/data/montgomery/fold_1.py
+++ b/src/ptbench/data/montgomery/fold_1.py
@@ -2,46 +2,44 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 1)
+"""Montgomery datamodule for TB detection (cross validation fold 1)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+    
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_1")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
 
+import importlib.resources
+
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_1.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_1_rgb.py b/src/ptbench/data/montgomery/fold_1_rgb.py
deleted file mode 100644
index bc47a322c3fd779e3bc19924f6d7ac7c13e71847..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_1_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 1, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_1", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_2.py b/src/ptbench/data/montgomery/fold_2.py
index 8c5f4a66fd2af0b9f26b67241f45c630f69bd06a..e4b7a61482c391fb2b4b04d189b6637c0ced3222 100644
--- a/src/ptbench/data/montgomery/fold_2.py
+++ b/src/ptbench/data/montgomery/fold_2.py
@@ -2,46 +2,44 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 2)
+"""Montgomery datamodule for TB detection (cross validation fold 2)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+    
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_2")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
 
+import importlib.resources
+
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_2.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_2_rgb.py b/src/ptbench/data/montgomery/fold_2_rgb.py
deleted file mode 100644
index b81a877b2bc7372a99812a27935e6daf42401568..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_2_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 2, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_2", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_3.py b/src/ptbench/data/montgomery/fold_3.py
index 8e685d7e3baa3a23924c62a77ffc61bf51e12056..719bf004979db29ea723433ce5bc2dc046aa05ab 100644
--- a/src/ptbench/data/montgomery/fold_3.py
+++ b/src/ptbench/data/montgomery/fold_3.py
@@ -2,46 +2,45 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 3)
+"""Montgomery datamodule for TB detection (cross validation fold 3)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+    
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
+
+import importlib.resources
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_3")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_3.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_3_rgb.py b/src/ptbench/data/montgomery/fold_3_rgb.py
deleted file mode 100644
index 7b600371c8d434d79049c6e6423b36e99f2a32cb..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_3_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 3, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_3", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_4.py b/src/ptbench/data/montgomery/fold_4.py
index 9459cb938605df06823a86a96fbd1cf374fe9738..2e97b114f805846a0cb557ae85613322b4b9a73b 100644
--- a/src/ptbench/data/montgomery/fold_4.py
+++ b/src/ptbench/data/montgomery/fold_4.py
@@ -2,46 +2,45 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 4)
+"""Montgomery datamodule for TB detection (cross validation fold 4)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+    
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
+
+import importlib.resources
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_4")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_4.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_4_rgb.py b/src/ptbench/data/montgomery/fold_4_rgb.py
deleted file mode 100644
index 3eb136f654ab8d8d648468948e05dad774d85076..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_4_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 4, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_4", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_5.py b/src/ptbench/data/montgomery/fold_5.py
index 147690f6d54f15d50b52f88288dbc8a41dfb7f33..4df1451de2eb8bdaca8f5d46408a298f070ab6f0 100644
--- a/src/ptbench/data/montgomery/fold_5.py
+++ b/src/ptbench/data/montgomery/fold_5.py
@@ -2,46 +2,45 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 5)
+"""Montgomery datamodule for TB detection (cross validation fold 5)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+    
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
+
+import importlib.resources
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_5")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_5.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_5_rgb.py b/src/ptbench/data/montgomery/fold_5_rgb.py
deleted file mode 100644
index 3e7cb73f6957086b99147812b07f733dc51af9ec..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_5_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 5, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_5", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_6.py b/src/ptbench/data/montgomery/fold_6.py
index 69f24390ac01271c3e961950d429d973e535c380..d0b36115690ee65f47e421dab91351c4b48f5309 100644
--- a/src/ptbench/data/montgomery/fold_6.py
+++ b/src/ptbench/data/montgomery/fold_6.py
@@ -2,46 +2,45 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 6)
+"""Montgomery datamodule for TB detection (cross validation fold 6)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
+
+import importlib.resources
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_6")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_6.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_6_rgb.py b/src/ptbench/data/montgomery/fold_6_rgb.py
deleted file mode 100644
index ff3a8cdb0c00f511f4ebb7abcfabb10ae7853e99..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_6_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 6, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_6", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_7.py b/src/ptbench/data/montgomery/fold_7.py
index 20ba9d3a7da5ffcb8673e685a0534d82fdb7ed2b..b132b30ea14356774e820bfba5d6b66475c56b17 100644
--- a/src/ptbench/data/montgomery/fold_7.py
+++ b/src/ptbench/data/montgomery/fold_7.py
@@ -2,46 +2,45 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 7)
+"""Montgomery datamodule for TB detection (cross validation fold 7)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
+
+import importlib.resources
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_7")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_7.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_7_rgb.py b/src/ptbench/data/montgomery/fold_7_rgb.py
deleted file mode 100644
index 05664b06ab6393911a77b32418d6f2afb9d455fa..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_7_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 7, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_7", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_8.py b/src/ptbench/data/montgomery/fold_8.py
index e92ff959a9b1028c174c95719867f5086831d6c9..73169ca0b40688e379bd31948e5925552bd7b5b0 100644
--- a/src/ptbench/data/montgomery/fold_8.py
+++ b/src/ptbench/data/montgomery/fold_8.py
@@ -2,46 +2,45 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 8)
+"""Montgomery datamodule for TB detection (cross validation fold 8)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
+
+import importlib.resources
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_8")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_8.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_8_rgb.py b/src/ptbench/data/montgomery/fold_8_rgb.py
deleted file mode 100644
index b7d59359dcde32694affea0e3df88ad747f48e31..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_8_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 8, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_8", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_9.py b/src/ptbench/data/montgomery/fold_9.py
index 81bbf72e78826f7e9560189be149d51cb729064e..18561f80221b05a467c72308f6ebacd5fc3280f8 100644
--- a/src/ptbench/data/montgomery/fold_9.py
+++ b/src/ptbench/data/montgomery/fold_9.py
@@ -2,46 +2,45 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Montgomery dataset for TB detection (cross validation fold 9)
+"""Montgomery datamodule for TB detection (cross validation fold 9)
 
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
+* See :py:mod:`ptbench.data.montgomery` for more database details.
 
-from clapper.logging import setup
+This configuration:
 
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
+* Raw data input (on disk):
 
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
 
+* Output image:
+
+    * Transforms:
 
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
+
+import importlib.resources
 
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_9")
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
+from ..datamodule import CachingDataModule
+from ..split import JSONDatabaseSplit
+from .loader import RawDataLoader
+
+datamodule = CachingDataModule(
+    database_split=JSONDatabaseSplit(
+        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+            "fold_9.json.bz2"
+        )
+    ),
+    raw_data_loader=RawDataLoader(),
+)
 
 
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/fold_9_rgb.py b/src/ptbench/data/montgomery/fold_9_rgb.py
deleted file mode 100644
index e961e08ffe49a94001252c641ba8bee86758b44f..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/fold_9_rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (cross validation fold 9, RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("fold_9", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule
diff --git a/src/ptbench/data/montgomery/loader.py b/src/ptbench/data/montgomery/loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..0dce873853603dfeab4de7f819dc0ab4cd1f6d92
--- /dev/null
+++ b/src/ptbench/data/montgomery/loader.py
@@ -0,0 +1,114 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Montgomery raw-data loader for TB detection (used by the protocol datamodules)
+
+* See :py:mod:`ptbench.data.montgomery` for more database details.
+
+This configuration:
+
+* Raw data input (on disk):
+
+    * PNG images 12 bit grayscale
+    * resolution: 4020 x 4892 px or 4892 x 4020 px
+
+* Output image:
+
+    * Transforms:
+
+        * Load raw PNG with :py:mod:`PIL`
+        * Remove black borders
+        * Torch resizing (512 x 512 px)
+        * Torch center cropping (512 x 512 px)
+
+    * Final specifications
+
+        * Fixed resolution: 512 x 512 px
+        * Color RGB encoding
+"""
+
+import os
+
+import torchvision.transforms
+
+from ...utils.rc import load_rc
+from ..image_utils import RemoveBlackBorders, load_pil_baw
+from ..typing import RawDataLoader as _BaseRawDataLoader
+from ..typing import Sample
+
+
+class RawDataLoader(_BaseRawDataLoader):
+    """A specialized raw-data-loader for the Montgomery dataset.
+
+    Attributes
+    ----------
+
+    datadir
+        This variable contains the base directory where the database raw data
+        is stored.
+
+    transform
+        Transforms that are always applied to the loaded raw images.
+    """
+
+    datadir: str
+    transform: torchvision.transforms.Compose
+
+    def __init__(self):
+        self.datadir = load_rc().get(
+            "datadir.montgomery", os.path.realpath(os.curdir)
+        )
+
+        self.transform = torchvision.transforms.Compose(
+            [
+                RemoveBlackBorders(),
+                torchvision.transforms.Resize(512),
+                torchvision.transforms.CenterCrop(512),
+                torchvision.transforms.ToTensor(),
+            ]
+        )
+
+    def sample(self, sample: tuple[str, int]) -> Sample:
+        """Loads a single image sample from the disk.
+
+        Parameters
+        ----------
+
+        sample:
+            A tuple containing the path suffix, within the dataset root folder,
+            where to find the image to be loaded, and an integer, representing the
+            sample label.
+
+
+        Returns
+        -------
+
+        sample
+            The sample representation
+        """
+        tensor = self.transform(
+            load_pil_baw(os.path.join(self.datadir, sample[0]))
+        )
+
+        return tensor, dict(label=sample[1], name=sample[0])  # type: ignore[arg-type]
+
+    def label(self, sample: tuple[str, int]) -> int:
+        """Returns the label of a single sample, without loading its image.
+
+        Parameters
+        ----------
+
+        sample:
+            A tuple containing the path suffix, within the dataset root folder,
+            where to find the image to be loaded, and an integer, representing the
+            sample label.
+
+
+        Returns
+        -------
+
+        label
+            The integer label associated with the sample
+        """
+        return sample[1]
diff --git a/src/ptbench/data/montgomery/rgb.py b/src/ptbench/data/montgomery/rgb.py
deleted file mode 100644
index c162126648f0baae5a921fa7f009da171fb8ccc7..0000000000000000000000000000000000000000
--- a/src/ptbench/data/montgomery/rgb.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery dataset for TB detection (default protocol, converted in RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.montgomery` for dataset details
-"""
-
-from clapper.logging import setup
-
-from .. import return_subsets
-from ..base_datamodule import BaseDataModule
-from . import _maker
-
-logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
-
-
-class DefaultModule(BaseDataModule):
-    def __init__(
-        self,
-        train_batch_size=1,
-        predict_batch_size=1,
-        drop_incomplete_batch=False,
-        multiproc_kwargs=None,
-    ):
-        super().__init__(
-            train_batch_size=train_batch_size,
-            predict_batch_size=predict_batch_size,
-            drop_incomplete_batch=drop_incomplete_batch,
-            multiproc_kwargs=multiproc_kwargs,
-        )
-
-    def setup(self, stage: str):
-        self.dataset = _maker("default", RGB=True)
-        (
-            self.train_dataset,
-            self.validation_dataset,
-            self.extra_validation_datasets,
-            self.predict_dataset,
-        ) = return_subsets(self.dataset)
-
-
-datamodule = DefaultModule