From 5d89b43721185297d419bc995ea336264314a3e9 Mon Sep 17 00:00:00 2001
From: Andre Anjos <andre.dos.anjos@gmail.com>
Date: Wed, 26 Jul 2023 23:22:45 +0200
Subject: [PATCH] [data.montgomery/shenzhen] Make all declarations into a
 single Python module (c.f. issue #27)

---
 .../montgomery/{loader.py => datamodules.py}  | 66 +++++++++++++++++-
 src/ptbench/data/montgomery/default.py        | 61 -----------------
 src/ptbench/data/montgomery/fold_0.py         | 23 -------
 src/ptbench/data/montgomery/fold_1.py         | 23 -------
 src/ptbench/data/montgomery/fold_2.py         | 23 -------
 src/ptbench/data/montgomery/fold_3.py         | 23 -------
 src/ptbench/data/montgomery/fold_4.py         | 23 -------
 src/ptbench/data/montgomery/fold_5.py         | 23 -------
 src/ptbench/data/montgomery/fold_6.py         | 23 -------
 src/ptbench/data/montgomery/fold_7.py         | 23 -------
 src/ptbench/data/montgomery/fold_8.py         | 23 -------
 src/ptbench/data/montgomery/fold_9.py         | 23 -------
 .../shenzhen/{loader.py => datamodules.py}    | 68 ++++++++++++++++++-
 src/ptbench/data/shenzhen/default.py          | 56 ---------------
 src/ptbench/data/shenzhen/fold_0.py           | 23 -------
 src/ptbench/data/shenzhen/fold_1.py           | 23 -------
 src/ptbench/data/shenzhen/fold_2.py           | 23 -------
 src/ptbench/data/shenzhen/fold_3.py           | 23 -------
 src/ptbench/data/shenzhen/fold_4.py           | 23 -------
 src/ptbench/data/shenzhen/fold_5.py           | 23 -------
 src/ptbench/data/shenzhen/fold_6.py           | 23 -------
 src/ptbench/data/shenzhen/fold_7.py           | 23 -------
 src/ptbench/data/shenzhen/fold_8.py           | 23 -------
 src/ptbench/data/shenzhen/fold_9.py           | 23 -------
 24 files changed, 130 insertions(+), 581 deletions(-)
 rename src/ptbench/data/montgomery/{loader.py => datamodules.py} (54%)
 delete mode 100644 src/ptbench/data/montgomery/default.py
 delete mode 100644 src/ptbench/data/montgomery/fold_0.py
 delete mode 100644 src/ptbench/data/montgomery/fold_1.py
 delete mode 100644 src/ptbench/data/montgomery/fold_2.py
 delete mode 100644 src/ptbench/data/montgomery/fold_3.py
 delete mode 100644 src/ptbench/data/montgomery/fold_4.py
 delete mode 100644 src/ptbench/data/montgomery/fold_5.py
 delete mode 100644 src/ptbench/data/montgomery/fold_6.py
 delete mode 100644 src/ptbench/data/montgomery/fold_7.py
 delete mode 100644 src/ptbench/data/montgomery/fold_8.py
 delete mode 100644 src/ptbench/data/montgomery/fold_9.py
 rename src/ptbench/data/shenzhen/{loader.py => datamodules.py} (52%)
 delete mode 100644 src/ptbench/data/shenzhen/default.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_0.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_1.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_2.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_3.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_4.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_5.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_6.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_7.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_8.py
 delete mode 100644 src/ptbench/data/shenzhen/fold_9.py

diff --git a/src/ptbench/data/montgomery/loader.py b/src/ptbench/data/montgomery/datamodules.py
similarity index 54%
rename from src/ptbench/data/montgomery/loader.py
rename to src/ptbench/data/montgomery/datamodules.py
index ad856d5f..bf368c78 100644
--- a/src/ptbench/data/montgomery/loader.py
+++ b/src/ptbench/data/montgomery/datamodules.py
@@ -2,8 +2,7 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Specialized raw-data loaders for the Montgomery dataset."""
-
+import importlib.resources
 import os
 
 import PIL.Image
@@ -11,7 +10,9 @@ import PIL.Image
 from torchvision.transforms.functional import center_crop, to_tensor
 
 from ...utils.rc import load_rc
+from ..datamodule import CachingDataModule
 from ..image_utils import remove_black_borders
+from ..split import JSONDatabaseSplit
 from ..typing import RawDataLoader as _BaseRawDataLoader
 from ..typing import Sample
 
@@ -85,3 +86,64 @@ class RawDataLoader(_BaseRawDataLoader):
             The integer label associated with the sample
         """
         return sample[1]
+
+
+class DataModule(CachingDataModule):
+    """Montgomery datamodule for TB detection.
+
+    The standard digital image database for Tuberculosis was created by the National Library
+    of Medicine, Maryland, USA in collaboration with Shenzhen No.3 People’s Hospital, Guangdong
+    Medical College, Shenzhen, China. The Chest X-rays are from Montgomery County, MD, USA.
+
+    * Database reference: [MONTGOMERY-SHENZHEN-2014]_
+    * Original resolution (height x width or width x height): 4020x4892 px or 4892x4020 px
+    * This split:
+
+      * Split reference: None
+      * Training samples: 64% of TB and healthy CXR (``default`` split)
+      * Validation samples: 16% of TB and healthy CXR (``default`` split)
+      * Test samples: 20% of TB and healthy CXR (``default`` split)
+
+    Data specifications:
+
+    * Raw data input (on disk):
+
+        * PNG images 8 bit grayscale
+        * resolution: fixed to one of the cases above
+
+    * Output image:
+
+        * Transforms:
+
+            * Load raw PNG with :py:mod:`PIL`
+            * Remove black borders
+            * Torch center cropping to get square image
+
+        * Final specifications:
+
+            * Grayscale, encoded as a single plane image, 8 bits
+            * Square (4020x4020 px)
+    """
+
+    def __init__(self, split_filename: str):
+        super().__init__(
+            database_split=JSONDatabaseSplit(
+                importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+                    split_filename
+                )
+            ),
+            raw_data_loader=RawDataLoader(),
+        )
+
+
+default = DataModule("default.json.bz2")
+fold_0 = DataModule("fold_0.json.bz2")
+fold_1 = DataModule("fold_1.json.bz2")
+fold_2 = DataModule("fold_2.json.bz2")
+fold_3 = DataModule("fold_3.json.bz2")
+fold_4 = DataModule("fold_4.json.bz2")
+fold_5 = DataModule("fold_5.json.bz2")
+fold_6 = DataModule("fold_6.json.bz2")
+fold_7 = DataModule("fold_7.json.bz2")
+fold_8 = DataModule("fold_8.json.bz2")
+fold_9 = DataModule("fold_9.json.bz2")
diff --git a/src/ptbench/data/montgomery/default.py b/src/ptbench/data/montgomery/default.py
deleted file mode 100644
index bb57b9a7..00000000
--- a/src/ptbench/data/montgomery/default.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (``default`` protocol)
-
-The standard digital image database for Tuberculosis was created by the National
-Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3 People’s
-Hospital, Guangdong Medical College, Shenzhen, China. The Chest X-rays are from
-
-* Database reference: [MONTGOMERY-SHENZHEN-2014]_
-* Original resolution (height x width or width x height): 4020x4892 px or 4892x4020 px
-* This split:
-
-  * Split reference: None
-  * Training samples: ?? of TB and healthy CXR
-  * Validation samples: ?? of TB and healthy CXR
-  * Test samples: ?? of TB and healthy CXR
-
-Data specifications:
-
-* Raw data input (on disk):
-
-    * PNG images 8 bit grayscale
-    * resolution: fixed to one of the cases above
-
-* Output image:
-
-    * Transforms:
-
-        * Load raw PNG with :py:mod:`PIL`
-        * Remove black borders
-        * Torch center cropping to get square image
-
-    * Final specifications
-
-        * Grayscale, encoded as a single plane image, 8 bits
-        * Square (4020x4020 px)
-
-
-Protocol ``default``:
-
-    * Training samples: first 64% of TB and healthy CXR (including labels)
-    * Validation samples: 16% of TB and healthy CXR (including labels)
-    * Test samples: 20% of TB and healty CXR (including labels)
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "default.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_0.py b/src/ptbench/data/montgomery/fold_0.py
deleted file mode 100644
index e50d2e30..00000000
--- a/src/ptbench/data/montgomery/fold_0.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (``fold 0`` protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_0.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_1.py b/src/ptbench/data/montgomery/fold_1.py
deleted file mode 100644
index 3698a9ed..00000000
--- a/src/ptbench/data/montgomery/fold_1.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_1.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_2.py b/src/ptbench/data/montgomery/fold_2.py
deleted file mode 100644
index b2d7ac2c..00000000
--- a/src/ptbench/data/montgomery/fold_2.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_2.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_3.py b/src/ptbench/data/montgomery/fold_3.py
deleted file mode 100644
index 1c566e4f..00000000
--- a/src/ptbench/data/montgomery/fold_3.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_3.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_4.py b/src/ptbench/data/montgomery/fold_4.py
deleted file mode 100644
index 4b68bd53..00000000
--- a/src/ptbench/data/montgomery/fold_4.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_4.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_5.py b/src/ptbench/data/montgomery/fold_5.py
deleted file mode 100644
index 59891e8e..00000000
--- a/src/ptbench/data/montgomery/fold_5.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_5.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_6.py b/src/ptbench/data/montgomery/fold_6.py
deleted file mode 100644
index e6c1d31a..00000000
--- a/src/ptbench/data/montgomery/fold_6.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_6.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_7.py b/src/ptbench/data/montgomery/fold_7.py
deleted file mode 100644
index 44dd8051..00000000
--- a/src/ptbench/data/montgomery/fold_7.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_7.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_8.py b/src/ptbench/data/montgomery/fold_8.py
deleted file mode 100644
index fd7edde6..00000000
--- a/src/ptbench/data/montgomery/fold_8.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_8.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/montgomery/fold_9.py b/src/ptbench/data/montgomery/fold_9.py
deleted file mode 100644
index 91228362..00000000
--- a/src/ptbench/data/montgomery/fold_9.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Montgomery datamodule for TB detection (default protocol)
-
-See :py:mod:`ptbench.data.montgomery.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_9.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/loader.py b/src/ptbench/data/shenzhen/datamodules.py
similarity index 52%
rename from src/ptbench/data/shenzhen/loader.py
rename to src/ptbench/data/shenzhen/datamodules.py
index 3409fed2..f0f68e26 100644
--- a/src/ptbench/data/shenzhen/loader.py
+++ b/src/ptbench/data/shenzhen/datamodules.py
@@ -2,8 +2,7 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
-"""Specialized raw-data loaders for the Shenzen dataset."""
-
+import importlib.resources
 import os
 
 import PIL.Image
@@ -11,7 +10,9 @@ import PIL.Image
 from torchvision.transforms.functional import center_crop, to_tensor
 
 from ...utils.rc import load_rc
+from ..datamodule import CachingDataModule
 from ..image_utils import remove_black_borders
+from ..split import JSONDatabaseSplit
 from ..typing import RawDataLoader as _BaseRawDataLoader
 from ..typing import Sample
 
@@ -90,3 +91,66 @@ class RawDataLoader(_BaseRawDataLoader):
             The integer label associated with the sample
         """
         return sample[1]
+
+
+class DataModule(CachingDataModule):
+    """Shenzhen datamodule for computer-aided diagnosis.
+
+    The standard digital image database for Tuberculosis was created by the National
+    Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3 People’s
+    Hospital, Guangdong Medical College, Shenzhen, China. The Chest X-rays are from
+    out-patient clinics, and were captured as part of the daily routine using
+    Philips DR Digital Diagnose systems.
+
+    * Database reference: [MONTGOMERY-SHENZHEN-2014]_
+    * Original resolution (height x width or width x height): 3000 x 3000 or less
+    * This split:
+
+      * Split reference: None
+      * Training samples: 64% of TB and healthy CXR (including labels)
+      * Validation samples: 16% of TB and healthy CXR (including labels)
+      * Test samples: 20% of TB and healthy CXR (including labels)
+
+    Data specifications:
+
+    * Raw data input (on disk):
+
+        * PNG images (grayscale, encoded as RGB images with "inverted" grayscale scale)
+        * Variable width and height
+
+    * Output image:
+
+        * Transforms:
+
+            * Load raw PNG with :py:mod:`PIL`
+            * Remove black borders
+            * Torch center cropping to get square image
+
+        * Final specifications:
+
+            * Grayscale, encoded as a single plane image, 8 bits
+            * Square, with varying resolutions, depending on the input image
+    """
+
+    def __init__(self, split_filename: str):
+        super().__init__(
+            database_split=JSONDatabaseSplit(
+                importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
+                    split_filename
+                )
+            ),
+            raw_data_loader=RawDataLoader(),
+        )
+
+
+default = DataModule("default.json.bz2")
+fold_0 = DataModule("fold_0.json.bz2")
+fold_1 = DataModule("fold_1.json.bz2")
+fold_2 = DataModule("fold_2.json.bz2")
+fold_3 = DataModule("fold_3.json.bz2")
+fold_4 = DataModule("fold_4.json.bz2")
+fold_5 = DataModule("fold_5.json.bz2")
+fold_6 = DataModule("fold_6.json.bz2")
+fold_7 = DataModule("fold_7.json.bz2")
+fold_8 = DataModule("fold_8.json.bz2")
+fold_9 = DataModule("fold_9.json.bz2")
diff --git a/src/ptbench/data/shenzhen/default.py b/src/ptbench/data/shenzhen/default.py
deleted file mode 100644
index a163b9bc..00000000
--- a/src/ptbench/data/shenzhen/default.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (``default`` protocol)
-
-The standard digital image database for Tuberculosis was created by the National
-Library of Medicine, Maryland, USA in collaboration with Shenzhen No.3 People’s
-Hospital, Guangdong Medical College, Shenzhen, China. The Chest X-rays are from
-out-patient clinics, and were captured as part of the daily routine using
-Philips DR Digital Diagnose systems.
-
-* Database reference: [MONTGOMERY-SHENZHEN-2014]_
-* Original resolution (height x width or width x height): 3000 x 3000 or less
-* This split:
-
-  * Split reference: None
-  * Training samples: 64% of TB and healthy CXR (including labels)
-  * Validation samples: 16% of TB and healthy CXR (including labels)
-  * Test samples: 20% of TB and healthy CXR (including labels)
-
-Data specifications:
-
-* Raw data input (on disk):
-
-    * PNG images (grayscale, encoded as RGB images with "inverted" grayscale scale)
-    * Variable width and height
-
-* Output image:
-
-    * Transforms:
-
-        * Load raw PNG with :py:mod:`PIL`
-        * Remove black borders
-        * Torch center cropping to get square image
-
-    * Final specifications:
-
-        * Grayscale, encoded as a single plane image, 8 bits
-        * Square, with varying resolutions, depending on the input image
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "default.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_0.py b/src/ptbench/data/shenzhen/fold_0.py
deleted file mode 100644
index b5059744..00000000
--- a/src/ptbench/data/shenzhen/fold_0.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 0)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_0.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_1.py b/src/ptbench/data/shenzhen/fold_1.py
deleted file mode 100644
index 1041c3e4..00000000
--- a/src/ptbench/data/shenzhen/fold_1.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 1)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_1.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_2.py b/src/ptbench/data/shenzhen/fold_2.py
deleted file mode 100644
index 5026116a..00000000
--- a/src/ptbench/data/shenzhen/fold_2.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 2)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_2.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_3.py b/src/ptbench/data/shenzhen/fold_3.py
deleted file mode 100644
index 16c00157..00000000
--- a/src/ptbench/data/shenzhen/fold_3.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 3)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_3.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_4.py b/src/ptbench/data/shenzhen/fold_4.py
deleted file mode 100644
index c0b0fdac..00000000
--- a/src/ptbench/data/shenzhen/fold_4.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 4)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_4.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_5.py b/src/ptbench/data/shenzhen/fold_5.py
deleted file mode 100644
index 0397955e..00000000
--- a/src/ptbench/data/shenzhen/fold_5.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 5)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_5.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_6.py b/src/ptbench/data/shenzhen/fold_6.py
deleted file mode 100644
index 145685ea..00000000
--- a/src/ptbench/data/shenzhen/fold_6.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 6)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_6.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_7.py b/src/ptbench/data/shenzhen/fold_7.py
deleted file mode 100644
index 5b8d7403..00000000
--- a/src/ptbench/data/shenzhen/fold_7.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 7)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_7.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_8.py b/src/ptbench/data/shenzhen/fold_8.py
deleted file mode 100644
index e9ce1a2f..00000000
--- a/src/ptbench/data/shenzhen/fold_8.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 8)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_8.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
diff --git a/src/ptbench/data/shenzhen/fold_9.py b/src/ptbench/data/shenzhen/fold_9.py
deleted file mode 100644
index 6da8dd3d..00000000
--- a/src/ptbench/data/shenzhen/fold_9.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen datamodule for computer-aided diagnosis (fold 9)
-
-See :py:mod:`ptbench.data.shenzhen.default` for input/output details.
-"""
-
-import importlib.resources
-
-from ..datamodule import CachingDataModule
-from ..split import JSONDatabaseSplit
-from .loader import RawDataLoader
-
-datamodule = CachingDataModule(
-    database_split=JSONDatabaseSplit(
-        importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(
-            "fold_9.json.bz2"
-        )
-    ),
-    raw_data_loader=RawDataLoader(),
-)
-- 
GitLab