From 2c39d7d9f994d2d4899d05146f53e8e61743b50d Mon Sep 17 00:00:00 2001
From: dcarron <daniel.carron@idiap.ch>
Date: Thu, 1 Jun 2023 11:39:36 +0200
Subject: [PATCH] Moved shenzhen configs to data

---
 pyproject.toml                                | 42 ++++++++---------
 .../configs/datasets/shenzhen/__init__.py     | 29 ------------
 .../configs/datasets/shenzhen/fold_0.py       | 14 ------
 .../configs/datasets/shenzhen/fold_0_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_1.py       | 14 ------
 .../configs/datasets/shenzhen/fold_1_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_2.py       | 14 ------
 .../configs/datasets/shenzhen/fold_2_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_3.py       | 14 ------
 .../configs/datasets/shenzhen/fold_3_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_4.py       | 14 ------
 .../configs/datasets/shenzhen/fold_4_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_5.py       | 14 ------
 .../configs/datasets/shenzhen/fold_5_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_6.py       | 14 ------
 .../configs/datasets/shenzhen/fold_6_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_7.py       | 14 ------
 .../configs/datasets/shenzhen/fold_7_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_8.py       | 14 ------
 .../configs/datasets/shenzhen/fold_8_rgb.py   | 14 ------
 .../configs/datasets/shenzhen/fold_9.py       | 14 ------
 .../configs/datasets/shenzhen/fold_9_rgb.py   | 14 ------
 src/ptbench/configs/datasets/shenzhen/rgb.py  | 15 ------
 src/ptbench/data/shenzhen/default.py          |  8 ++++
 src/ptbench/data/shenzhen/fold_0.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_0_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_1.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_1_rgb.py       | 46 +++++++++++++++++++
 src/ptbench/data/shenzhen/fold_2.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_2_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_3.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_3_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_4.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_4_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_5.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_5_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_6.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_6_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_7.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_7_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_8.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_8_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_9.py           | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/fold_9_rgb.py       | 45 ++++++++++++++++++
 src/ptbench/data/shenzhen/rgb.py              | 46 +++++++++++++++++++
 45 files changed, 976 insertions(+), 345 deletions(-)
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/__init__.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_0.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_0_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_1.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_1_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_2.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_2_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_3.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_3_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_4.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_4_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_5.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_5_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_6.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_6_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_7.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_7_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_8.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_8_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_9.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/fold_9_rgb.py
 delete mode 100644 src/ptbench/configs/datasets/shenzhen/rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_0.py
 create mode 100644 src/ptbench/data/shenzhen/fold_0_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_1.py
 create mode 100644 src/ptbench/data/shenzhen/fold_1_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_2.py
 create mode 100644 src/ptbench/data/shenzhen/fold_2_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_3.py
 create mode 100644 src/ptbench/data/shenzhen/fold_3_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_4.py
 create mode 100644 src/ptbench/data/shenzhen/fold_4_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_5.py
 create mode 100644 src/ptbench/data/shenzhen/fold_5_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_6.py
 create mode 100644 src/ptbench/data/shenzhen/fold_6_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_7.py
 create mode 100644 src/ptbench/data/shenzhen/fold_7_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_8.py
 create mode 100644 src/ptbench/data/shenzhen/fold_8_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/fold_9.py
 create mode 100644 src/ptbench/data/shenzhen/fold_9_rgb.py
 create mode 100644 src/ptbench/data/shenzhen/rgb.py

diff --git a/pyproject.toml b/pyproject.toml
index f22d566a..40c7a8fa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -118,27 +118,27 @@ montgomery_rs_f8 = "ptbench.configs.datasets.montgomery_RS.fold_8"
 montgomery_rs_f9 = "ptbench.configs.datasets.montgomery_RS.fold_9"
 # shenzhen dataset (and cross-validation folds)
 shenzhen = "ptbench.data.shenzhen.default"
-shenzhen_rgb = "ptbench.configs.datasets.shenzhen.rgb"
-shenzhen_f0 = "ptbench.configs.datasets.shenzhen.fold_0"
-shenzhen_f1 = "ptbench.configs.datasets.shenzhen.fold_1"
-shenzhen_f2 = "ptbench.configs.datasets.shenzhen.fold_2"
-shenzhen_f3 = "ptbench.configs.datasets.shenzhen.fold_3"
-shenzhen_f4 = "ptbench.configs.datasets.shenzhen.fold_4"
-shenzhen_f5 = "ptbench.configs.datasets.shenzhen.fold_5"
-shenzhen_f6 = "ptbench.configs.datasets.shenzhen.fold_6"
-shenzhen_f7 = "ptbench.configs.datasets.shenzhen.fold_7"
-shenzhen_f8 = "ptbench.configs.datasets.shenzhen.fold_8"
-shenzhen_f9 = "ptbench.configs.datasets.shenzhen.fold_9"
-shenzhen_f0_rgb = "ptbench.configs.datasets.shenzhen.fold_0_rgb"
-shenzhen_f1_rgb = "ptbench.configs.datasets.shenzhen.fold_1_rgb"
-shenzhen_f2_rgb = "ptbench.configs.datasets.shenzhen.fold_2_rgb"
-shenzhen_f3_rgb = "ptbench.configs.datasets.shenzhen.fold_3_rgb"
-shenzhen_f4_rgb = "ptbench.configs.datasets.shenzhen.fold_4_rgb"
-shenzhen_f5_rgb = "ptbench.configs.datasets.shenzhen.fold_5_rgb"
-shenzhen_f6_rgb = "ptbench.configs.datasets.shenzhen.fold_6_rgb"
-shenzhen_f7_rgb = "ptbench.configs.datasets.shenzhen.fold_7_rgb"
-shenzhen_f8_rgb = "ptbench.configs.datasets.shenzhen.fold_8_rgb"
-shenzhen_f9_rgb = "ptbench.configs.datasets.shenzhen.fold_9_rgb"
+shenzhen_rgb = "ptbench.data.shenzhen.rgb"
+shenzhen_f0 = "ptbench.data.shenzhen.fold_0"
+shenzhen_f1 = "ptbench.data.shenzhen.fold_1"
+shenzhen_f2 = "ptbench.data.shenzhen.fold_2"
+shenzhen_f3 = "ptbench.data.shenzhen.fold_3"
+shenzhen_f4 = "ptbench.data.shenzhen.fold_4"
+shenzhen_f5 = "ptbench.data.shenzhen.fold_5"
+shenzhen_f6 = "ptbench.data.shenzhen.fold_6"
+shenzhen_f7 = "ptbench.data.shenzhen.fold_7"
+shenzhen_f8 = "ptbench.data.shenzhen.fold_8"
+shenzhen_f9 = "ptbench.data.shenzhen.fold_9"
+shenzhen_f0_rgb = "ptbench.data.shenzhen.fold_0_rgb"
+shenzhen_f1_rgb = "ptbench.data.shenzhen.fold_1_rgb"
+shenzhen_f2_rgb = "ptbench.data.shenzhen.fold_2_rgb"
+shenzhen_f3_rgb = "ptbench.data.shenzhen.fold_3_rgb"
+shenzhen_f4_rgb = "ptbench.data.shenzhen.fold_4_rgb"
+shenzhen_f5_rgb = "ptbench.data.shenzhen.fold_5_rgb"
+shenzhen_f6_rgb = "ptbench.data.shenzhen.fold_6_rgb"
+shenzhen_f7_rgb = "ptbench.data.shenzhen.fold_7_rgb"
+shenzhen_f8_rgb = "ptbench.data.shenzhen.fold_8_rgb"
+shenzhen_f9_rgb = "ptbench.data.shenzhen.fold_9_rgb"
 # extended shenzhen dataset (with radiological signs)
 shenzhen_rs = "ptbench.configs.datasets.shenzhen_RS.default"
 shenzhen_rs_f0 = "ptbench.configs.datasets.shenzhen_RS.fold_0"
diff --git a/src/ptbench/configs/datasets/shenzhen/__init__.py b/src/ptbench/configs/datasets/shenzhen/__init__.py
deleted file mode 100644
index 9c08f777..00000000
--- a/src/ptbench/configs/datasets/shenzhen/__init__.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-
-def _maker(protocol, resize_size=512, cc_size=512, RGB=False):
-    from torchvision import transforms
-
-    from ....data.shenzhen import dataset as raw
-    from ....data.transforms import ElasticDeformation, RemoveBlackBorders
-    from .. import make_dataset as mk
-
-    post_transforms = []
-    if RGB:
-        post_transforms = [
-            transforms.Lambda(lambda x: x.convert("RGB")),
-            transforms.ToTensor(),
-        ]
-
-    return mk(
-        [raw.subsets(protocol)],
-        [
-            RemoveBlackBorders(),
-            transforms.Resize(resize_size),
-            transforms.CenterCrop(cc_size),
-        ],
-        [ElasticDeformation(p=0.8)],
-        post_transforms,
-    )
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_0.py b/src/ptbench/configs/datasets/shenzhen/fold_0.py
deleted file mode 100644
index 845623a0..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_0.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 0)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_0")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_0_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_0_rgb.py
deleted file mode 100644
index b0f33f89..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_0_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 0, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_0", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_1.py b/src/ptbench/configs/datasets/shenzhen/fold_1.py
deleted file mode 100644
index 898e2d18..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_1.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 1)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_1")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_1_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_1_rgb.py
deleted file mode 100644
index c241f1e9..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_1_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 1, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_1", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_2.py b/src/ptbench/configs/datasets/shenzhen/fold_2.py
deleted file mode 100644
index 42ba926c..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_2.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 2)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_2")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_2_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_2_rgb.py
deleted file mode 100644
index 22c9cf5b..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_2_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 2, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_2", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_3.py b/src/ptbench/configs/datasets/shenzhen/fold_3.py
deleted file mode 100644
index a6ab1138..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_3.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 3)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_3")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_3_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_3_rgb.py
deleted file mode 100644
index 91de9feb..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_3_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 3, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_3", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_4.py b/src/ptbench/configs/datasets/shenzhen/fold_4.py
deleted file mode 100644
index a9f29e1c..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_4.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 4)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_4")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_4_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_4_rgb.py
deleted file mode 100644
index ca643620..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_4_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 4, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_4", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_5.py b/src/ptbench/configs/datasets/shenzhen/fold_5.py
deleted file mode 100644
index e5d61914..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_5.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 5)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_5")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_5_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_5_rgb.py
deleted file mode 100644
index 77a3b389..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_5_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 5, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_5", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_6.py b/src/ptbench/configs/datasets/shenzhen/fold_6.py
deleted file mode 100644
index 3d71fa27..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_6.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 6)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_6")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_6_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_6_rgb.py
deleted file mode 100644
index 1b78181f..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_6_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 6, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_6", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_7.py b/src/ptbench/configs/datasets/shenzhen/fold_7.py
deleted file mode 100644
index 28323a6f..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_7.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 7)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_7")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_7_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_7_rgb.py
deleted file mode 100644
index 48302215..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_7_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 7, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_7", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_8.py b/src/ptbench/configs/datasets/shenzhen/fold_8.py
deleted file mode 100644
index 608ba67b..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_8.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 8)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_8")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_8_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_8_rgb.py
deleted file mode 100644
index 79573e9c..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_8_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 8, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_8", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_9.py b/src/ptbench/configs/datasets/shenzhen/fold_9.py
deleted file mode 100644
index 7f26bd6b..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_9.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 9)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_9")
diff --git a/src/ptbench/configs/datasets/shenzhen/fold_9_rgb.py b/src/ptbench/configs/datasets/shenzhen/fold_9_rgb.py
deleted file mode 100644
index c39e4f05..00000000
--- a/src/ptbench/configs/datasets/shenzhen/fold_9_rgb.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (cross validation fold 9, RGB)
-
-* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("fold_9", RGB=True)
diff --git a/src/ptbench/configs/datasets/shenzhen/rgb.py b/src/ptbench/configs/datasets/shenzhen/rgb.py
deleted file mode 100644
index 18684478..00000000
--- a/src/ptbench/configs/datasets/shenzhen/rgb.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""Shenzhen dataset for TB detection (default protocol, converted in RGB)
-
-* Split reference: first 64% of TB and healthy CXR for "train" 16% for
-* "validation", 20% for "test"
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.shenzhen` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("default", RGB=True)
diff --git a/src/ptbench/data/shenzhen/default.py b/src/ptbench/data/shenzhen/default.py
index 8347cd56..3801efbc 100644
--- a/src/ptbench/data/shenzhen/default.py
+++ b/src/ptbench/data/shenzhen/default.py
@@ -2,6 +2,14 @@
 #
 # SPDX-License-Identifier: GPL-3.0-or-later
 
+"""Shenzhen dataset for TB detection (default protocol)
+
+* Split reference: first 64% of TB and healthy CXR for "train", 16% for
+  "validation", 20% for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
 from clapper.logging import setup
 
 from ..base_datamodule import BaseDataModule
diff --git a/src/ptbench/data/shenzhen/fold_0.py b/src/ptbench/data/shenzhen/fold_0.py
new file mode 100644
index 00000000..d65d513b
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_0.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 0)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # Shenzhen fold 0 datamodule
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # pass every argument through to the base class
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # NOTE: ``stage`` is not read in this body
+        self.dataset = _maker("fold_0")  # presumably the protocol name
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # unpacks exactly four values
+
+
+datamodule = Fold0Module  # the class itself is exported, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_0_rgb.py b/src/ptbench/data/shenzhen/fold_0_rgb.py
new file mode 100644
index 00000000..bcc853dd
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_0_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 0, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # Shenzhen fold 0 datamodule (RGB)
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # pass every argument through to the base class
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # NOTE: ``stage`` is not read in this body
+        self.dataset = _maker("fold_0", RGB=True)  # RGB variant, per docstring
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # unpacks exactly four values
+
+
+datamodule = Fold0Module  # the class itself is exported, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_1.py b/src/ptbench/data/shenzhen/fold_1.py
new file mode 100644
index 00000000..b9494f15
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_1.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 1)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold1Module(BaseDataModule):  # Shenzhen fold 1 datamodule
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # pass every argument through to the base class
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # NOTE: ``stage`` is not read in this body
+        self.dataset = _maker("fold_1")  # presumably the protocol name
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # unpacks exactly four values
+
+
+datamodule = Fold1Module  # the class itself is exported, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_1_rgb.py b/src/ptbench/data/shenzhen/fold_1_rgb.py
new file mode 100644
index 00000000..01e23967
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_1_rgb.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 1, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold1Module(BaseDataModule):  # Shenzhen fold 1 datamodule (RGB)
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # pass every argument through to the base class
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # NOTE: ``stage`` is not read in this body
+        self.dataset = _maker("fold_1", RGB=True)  # RGB variant, per docstring
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # unpacks exactly four values
+
+
+datamodule = Fold1Module  # the class itself is exported, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_2.py b/src/ptbench/data/shenzhen/fold_2.py
new file mode 100644
index 00000000..8d5cf816
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_2.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 2)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_2"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_2")  # fold 2; RGB not passed
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_2_rgb.py b/src/ptbench/data/shenzhen/fold_2_rgb.py
new file mode 100644
index 00000000..baf6752d
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_2_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 2, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_2"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_2", RGB=True)  # fold 2, RGB variant
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_3.py b/src/ptbench/data/shenzhen/fold_3.py
new file mode 100644
index 00000000..b42882cc
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_3.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 3)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_3"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_3")  # fold 3; RGB not passed
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_3_rgb.py b/src/ptbench/data/shenzhen/fold_3_rgb.py
new file mode 100644
index 00000000..a02c2b1d
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_3_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 3, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_3"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_3", RGB=True)  # fold 3, RGB variant
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_4.py b/src/ptbench/data/shenzhen/fold_4.py
new file mode 100644
index 00000000..a9ad1471
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_4.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 4)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_4"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_4")  # fold 4; RGB not passed
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_4_rgb.py b/src/ptbench/data/shenzhen/fold_4_rgb.py
new file mode 100644
index 00000000..3620ba42
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_4_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 4, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_4"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_4", RGB=True)  # fold 4, RGB variant
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_5.py b/src/ptbench/data/shenzhen/fold_5.py
new file mode 100644
index 00000000..426a9d66
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_5.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 5)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_5"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_5")  # fold 5; RGB not passed
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_5_rgb.py b/src/ptbench/data/shenzhen/fold_5_rgb.py
new file mode 100644
index 00000000..29e70138
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_5_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 5, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_5"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_5", RGB=True)  # fold 5, RGB variant
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_6.py b/src/ptbench/data/shenzhen/fold_6.py
new file mode 100644
index 00000000..fb0a91b8
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_6.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 6)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_6"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_6")  # fold 6; RGB not passed
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_6_rgb.py b/src/ptbench/data/shenzhen/fold_6_rgb.py
new file mode 100644
index 00000000..35e7e6d7
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_6_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 6, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_6"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_6", RGB=True)  # fold 6, RGB variant
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_7.py b/src/ptbench/data/shenzhen/fold_7.py
new file mode 100644
index 00000000..743b344d
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_7.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 7)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_7"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_7")  # fold 7; RGB not passed
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_7_rgb.py b/src/ptbench/data/shenzhen/fold_7_rgb.py
new file mode 100644
index 00000000..0a9f83d7
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_7_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 7, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_7"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_7", RGB=True)  # fold 7, RGB variant
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_8.py b/src/ptbench/data/shenzhen/fold_8.py
new file mode 100644
index 00000000..ee7f7167
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_8.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 8)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_8"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_8")  # fold 8; RGB not passed
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_8_rgb.py b/src/ptbench/data/shenzhen/fold_8_rgb.py
new file mode 100644
index 00000000..1351790f
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_8_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 8, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_8"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_8", RGB=True)  # fold 8, RGB variant
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_9.py b/src/ptbench/data/shenzhen/fold_9.py
new file mode 100644
index 00000000..dbd8ab31
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_9.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 9)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_9"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_9")  # fold 9; RGB not passed
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/fold_9_rgb.py b/src/ptbench/data/shenzhen/fold_9_rgb.py
new file mode 100644
index 00000000..729141dc
--- /dev/null
+++ b/src/ptbench/data/shenzhen/fold_9_rgb.py
@@ -0,0 +1,45 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (cross validation fold 9, RGB)
+
+* Split reference: first 80% of TB and healthy CXR for "train", rest for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class Fold0Module(BaseDataModule):  # NOTE(review): builds "fold_9"; rename?
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("fold_9", RGB=True)  # fold 9, RGB variant
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = Fold0Module  # the class itself, not an instance
diff --git a/src/ptbench/data/shenzhen/rgb.py b/src/ptbench/data/shenzhen/rgb.py
new file mode 100644
index 00000000..6186990e
--- /dev/null
+++ b/src/ptbench/data/shenzhen/rgb.py
@@ -0,0 +1,46 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""Shenzhen dataset for TB detection (default protocol, converted in RGB)
+
+* Split reference: first 64% of TB and healthy CXR for "train", 16% for
+  "validation", 20% for "test"
+* This configuration resolution: 512 x 512 (default)
+* See :py:mod:`ptbench.data.shenzhen` for dataset details
+"""
+
+from clapper.logging import setup
+
+from ..base_datamodule import BaseDataModule
+from . import _maker, return_subsets
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class DefaultModule(BaseDataModule):  # default protocol, RGB variant
+    def __init__(
+        self,
+        train_batch_size=1,
+        predict_batch_size=1,
+        drop_incomplete_batch=False,
+        multiproc_kwargs=None,
+    ):
+        super().__init__(  # arguments forwarded unchanged
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str):  # `stage` is not read here
+        self.dataset = _maker("default", RGB=True)  # default protocol, RGB
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)  # 4 values, in this order
+
+
+datamodule = DefaultModule  # the class itself, not an instance
-- 
GitLab