diff --git a/src/ptbench/data/shenzhen/datamodule.py b/src/ptbench/data/shenzhen/datamodule.py
index 59a9d0bda9e5cbc05027531b8def8aed2a9c261c..45ce8762a0e46bf7cf9feb10d8f2183572dfb4ae 100644
--- a/src/ptbench/data/shenzhen/datamodule.py
+++ b/src/ptbench/data/shenzhen/datamodule.py
@@ -104,12 +104,6 @@ class DataModule(CachingDataModule):
 
     * Database reference: [MONTGOMERY-SHENZHEN-2014]_
     * Original resolution (height x width or width x height): 3000 x 3000 or less
-    * This split:
-
-      * Split reference: None
-      * Training samples: 64% of TB and healthy CXR (including labels)
-      * Validation samples: 16% of TB and healthy CXR (including labels)
-      * Test samples: 20% of TB and healthy CXR (including labels)
 
     Data specifications:
 
diff --git a/src/ptbench/data/shenzhen/default.py b/src/ptbench/data/shenzhen/default.py
index 7fe993a981c86c0161327d1ddb4498e08a90313c..0e29c38563d0fbb12dd172957f044aa6893117d9 100644
--- a/src/ptbench/data/shenzhen/default.py
+++ b/src/ptbench/data/shenzhen/default.py
@@ -5,3 +5,9 @@
 from .datamodule import DataModule
 
 datamodule = DataModule("default.json.bz2")
+"""Default Shenzhen TB database split.
+
+* Training samples: 64% of TB and healthy CXR (including labels)
+* Validation samples: 16% of TB and healthy CXR (including labels)
+* Test samples: 20% of TB and healthy CXR (including labels)
+"""