diff --git a/doc/extras.inv b/doc/extras.inv
index accf3afad6207ab4d7a1d0bb210b125c061f360d..8cfc7b8c8d65e5a2248c7b55dcdd121c4031f2ae 100644
--- a/doc/extras.inv
+++ b/doc/extras.inv
@@ -2,5 +2,4 @@
 # Project: extras
 # Version: stable
 # The remainder of this file is compressed using zlib.
-xÚEÍAÂ0À;¯ˆWbqíà
(´¦±”Ä–mÚ×S‚*®«ÝYg󛌸E×ÔìÉZ-È2Tž^Ã%dw±@ÿ–#ë¿˜§GAøcöZÂùÀâTiE½›£t=ó´qI
-ÍÙµ9îèÈŠñºÇ·þݱãiã>·T>
+xÚMÎ=Â0ཧˆk!¶î,ˆŸ 4
MPG¶A*§'MUÁfY~Ÿ÷öä!IFèIä©‹0¼‚;á˜3uJå‰çc	8ª%¡ˆu¬ú¥ãDÛ@fýÇâØæÊYv0Ng¯‚'ŸF¹¢ÐÊÓº>×ßÛl÷¤RÏ&ƒEy¼]/‡eža4Ñ_Íͬ+‘¹ê¾|ëQ㤪RÍ:ý;mó´ša,
\ No newline at end of file
diff --git a/doc/extras.txt b/doc/extras.txt
index 4be885052aba6e035178d345d660714ac2a49c68..3c5ec06e6b4f0dea9e8ef9b03e5f5d4f8d5ed1e0 100644
--- a/doc/extras.txt
+++ b/doc/extras.txt
@@ -4,3 +4,4 @@
 # The remainder of this file is compressed using zlib.
 torchvision.transforms py:module 1 https://pytorch.org/vision/stable/transforms.html -
 optimizer_step py:method 1 api/lightning.pytorch.core.LightningModule.html#$ -
+json.encoder.JSONEncoder py:class 1 https://docs.python.org/3/library/json.html#json.JSONEncoder -
diff --git a/pyproject.toml b/pyproject.toml
index 0457cdff94604b93b689e4cf0176ac2080238010..11192a1dbb8b1bd7b8f1dbbe8a7034298eacfb52 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -94,6 +94,7 @@ montgomery-f9 = "mednet.config.data.montgomery.fold_9"
 
 # shenzhen dataset (and cross-validation folds)
 shenzhen = "mednet.config.data.shenzhen.default"
+shenzhen-alltest = "mednet.config.data.shenzhen.alltest"
 shenzhen-f0 = "mednet.config.data.shenzhen.fold_0"
 shenzhen-f1 = "mednet.config.data.shenzhen.fold_1"
 shenzhen-f2 = "mednet.config.data.shenzhen.fold_2"
diff --git a/src/mednet/config/data/shenzhen/alltest.json b/src/mednet/config/data/shenzhen/alltest.json
new file mode 100644
index 0000000000000000000000000000000000000000..dd46147abaae5e2246ec6cbdbed3f7a5c68e5314
--- /dev/null
+++ b/src/mednet/config/data/shenzhen/alltest.json
@@ -0,0 +1,666 @@
+{
+  "test": [
+    ["CXR_png/CHNCXR_0313_0.png", 0],
+    ["CXR_png/CHNCXR_0091_0.png", 0],
+    ["CXR_png/CHNCXR_0049_0.png", 0],
+    ["CXR_png/CHNCXR_0287_0.png", 0],
+    ["CXR_png/CHNCXR_0009_0.png", 0],
+    ["CXR_png/CHNCXR_0186_0.png", 0],
+    ["CXR_png/CHNCXR_0173_0.png", 0],
+    ["CXR_png/CHNCXR_0322_0.png", 0],
+    ["CXR_png/CHNCXR_0111_0.png", 0],
+    ["CXR_png/CHNCXR_0154_0.png", 0],
+    ["CXR_png/CHNCXR_0023_0.png", 0],
+    ["CXR_png/CHNCXR_0066_0.png", 0],
+    ["CXR_png/CHNCXR_0136_0.png", 0],
+    ["CXR_png/CHNCXR_0247_0.png", 0],
+    ["CXR_png/CHNCXR_0202_0.png", 0],
+    ["CXR_png/CHNCXR_0210_0.png", 0],
+    ["CXR_png/CHNCXR_0255_0.png", 0],
+    ["CXR_png/CHNCXR_0146_0.png", 0],
+    ["CXR_png/CHNCXR_0306_0.png", 0],
+    ["CXR_png/CHNCXR_0103_0.png", 0],
+    ["CXR_png/CHNCXR_0308_0.png", 0],
+    ["CXR_png/CHNCXR_0007_0.png", 0],
+    ["CXR_png/CHNCXR_0086_0.png", 0],
+    ["CXR_png/CHNCXR_0302_0.png", 0],
+    ["CXR_png/CHNCXR_0295_0.png", 0],
+    ["CXR_png/CHNCXR_0169_0.png", 0],
+    ["CXR_png/CHNCXR_0003_0.png", 0],
+    ["CXR_png/CHNCXR_0046_0.png", 0],
+    ["CXR_png/CHNCXR_0134_0.png", 0],
+    ["CXR_png/CHNCXR_0250_0.png", 0],
+    ["CXR_png/CHNCXR_0074_0.png", 0],
+    ["CXR_png/CHNCXR_0082_0.png", 0],
+    ["CXR_png/CHNCXR_0162_0.png", 0],
+    ["CXR_png/CHNCXR_0127_0.png", 0],
+    ["CXR_png/CHNCXR_0031_0.png", 0],
+    ["CXR_png/CHNCXR_0029_0.png", 0],
+    ["CXR_png/CHNCXR_0263_0.png", 0],
+    ["CXR_png/CHNCXR_0226_0.png", 0],
+    ["CXR_png/CHNCXR_0089_0.png", 0],
+    ["CXR_png/CHNCXR_0042_0.png", 0],
+    ["CXR_png/CHNCXR_0171_0.png", 0],
+    ["CXR_png/CHNCXR_0028_0.png", 0],
+    ["CXR_png/CHNCXR_0272_0.png", 0],
+    ["CXR_png/CHNCXR_0274_0.png", 0],
+    ["CXR_png/CHNCXR_0016_0.png", 0],
+    ["CXR_png/CHNCXR_0118_0.png", 0],
+    ["CXR_png/CHNCXR_0108_0.png", 0],
+    ["CXR_png/CHNCXR_0053_0.png", 0],
+    ["CXR_png/CHNCXR_0219_0.png", 0],
+    ["CXR_png/CHNCXR_0289_0.png", 0],
+    ["CXR_png/CHNCXR_0209_0.png", 0],
+    ["CXR_png/CHNCXR_0304_0.png", 0],
+    ["CXR_png/CHNCXR_0314_0.png", 0],
+    ["CXR_png/CHNCXR_0005_0.png", 0],
+    ["CXR_png/CHNCXR_0040_0.png", 0],
+    ["CXR_png/CHNCXR_0317_0.png", 0],
+    ["CXR_png/CHNCXR_0038_0.png", 0],
+    ["CXR_png/CHNCXR_0269_0.png", 0],
+    ["CXR_png/CHNCXR_0050_0.png", 0],
+    ["CXR_png/CHNCXR_0261_0.png", 0],
+    ["CXR_png/CHNCXR_0099_0.png", 0],
+    ["CXR_png/CHNCXR_0059_0.png", 0],
+    ["CXR_png/CHNCXR_0320_0.png", 0],
+    ["CXR_png/CHNCXR_0170_0.png", 0],
+    ["CXR_png/CHNCXR_0144_0.png", 0],
+    ["CXR_png/CHNCXR_0101_0.png", 0],
+    ["CXR_png/CHNCXR_0237_0.png", 0],
+    ["CXR_png/CHNCXR_0090_0.png", 0],
+    ["CXR_png/CHNCXR_0234_0.png", 0],
+    ["CXR_png/CHNCXR_0245_0.png", 0],
+    ["CXR_png/CHNCXR_0200_0.png", 0],
+    ["CXR_png/CHNCXR_0271_0.png", 0],
+    ["CXR_png/CHNCXR_0278_0.png", 0],
+    ["CXR_png/CHNCXR_0093_0.png", 0],
+    ["CXR_png/CHNCXR_0184_0.png", 0],
+    ["CXR_png/CHNCXR_0135_0.png", 0],
+    ["CXR_png/CHNCXR_0179_0.png", 0],
+    ["CXR_png/CHNCXR_0285_0.png", 0],
+    ["CXR_png/CHNCXR_0188_0.png", 0],
+    ["CXR_png/CHNCXR_0021_0.png", 0],
+    ["CXR_png/CHNCXR_0064_0.png", 0],
+    ["CXR_png/CHNCXR_0231_0.png", 0],
+    ["CXR_png/CHNCXR_0268_0.png", 0],
+    ["CXR_png/CHNCXR_0215_0.png", 0],
+    ["CXR_png/CHNCXR_0123_0.png", 0],
+    ["CXR_png/CHNCXR_0300_0.png", 0],
+    ["CXR_png/CHNCXR_0258_0.png", 0],
+    ["CXR_png/CHNCXR_0014_0.png", 0],
+    ["CXR_png/CHNCXR_0316_0.png", 0],
+    ["CXR_png/CHNCXR_0159_0.png", 0],
+    ["CXR_png/CHNCXR_0092_0.png", 0],
+    ["CXR_png/CHNCXR_0051_0.png", 0],
+    ["CXR_png/CHNCXR_0298_0.png", 0],
+    ["CXR_png/CHNCXR_0164_0.png", 0],
+    ["CXR_png/CHNCXR_0121_0.png", 0],
+    ["CXR_png/CHNCXR_0199_0.png", 0],
+    ["CXR_png/CHNCXR_0265_0.png", 0],
+    ["CXR_png/CHNCXR_0220_0.png", 0],
+    ["CXR_png/CHNCXR_0084_0.png", 0],
+    ["CXR_png/CHNCXR_0290_0.png", 0],
+    ["CXR_png/CHNCXR_0137_0.png", 0],
+    ["CXR_png/CHNCXR_0079_0.png", 0],
+    ["CXR_png/CHNCXR_0129_0.png", 0],
+    ["CXR_png/CHNCXR_0241_0.png", 0],
+    ["CXR_png/CHNCXR_0017_0.png", 0],
+    ["CXR_png/CHNCXR_0001_0.png", 0],
+    ["CXR_png/CHNCXR_0088_0.png", 0],
+    ["CXR_png/CHNCXR_0075_0.png", 0],
+    ["CXR_png/CHNCXR_0030_0.png", 0],
+    ["CXR_png/CHNCXR_0294_0.png", 0],
+    ["CXR_png/CHNCXR_0228_0.png", 0],
+    ["CXR_png/CHNCXR_0048_0.png", 0],
+    ["CXR_png/CHNCXR_0034_0.png", 0],
+    ["CXR_png/CHNCXR_0071_0.png", 0],
+    ["CXR_png/CHNCXR_0211_0.png", 0],
+    ["CXR_png/CHNCXR_0254_0.png", 0],
+    ["CXR_png/CHNCXR_0168_0.png", 0],
+    ["CXR_png/CHNCXR_0110_0.png", 0],
+    ["CXR_png/CHNCXR_0191_0.png", 0],
+    ["CXR_png/CHNCXR_0155_0.png", 0],
+    ["CXR_png/CHNCXR_0096_0.png", 0],
+    ["CXR_png/CHNCXR_0315_0.png", 0],
+    ["CXR_png/CHNCXR_0052_0.png", 0],
+    ["CXR_png/CHNCXR_0044_0.png", 0],
+    ["CXR_png/CHNCXR_0204_0.png", 0],
+    ["CXR_png/CHNCXR_0172_0.png", 0],
+    ["CXR_png/CHNCXR_0140_0.png", 0],
+    ["CXR_png/CHNCXR_0027_0.png", 0],
+    ["CXR_png/CHNCXR_0062_0.png", 0],
+    ["CXR_png/CHNCXR_0326_0.png", 0],
+    ["CXR_png/CHNCXR_0182_0.png", 0],
+    ["CXR_png/CHNCXR_0076_0.png", 0],
+    ["CXR_png/CHNCXR_0283_0.png", 0],
+    ["CXR_png/CHNCXR_0033_0.png", 0],
+    ["CXR_png/CHNCXR_0270_0.png", 0],
+    ["CXR_png/CHNCXR_0235_0.png", 0],
+    ["CXR_png/CHNCXR_0142_0.png", 0],
+    ["CXR_png/CHNCXR_0107_0.png", 0],
+    ["CXR_png/CHNCXR_0194_0.png", 0],
+    ["CXR_png/CHNCXR_0243_0.png", 0],
+    ["CXR_png/CHNCXR_0206_0.png", 0],
+    ["CXR_png/CHNCXR_0267_0.png", 0],
+    ["CXR_png/CHNCXR_0222_0.png", 0],
+    ["CXR_png/CHNCXR_0166_0.png", 0],
+    ["CXR_png/CHNCXR_0196_0.png", 0],
+    ["CXR_png/CHNCXR_0180_0.png", 0],
+    ["CXR_png/CHNCXR_0208_0.png", 0],
+    ["CXR_png/CHNCXR_0239_0.png", 0],
+    ["CXR_png/CHNCXR_0236_0.png", 0],
+    ["CXR_png/CHNCXR_0105_0.png", 0],
+    ["CXR_png/CHNCXR_0273_0.png", 0],
+    ["CXR_png/CHNCXR_0212_0.png", 0],
+    ["CXR_png/CHNCXR_0018_0.png", 0],
+    ["CXR_png/CHNCXR_0257_0.png", 0],
+    ["CXR_png/CHNCXR_0319_0.png", 0],
+    ["CXR_png/CHNCXR_0109_0.png", 0],
+    ["CXR_png/CHNCXR_0224_0.png", 0],
+    ["CXR_png/CHNCXR_0324_0.png", 0],
+    ["CXR_png/CHNCXR_0156_0.png", 0],
+    ["CXR_png/CHNCXR_0025_0.png", 0],
+    ["CXR_png/CHNCXR_0060_0.png", 0],
+    ["CXR_png/CHNCXR_0114_0.png", 0],
+    ["CXR_png/CHNCXR_0151_0.png", 0],
+    ["CXR_png/CHNCXR_0138_0.png", 0],
+    ["CXR_png/CHNCXR_0281_0.png", 0],
+    ["CXR_png/CHNCXR_0297_0.png", 0],
+    ["CXR_png/CHNCXR_0113_0.png", 0],
+    ["CXR_png/CHNCXR_0015_0.png", 0],
+    ["CXR_png/CHNCXR_0080_0.png", 0],
+    ["CXR_png/CHNCXR_0160_0.png", 0],
+    ["CXR_png/CHNCXR_0309_0.png", 0],
+    ["CXR_png/CHNCXR_0201_0.png", 0],
+    ["CXR_png/CHNCXR_0244_0.png", 0],
+    ["CXR_png/CHNCXR_0117_0.png", 0],
+    ["CXR_png/CHNCXR_0152_0.png", 0],
+    ["CXR_png/CHNCXR_0058_0.png", 0],
+    ["CXR_png/CHNCXR_0116_0.png", 0],
+    ["CXR_png/CHNCXR_0225_0.png", 0],
+    ["CXR_png/CHNCXR_0260_0.png", 0],
+    ["CXR_png/CHNCXR_0216_0.png", 0],
+    ["CXR_png/CHNCXR_0253_0.png", 0],
+    ["CXR_png/CHNCXR_0124_0.png", 0],
+    ["CXR_png/CHNCXR_0161_0.png", 0],
+    ["CXR_png/CHNCXR_0072_0.png", 0],
+    ["CXR_png/CHNCXR_0039_0.png", 0],
+    ["CXR_png/CHNCXR_0081_0.png", 0],
+    ["CXR_png/CHNCXR_0037_0.png", 0],
+    ["CXR_png/CHNCXR_0153_0.png", 0],
+    ["CXR_png/CHNCXR_0252_0.png", 0],
+    ["CXR_png/CHNCXR_0145_0.png", 0],
+    ["CXR_png/CHNCXR_0100_0.png", 0],
+    ["CXR_png/CHNCXR_0141_0.png", 0],
+    ["CXR_png/CHNCXR_0036_0.png", 0],
+    ["CXR_png/CHNCXR_0214_0.png", 0],
+    ["CXR_png/CHNCXR_0130_0.png", 0],
+    ["CXR_png/CHNCXR_0065_0.png", 0],
+    ["CXR_png/CHNCXR_0020_0.png", 0],
+    ["CXR_png/CHNCXR_0073_0.png", 0],
+    ["CXR_png/CHNCXR_0321_0.png", 0],
+    ["CXR_png/CHNCXR_0115_0.png", 0],
+    ["CXR_png/CHNCXR_0098_0.png", 0],
+    ["CXR_png/CHNCXR_0055_0.png", 0],
+    ["CXR_png/CHNCXR_0279_0.png", 0],
+    ["CXR_png/CHNCXR_0185_0.png", 0],
+    ["CXR_png/CHNCXR_0150_0.png", 0],
+    ["CXR_png/CHNCXR_0178_0.png", 0],
+    ["CXR_png/CHNCXR_0010_0.png", 0],
+    ["CXR_png/CHNCXR_0540_1.png", 1],
+    ["CXR_png/CHNCXR_0460_1.png", 1],
+    ["CXR_png/CHNCXR_0484_1.png", 1],
+    ["CXR_png/CHNCXR_0425_1.png", 1],
+    ["CXR_png/CHNCXR_0505_1.png", 1],
+    ["CXR_png/CHNCXR_0394_1.png", 1],
+    ["CXR_png/CHNCXR_0507_1.png", 1],
+    ["CXR_png/CHNCXR_0370_1.png", 1],
+    ["CXR_png/CHNCXR_0538_1.png", 1],
+    ["CXR_png/CHNCXR_0401_1.png", 1],
+    ["CXR_png/CHNCXR_0444_1.png", 1],
+    ["CXR_png/CHNCXR_0641_1.png", 1],
+    ["CXR_png/CHNCXR_0604_1.png", 1],
+    ["CXR_png/CHNCXR_0559_1.png", 1],
+    ["CXR_png/CHNCXR_0479_1.png", 1],
+    ["CXR_png/CHNCXR_0369_1.png", 1],
+    ["CXR_png/CHNCXR_0335_1.png", 1],
+    ["CXR_png/CHNCXR_0580_1.png", 1],
+    ["CXR_png/CHNCXR_0639_1.png", 1],
+    ["CXR_png/CHNCXR_0418_1.png", 1],
+    ["CXR_png/CHNCXR_0354_1.png", 1],
+    ["CXR_png/CHNCXR_0658_1.png", 1],
+    ["CXR_png/CHNCXR_0492_1.png", 1],
+    ["CXR_png/CHNCXR_0599_1.png", 1],
+    ["CXR_png/CHNCXR_0355_1.png", 1],
+    ["CXR_png/CHNCXR_0478_1.png", 1],
+    ["CXR_png/CHNCXR_0565_1.png", 1],
+    ["CXR_png/CHNCXR_0520_1.png", 1],
+    ["CXR_png/CHNCXR_0598_1.png", 1],
+    ["CXR_png/CHNCXR_0621_1.png", 1],
+    ["CXR_png/CHNCXR_0419_1.png", 1],
+    ["CXR_png/CHNCXR_0334_1.png", 1],
+    ["CXR_png/CHNCXR_0371_1.png", 1],
+    ["CXR_png/CHNCXR_0640_1.png", 1],
+    ["CXR_png/CHNCXR_0605_1.png", 1],
+    ["CXR_png/CHNCXR_0541_1.png", 1],
+    ["CXR_png/CHNCXR_0504_1.png", 1],
+    ["CXR_png/CHNCXR_0539_1.png", 1],
+    ["CXR_png/CHNCXR_0638_1.png", 1],
+    ["CXR_png/CHNCXR_0581_1.png", 1],
+    ["CXR_png/CHNCXR_0424_1.png", 1],
+    ["CXR_png/CHNCXR_0461_1.png", 1],
+    ["CXR_png/CHNCXR_0545_1.png", 1],
+    ["CXR_png/CHNCXR_0485_1.png", 1],
+    ["CXR_png/CHNCXR_0500_1.png", 1],
+    ["CXR_png/CHNCXR_0395_1.png", 1],
+    ["CXR_png/CHNCXR_0400_1.png", 1],
+    ["CXR_png/CHNCXR_0426_1.png", 1],
+    ["CXR_png/CHNCXR_0463_1.png", 1],
+    ["CXR_png/CHNCXR_0336_1.png", 1],
+    ["CXR_png/CHNCXR_0373_1.png", 1],
+    ["CXR_png/CHNCXR_0543_1.png", 1],
+    ["CXR_png/CHNCXR_0506_1.png", 1],
+    ["CXR_png/CHNCXR_0642_1.png", 1],
+    ["CXR_png/CHNCXR_0607_1.png", 1],
+    ["CXR_png/CHNCXR_0357_1.png", 1],
+    ["CXR_png/CHNCXR_0487_1.png", 1],
+    ["CXR_png/CHNCXR_0623_1.png", 1],
+    ["CXR_png/CHNCXR_0567_1.png", 1],
+    ["CXR_png/CHNCXR_0522_1.png", 1],
+    ["CXR_png/CHNCXR_0397_1.png", 1],
+    ["CXR_png/CHNCXR_0402_1.png", 1],
+    ["CXR_png/CHNCXR_0447_1.png", 1],
+    ["CXR_png/CHNCXR_0659_1.png", 1],
+    ["CXR_png/CHNCXR_0558_1.png", 1],
+    ["CXR_png/CHNCXR_0368_1.png", 1],
+    ["CXR_png/CHNCXR_0445_1.png", 1],
+    ["CXR_png/CHNCXR_0620_1.png", 1],
+    ["CXR_png/CHNCXR_0644_1.png", 1],
+    ["CXR_png/CHNCXR_0388_1.png", 1],
+    ["CXR_png/CHNCXR_0526_1.png", 1],
+    ["CXR_png/CHNCXR_0662_1.png", 1],
+    ["CXR_png/CHNCXR_0627_1.png", 1],
+    ["CXR_png/CHNCXR_0483_1.png", 1],
+    ["CXR_png/CHNCXR_0353_1.png", 1],
+    ["CXR_png/CHNCXR_0406_1.png", 1],
+    ["CXR_png/CHNCXR_0443_1.png", 1],
+    ["CXR_png/CHNCXR_0393_1.png", 1],
+    ["CXR_png/CHNCXR_0422_1.png", 1],
+    ["CXR_png/CHNCXR_0467_1.png", 1],
+    ["CXR_png/CHNCXR_0587_1.png", 1],
+    ["CXR_png/CHNCXR_0646_1.png", 1],
+    ["CXR_png/CHNCXR_0603_1.png", 1],
+    ["CXR_png/CHNCXR_0547_1.png", 1],
+    ["CXR_png/CHNCXR_0502_1.png", 1],
+    ["CXR_png/CHNCXR_0332_1.png", 1],
+    ["CXR_png/CHNCXR_0377_1.png", 1],
+    ["CXR_png/CHNCXR_0521_1.png", 1],
+    ["CXR_png/CHNCXR_0564_1.png", 1],
+    ["CXR_png/CHNCXR_0563_1.png", 1],
+    ["CXR_png/CHNCXR_0601_1.png", 1],
+    ["CXR_png/CHNCXR_0481_1.png", 1],
+    ["CXR_png/CHNCXR_0351_1.png", 1],
+    ["CXR_png/CHNCXR_0458_1.png", 1],
+    ["CXR_png/CHNCXR_0330_1.png", 1],
+    ["CXR_png/CHNCXR_0375_1.png", 1],
+    ["CXR_png/CHNCXR_0420_1.png", 1],
+    ["CXR_png/CHNCXR_0465_1.png", 1],
+    ["CXR_png/CHNCXR_0348_1.png", 1],
+    ["CXR_png/CHNCXR_0498_1.png", 1],
+    ["CXR_png/CHNCXR_0585_1.png", 1],
+    ["CXR_png/CHNCXR_0578_1.png", 1],
+    ["CXR_png/CHNCXR_0542_1.png", 1],
+    ["CXR_png/CHNCXR_0329_1.png", 1],
+    ["CXR_png/CHNCXR_0404_1.png", 1],
+    ["CXR_png/CHNCXR_0441_1.png", 1],
+    ["CXR_png/CHNCXR_0519_1.png", 1],
+    ["CXR_png/CHNCXR_0618_1.png", 1],
+    ["CXR_png/CHNCXR_0660_1.png", 1],
+    ["CXR_png/CHNCXR_0625_1.png", 1],
+    ["CXR_png/CHNCXR_0561_1.png", 1],
+    ["CXR_png/CHNCXR_0524_1.png", 1],
+    ["CXR_png/CHNCXR_0439_1.png", 1],
+    ["CXR_png/CHNCXR_0391_1.png", 1],
+    ["CXR_png/CHNCXR_0440_1.png", 1],
+    ["CXR_png/CHNCXR_0643_1.png", 1],
+    ["CXR_png/CHNCXR_0386_1.png", 1],
+    ["CXR_png/CHNCXR_0496_1.png", 1],
+    ["CXR_png/CHNCXR_0346_1.png", 1],
+    ["CXR_png/CHNCXR_0533_1.png", 1],
+    ["CXR_png/CHNCXR_0576_1.png", 1],
+    ["CXR_png/CHNCXR_0632_1.png", 1],
+    ["CXR_png/CHNCXR_0362_1.png", 1],
+    ["CXR_png/CHNCXR_0327_1.png", 1],
+    ["CXR_png/CHNCXR_0616_1.png", 1],
+    ["CXR_png/CHNCXR_0653_1.png", 1],
+    ["CXR_png/CHNCXR_0517_1.png", 1],
+    ["CXR_png/CHNCXR_0552_1.png", 1],
+    ["CXR_png/CHNCXR_0592_1.png", 1],
+    ["CXR_png/CHNCXR_0472_1.png", 1],
+    ["CXR_png/CHNCXR_0437_1.png", 1],
+    ["CXR_png/CHNCXR_0629_1.png", 1],
+    ["CXR_png/CHNCXR_0590_1.png", 1],
+    ["CXR_png/CHNCXR_0528_1.png", 1],
+    ["CXR_png/CHNCXR_0470_1.png", 1],
+    ["CXR_png/CHNCXR_0435_1.png", 1],
+    ["CXR_png/CHNCXR_0408_1.png", 1],
+    ["CXR_png/CHNCXR_0360_1.png", 1],
+    ["CXR_png/CHNCXR_0515_1.png", 1],
+    ["CXR_png/CHNCXR_0413_1.png", 1],
+    ["CXR_png/CHNCXR_0456_1.png", 1],
+    ["CXR_png/CHNCXR_0358_1.png", 1],
+    ["CXR_png/CHNCXR_0475_1.png", 1],
+    ["CXR_png/CHNCXR_0453_1.png", 1],
+    ["CXR_png/CHNCXR_0608_1.png", 1],
+    ["CXR_png/CHNCXR_0509_1.png", 1],
+    ["CXR_png/CHNCXR_0339_1.png", 1],
+    ["CXR_png/CHNCXR_0414_1.png", 1],
+    ["CXR_png/CHNCXR_0451_1.png", 1],
+    ["CXR_png/CHNCXR_0381_1.png", 1],
+    ["CXR_png/CHNCXR_0491_1.png", 1],
+    ["CXR_png/CHNCXR_0341_1.png", 1],
+    ["CXR_png/CHNCXR_0429_1.png", 1],
+    ["CXR_png/CHNCXR_0571_1.png", 1],
+    ["CXR_png/CHNCXR_0550_1.png", 1],
+    ["CXR_png/CHNCXR_0534_1.png", 1],
+    ["CXR_png/CHNCXR_0448_1.png", 1],
+    ["CXR_png/CHNCXR_0365_1.png", 1],
+    ["CXR_png/CHNCXR_0398_1.png", 1],
+    ["CXR_png/CHNCXR_0654_1.png", 1],
+    ["CXR_png/CHNCXR_0611_1.png", 1],
+    ["CXR_png/CHNCXR_0555_1.png", 1],
+    ["CXR_png/CHNCXR_0510_1.png", 1],
+    ["CXR_png/CHNCXR_0568_1.png", 1],
+    ["CXR_png/CHNCXR_0595_1.png", 1],
+    ["CXR_png/CHNCXR_0488_1.png", 1],
+    ["CXR_png/CHNCXR_0430_1.png", 1],
+    ["CXR_png/CHNCXR_0635_1.png", 1],
+    ["CXR_png/CHNCXR_0614_1.png", 1],
+    ["CXR_png/CHNCXR_0651_1.png", 1],
+    ["CXR_png/CHNCXR_0344_1.png", 1],
+    ["CXR_png/CHNCXR_0569_1.png", 1],
+    ["CXR_png/CHNCXR_0594_1.png", 1],
+    ["CXR_png/CHNCXR_0610_1.png", 1],
+    ["CXR_png/CHNCXR_0655_1.png", 1],
+    ["CXR_png/CHNCXR_0511_1.png", 1],
+    ["CXR_png/CHNCXR_0554_1.png", 1],
+    ["CXR_png/CHNCXR_0449_1.png", 1],
+    ["CXR_png/CHNCXR_0364_1.png", 1],
+    ["CXR_png/CHNCXR_0399_1.png", 1],
+    ["CXR_png/CHNCXR_0513_1.png", 1],
+    ["CXR_png/CHNCXR_0556_1.png", 1],
+    ["CXR_png/CHNCXR_0359_1.png", 1],
+    ["CXR_png/CHNCXR_0612_1.png", 1],
+    ["CXR_png/CHNCXR_0366_1.png", 1],
+    ["CXR_png/CHNCXR_0476_1.png", 1],
+    ["CXR_png/CHNCXR_0433_1.png", 1],
+    ["CXR_png/CHNCXR_0596_1.png", 1],
+    ["CXR_png/CHNCXR_0382_1.png", 1],
+    ["CXR_png/CHNCXR_0452_1.png", 1],
+    ["CXR_png/CHNCXR_0417_1.png", 1],
+    ["CXR_png/CHNCXR_0636_1.png", 1],
+    ["CXR_png/CHNCXR_0537_1.png", 1],
+    ["CXR_png/CHNCXR_0572_1.png", 1],
+    ["CXR_png/CHNCXR_0342_1.png", 1],
+    ["CXR_png/CHNCXR_0657_1.png", 1],
+    ["CXR_png/CHNCXR_0416_1.png", 1],
+    ["CXR_png/CHNCXR_0431_1.png", 1],
+    ["CXR_png/CHNCXR_0489_1.png", 1],
+    ["CXR_png/CHNCXR_0469_1.png", 1],
+    ["CXR_png/CHNCXR_0494_1.png", 1],
+    ["CXR_png/CHNCXR_0589_1.png", 1],
+    ["CXR_png/CHNCXR_0630_1.png", 1],
+    ["CXR_png/CHNCXR_0531_1.png", 1],
+    ["CXR_png/CHNCXR_0574_1.png", 1],
+    ["CXR_png/CHNCXR_0549_1.png", 1],
+    ["CXR_png/CHNCXR_0648_1.png", 1],
+    ["CXR_png/CHNCXR_0384_1.png", 1],
+    ["CXR_png/CHNCXR_0379_1.png", 1],
+    ["CXR_png/CHNCXR_0454_1.png", 1],
+    ["CXR_png/CHNCXR_0474_1.png", 1],
+    ["CXR_png/CHNCXR_0411_1.png", 1],
+    ["CXR_png/CHNCXR_0570_1.png", 1],
+    ["CXR_png/CHNCXR_0634_1.png", 1],
+    ["CXR_png/CHNCXR_0490_1.png", 1],
+    ["CXR_png/CHNCXR_0284_0.png", 0],
+    ["CXR_png/CHNCXR_0008_0.png", 0],
+    ["CXR_png/CHNCXR_0104_0.png", 0],
+    ["CXR_png/CHNCXR_0217_0.png", 0],
+    ["CXR_png/CHNCXR_0148_0.png", 0],
+    ["CXR_png/CHNCXR_0177_0.png", 0],
+    ["CXR_png/CHNCXR_0083_0.png", 0],
+    ["CXR_png/CHNCXR_0132_0.png", 0],
+    ["CXR_png/CHNCXR_0203_0.png", 0],
+    ["CXR_png/CHNCXR_0246_0.png", 0],
+    ["CXR_png/CHNCXR_0232_0.png", 0],
+    ["CXR_png/CHNCXR_0277_0.png", 0],
+    ["CXR_png/CHNCXR_0102_0.png", 0],
+    ["CXR_png/CHNCXR_0147_0.png", 0],
+    ["CXR_png/CHNCXR_0012_0.png", 0],
+    ["CXR_png/CHNCXR_0323_0.png", 0],
+    ["CXR_png/CHNCXR_0133_0.png", 0],
+    ["CXR_png/CHNCXR_0067_0.png", 0],
+    ["CXR_png/CHNCXR_0022_0.png", 0],
+    ["CXR_png/CHNCXR_0286_0.png", 0],
+    ["CXR_png/CHNCXR_0176_0.png", 0],
+    ["CXR_png/CHNCXR_0187_0.png", 0],
+    ["CXR_png/CHNCXR_0262_0.png", 0],
+    ["CXR_png/CHNCXR_0305_0.png", 0],
+    ["CXR_png/CHNCXR_0227_0.png", 0],
+    ["CXR_png/CHNCXR_0056_0.png", 0],
+    ["CXR_png/CHNCXR_0097_0.png", 0],
+    ["CXR_png/CHNCXR_0041_0.png", 0],
+    ["CXR_png/CHNCXR_0004_0.png", 0],
+    ["CXR_png/CHNCXR_0192_0.png", 0],
+    ["CXR_png/CHNCXR_0119_0.png", 0],
+    ["CXR_png/CHNCXR_0293_0.png", 0],
+    ["CXR_png/CHNCXR_0218_0.png", 0],
+    ["CXR_png/CHNCXR_0043_0.png", 0],
+    ["CXR_png/CHNCXR_0006_0.png", 0],
+    ["CXR_png/CHNCXR_0276_0.png", 0],
+    ["CXR_png/CHNCXR_0233_0.png", 0],
+    ["CXR_png/CHNCXR_0307_0.png", 0],
+    ["CXR_png/CHNCXR_0312_0.png", 0],
+    ["CXR_png/CHNCXR_0249_0.png", 0],
+    ["CXR_png/CHNCXR_0057_0.png", 0],
+    ["CXR_png/CHNCXR_0126_0.png", 0],
+    ["CXR_png/CHNCXR_0163_0.png", 0],
+    ["CXR_png/CHNCXR_0013_0.png", 0],
+    ["CXR_png/CHNCXR_0229_0.png", 0],
+    ["CXR_png/CHNCXR_0251_0.png", 0],
+    ["CXR_png/CHNCXR_0205_0.png", 0],
+    ["CXR_png/CHNCXR_0047_0.png", 0],
+    ["CXR_png/CHNCXR_0002_0.png", 0],
+    ["CXR_png/CHNCXR_0095_0.png", 0],
+    ["CXR_png/CHNCXR_0054_0.png", 0],
+    ["CXR_png/CHNCXR_0011_0.png", 0],
+    ["CXR_png/CHNCXR_0087_0.png", 0],
+    ["CXR_png/CHNCXR_0340_1.png", 1],
+    ["CXR_png/CHNCXR_0428_1.png", 1],
+    ["CXR_png/CHNCXR_0338_1.png", 1],
+    ["CXR_png/CHNCXR_0450_1.png", 1],
+    ["CXR_png/CHNCXR_0415_1.png", 1],
+    ["CXR_png/CHNCXR_0380_1.png", 1],
+    ["CXR_png/CHNCXR_0609_1.png", 1],
+    ["CXR_png/CHNCXR_0508_1.png", 1],
+    ["CXR_png/CHNCXR_0535_1.png", 1],
+    ["CXR_png/CHNCXR_0606_1.png", 1],
+    ["CXR_png/CHNCXR_0383_1.png", 1],
+    ["CXR_png/CHNCXR_0573_1.png", 1],
+    ["CXR_png/CHNCXR_0466_1.png", 1],
+    ["CXR_png/CHNCXR_0423_1.png", 1],
+    ["CXR_png/CHNCXR_0584_1.png", 1],
+    ["CXR_png/CHNCXR_0579_1.png", 1],
+    ["CXR_png/CHNCXR_0464_1.png", 1],
+    ["CXR_png/CHNCXR_0421_1.png", 1],
+    ["CXR_png/CHNCXR_0349_1.png", 1],
+    ["CXR_png/CHNCXR_0499_1.png", 1],
+    ["CXR_png/CHNCXR_0389_1.png", 1],
+    ["CXR_png/CHNCXR_0459_1.png", 1],
+    ["CXR_png/CHNCXR_0374_1.png", 1],
+    ["CXR_png/CHNCXR_0331_1.png", 1],
+    ["CXR_png/CHNCXR_0501_1.png", 1],
+    ["CXR_png/CHNCXR_0544_1.png", 1],
+    ["CXR_png/CHNCXR_0600_1.png", 1],
+    ["CXR_png/CHNCXR_0645_1.png", 1],
+    ["CXR_png/CHNCXR_0350_1.png", 1],
+    ["CXR_png/CHNCXR_0438_1.png", 1],
+    ["CXR_png/CHNCXR_0480_1.png", 1],
+    ["CXR_png/CHNCXR_0624_1.png", 1],
+    ["CXR_png/CHNCXR_0661_1.png", 1],
+    ["CXR_png/CHNCXR_0525_1.png", 1],
+    ["CXR_png/CHNCXR_0560_1.png", 1],
+    ["CXR_png/CHNCXR_0586_1.png", 1],
+    ["CXR_png/CHNCXR_0546_1.png", 1],
+    ["CXR_png/CHNCXR_0503_1.png", 1],
+    ["CXR_png/CHNCXR_0647_1.png", 1],
+    ["CXR_png/CHNCXR_0372_1.png", 1],
+    ["CXR_png/CHNCXR_0337_1.png", 1],
+    ["CXR_png/CHNCXR_0462_1.png", 1],
+    ["CXR_png/CHNCXR_0427_1.png", 1],
+    ["CXR_png/CHNCXR_0582_1.png", 1],
+    ["CXR_png/CHNCXR_0396_1.png", 1],
+    ["CXR_png/CHNCXR_0446_1.png", 1],
+    ["CXR_png/CHNCXR_0403_1.png", 1],
+    ["CXR_png/CHNCXR_0622_1.png", 1],
+    ["CXR_png/CHNCXR_0523_1.png", 1],
+    ["CXR_png/CHNCXR_0566_1.png", 1],
+    ["CXR_png/CHNCXR_0518_1.png", 1],
+    ["CXR_png/CHNCXR_0356_1.png", 1],
+    ["CXR_png/CHNCXR_0442_1.png", 1],
+    ["CXR_png/CHNCXR_0407_1.png", 1],
+    ["CXR_png/CHNCXR_0223_0.png", 0],
+    ["CXR_png/CHNCXR_0266_0.png", 0],
+    ["CXR_png/CHNCXR_0122_0.png", 0],
+    ["CXR_png/CHNCXR_0167_0.png", 0],
+    ["CXR_png/CHNCXR_0296_0.png", 0],
+    ["CXR_png/CHNCXR_0085_0.png", 0],
+    ["CXR_png/CHNCXR_0310_0.png", 0],
+    ["CXR_png/CHNCXR_0197_0.png", 0],
+    ["CXR_png/CHNCXR_0240_0.png", 0],
+    ["CXR_png/CHNCXR_0078_0.png", 0],
+    ["CXR_png/CHNCXR_0299_0.png", 0],
+    ["CXR_png/CHNCXR_0069_0.png", 0],
+    ["CXR_png/CHNCXR_0068_0.png", 0],
+    ["CXR_png/CHNCXR_0303_0.png", 0],
+    ["CXR_png/CHNCXR_0094_0.png", 0],
+    ["CXR_png/CHNCXR_0175_0.png", 0],
+    ["CXR_png/CHNCXR_0125_0.png", 0],
+    ["CXR_png/CHNCXR_0131_0.png", 0],
+    ["CXR_png/CHNCXR_0174_0.png", 0],
+    ["CXR_png/CHNCXR_0106_0.png", 0],
+    ["CXR_png/CHNCXR_0143_0.png", 0],
+    ["CXR_png/CHNCXR_0288_0.png", 0],
+    ["CXR_png/CHNCXR_0207_0.png", 0],
+    ["CXR_png/CHNCXR_0248_0.png", 0],
+    ["CXR_png/CHNCXR_0242_0.png", 0],
+    ["CXR_png/CHNCXR_0032_0.png", 0],
+    ["CXR_png/CHNCXR_0282_0.png", 0],
+    ["CXR_png/CHNCXR_0077_0.png", 0],
+    ["CXR_png/CHNCXR_0230_0.png", 0],
+    ["CXR_png/CHNCXR_0275_0.png", 0],
+    ["CXR_png/CHNCXR_0063_0.png", 0],
+    ["CXR_png/CHNCXR_0026_0.png", 0],
+    ["CXR_png/CHNCXR_0189_0.png", 0],
+    ["CXR_png/CHNCXR_0183_0.png", 0],
+    ["CXR_png/CHNCXR_0149_0.png", 0],
+    ["CXR_png/CHNCXR_0256_0.png", 0],
+    ["CXR_png/CHNCXR_0195_0.png", 0],
+    ["CXR_png/CHNCXR_0325_0.png", 0],
+    ["CXR_png/CHNCXR_0193_0.png", 0],
+    ["CXR_png/CHNCXR_0181_0.png", 0],
+    ["CXR_png/CHNCXR_0238_0.png", 0],
+    ["CXR_png/CHNCXR_0035_0.png", 0],
+    ["CXR_png/CHNCXR_0280_0.png", 0],
+    ["CXR_png/CHNCXR_0070_0.png", 0],
+    ["CXR_png/CHNCXR_0120_0.png", 0],
+    ["CXR_png/CHNCXR_0139_0.png", 0],
+    ["CXR_png/CHNCXR_0024_0.png", 0],
+    ["CXR_png/CHNCXR_0311_0.png", 0],
+    ["CXR_png/CHNCXR_0301_0.png", 0],
+    ["CXR_png/CHNCXR_0128_0.png", 0],
+    ["CXR_png/CHNCXR_0291_0.png", 0],
+    ["CXR_png/CHNCXR_0045_0.png", 0],
+    ["CXR_png/CHNCXR_0061_0.png", 0],
+    ["CXR_png/CHNCXR_0112_0.png", 0],
+    ["CXR_png/CHNCXR_0157_0.png", 0],
+    ["CXR_png/CHNCXR_0019_0.png", 0],
+    ["CXR_png/CHNCXR_0165_0.png", 0],
+    ["CXR_png/CHNCXR_0190_0.png", 0],
+    ["CXR_png/CHNCXR_0198_0.png", 0],
+    ["CXR_png/CHNCXR_0158_0.png", 0],
+    ["CXR_png/CHNCXR_0318_0.png", 0],
+    ["CXR_png/CHNCXR_0221_0.png", 0],
+    ["CXR_png/CHNCXR_0213_0.png", 0],
+    ["CXR_png/CHNCXR_0259_0.png", 0],
+    ["CXR_png/CHNCXR_0292_0.png", 0],
+    ["CXR_png/CHNCXR_0264_0.png", 0],
+    ["CXR_png/CHNCXR_0392_1.png", 1],
+    ["CXR_png/CHNCXR_0482_1.png", 1],
+    ["CXR_png/CHNCXR_0352_1.png", 1],
+    ["CXR_png/CHNCXR_0527_1.png", 1],
+    ["CXR_png/CHNCXR_0562_1.png", 1],
+    ["CXR_png/CHNCXR_0626_1.png", 1],
+    ["CXR_png/CHNCXR_0376_1.png", 1],
+    ["CXR_png/CHNCXR_0333_1.png", 1],
+    ["CXR_png/CHNCXR_0602_1.png", 1],
+    ["CXR_png/CHNCXR_0486_1.png", 1],
+    ["CXR_png/CHNCXR_0619_1.png", 1],
+    ["CXR_png/CHNCXR_0390_1.png", 1],
+    ["CXR_png/CHNCXR_0328_1.png", 1],
+    ["CXR_png/CHNCXR_0497_1.png", 1],
+    ["CXR_png/CHNCXR_0347_1.png", 1],
+    ["CXR_png/CHNCXR_0412_1.png", 1],
+    ["CXR_png/CHNCXR_0457_1.png", 1],
+    ["CXR_png/CHNCXR_0387_1.png", 1],
+    ["CXR_png/CHNCXR_0436_1.png", 1],
+    ["CXR_png/CHNCXR_0473_1.png", 1],
+    ["CXR_png/CHNCXR_0593_1.png", 1],
+    ["CXR_png/CHNCXR_0652_1.png", 1],
+    ["CXR_png/CHNCXR_0617_1.png", 1],
+    ["CXR_png/CHNCXR_0553_1.png", 1],
+    ["CXR_png/CHNCXR_0633_1.png", 1],
+    ["CXR_png/CHNCXR_0516_1.png", 1],
+    ["CXR_png/CHNCXR_0597_1.png", 1],
+    ["CXR_png/CHNCXR_0432_1.png", 1],
+    ["CXR_png/CHNCXR_0477_1.png", 1],
+    ["CXR_png/CHNCXR_0367_1.png", 1],
+    ["CXR_png/CHNCXR_0557_1.png", 1],
+    ["CXR_png/CHNCXR_0512_1.png", 1],
+    ["CXR_png/CHNCXR_0656_1.png", 1],
+    ["CXR_png/CHNCXR_0613_1.png", 1],
+    ["CXR_png/CHNCXR_0343_1.png", 1],
+    ["CXR_png/CHNCXR_0493_1.png", 1],
+    ["CXR_png/CHNCXR_0637_1.png", 1],
+    ["CXR_png/CHNCXR_0363_1.png", 1],
+    ["CXR_png/CHNCXR_0536_1.png", 1],
+    ["CXR_png/CHNCXR_0532_1.png", 1],
+    ["CXR_png/CHNCXR_0495_1.png", 1],
+    ["CXR_png/CHNCXR_0405_1.png", 1],
+    ["CXR_png/CHNCXR_0551_1.png", 1],
+    ["CXR_png/CHNCXR_0514_1.png", 1],
+    ["CXR_png/CHNCXR_0650_1.png", 1],
+    ["CXR_png/CHNCXR_0615_1.png", 1],
+    ["CXR_png/CHNCXR_0409_1.png", 1],
+    ["CXR_png/CHNCXR_0361_1.png", 1],
+    ["CXR_png/CHNCXR_0434_1.png", 1],
+    ["CXR_png/CHNCXR_0471_1.png", 1],
+    ["CXR_png/CHNCXR_0628_1.png", 1],
+    ["CXR_png/CHNCXR_0591_1.png", 1],
+    ["CXR_png/CHNCXR_0577_1.png", 1],
+    ["CXR_png/CHNCXR_0529_1.png", 1],
+    ["CXR_png/CHNCXR_0378_1.png", 1],
+    ["CXR_png/CHNCXR_0410_1.png", 1],
+    ["CXR_png/CHNCXR_0455_1.png", 1],
+    ["CXR_png/CHNCXR_0548_1.png", 1],
+    ["CXR_png/CHNCXR_0649_1.png", 1],
+    ["CXR_png/CHNCXR_0588_1.png", 1],
+    ["CXR_png/CHNCXR_0631_1.png", 1],
+    ["CXR_png/CHNCXR_0575_1.png", 1],
+    ["CXR_png/CHNCXR_0530_1.png", 1],
+    ["CXR_png/CHNCXR_0345_1.png", 1],
+    ["CXR_png/CHNCXR_0468_1.png", 1],
+    ["CXR_png/CHNCXR_0385_1.png", 1],
+    ["CXR_png/CHNCXR_0583_1.png", 1]
+  ]
+}
diff --git a/src/mednet/config/data/shenzhen/alltest.py b/src/mednet/config/data/shenzhen/alltest.py
new file mode 100644
index 0000000000000000000000000000000000000000..12a22e1e644e02d4bddaf5099191566dac3a0658
--- /dev/null
+++ b/src/mednet/config/data/shenzhen/alltest.py
@@ -0,0 +1,16 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+"""Shenzhen TB database split with all samples in the test subset.
+
+Database reference: [MONTGOMERY-SHENZHEN-2014]_
+
+* Test samples: 100% of the database
+
+See :py:class:`mednet.config.data.shenzhen.datamodule.DataModule` for
+technical details.
+"""
+
+from mednet.config.data.shenzhen.datamodule import DataModule
+
+datamodule = DataModule("alltest.json")
diff --git a/src/mednet/engine/evaluator.py b/src/mednet/engine/evaluator.py
index 64bbd76e11fabe603ab2a05ffd8adbe271b78371..bfd32cf94b5fb10a9fec000bc78711509211c155 100644
--- a/src/mednet/engine/evaluator.py
+++ b/src/mednet/engine/evaluator.py
@@ -5,6 +5,7 @@
 
 import contextlib
 import itertools
+import json
 import logging
 import typing
 
@@ -113,24 +114,25 @@ def maxf1_threshold(predictions: Iterable[BinaryPrediction]) -> float:
     return maxf1_threshold
 
 
-def _score_plot(
-    labels: numpy.typing.NDArray,
-    scores: numpy.typing.NDArray,
+def score_plot(
+    histograms: dict[str, dict[str, numpy.typing.NDArray]],
     title: str,
-    threshold: float,
+    threshold: float | None,
 ) -> matplotlib.figure.Figure:
-    """Plot the normalized score distributions for all systems.
+    """Plot the score distributions for all systems.
 
     Parameters
     ----------
-    labels
-        True labels (negatives and positives) for each entry in ``scores``.
-    scores
-        Likelihoods provided by the classification model, for each sample.
+    histograms
+        A dictionary containing all histograms that should be inserted into the
+        plot.  Each histogram should itself be setup as another dictionary
+        containing the keys ``hist`` and ``bin_edges`` as returned by
+        :py:func:`numpy.histogram`.
     title
         Title of the plot.
     threshold
-        Shows where the threshold is in the figure.
+        Shows where the threshold is in the figure.  If set to ``None``, then
+        does not show the threshold line.
 
     Returns
     -------
@@ -138,43 +140,55 @@ def _score_plot(
         A single (matplotlib) plot containing the score distribution, ready to
         be saved to disk or displayed.
     """
+    from matplotlib.ticker import MaxNLocator
 
     fig, ax = plt.subplots(1, 1)
     assert isinstance(fig, matplotlib.figure.Figure)
     ax = typing.cast(plt.Axes, ax)  # gets editor to behave
 
     # Here, we configure the "style" of our plot
-    ax.set_xlim([0, 1])
+    ax.set_xlim((0, 1))
     ax.set_title(title)
     ax.set_xlabel("Score")
-    ax.set_ylabel("Normalized count")
-    ax.grid(linestyle="--", linewidth=1, color="gray", alpha=0.2)
+    ax.set_ylabel("Count")
 
     # Only show ticks on the left and bottom spines
     ax.spines.right.set_visible(False)
     ax.spines.top.set_visible(False)
     ax.get_xaxis().tick_bottom()
     ax.get_yaxis().tick_left()
+    ax.get_yaxis().set_major_locator(MaxNLocator(integer=True))
 
-    positives = scores[labels > 0.5]
-    negatives = scores[labels < 0.5]
-
-    ax.hist(positives, bins="auto", label="positives", density=True, alpha=0.7)
-    ax.hist(negatives, bins="auto", label="negatives", density=True, alpha=0.7)
-
-    # Adds threshold line (dotted red)
-    ax.axvline(
-        threshold,  # type: ignore
-        color="red",
-        lw=2,
-        alpha=0.75,
-        ls="dotted",
-        label="threshold",
-    )
+    # Setup the grid
+    ax.grid(linestyle="--", linewidth=1, color="gray", alpha=0.2)
+    ax.get_xaxis().grid(False)
+
+    max_hist = 0
+    for name in histograms.keys():
+        hist = histograms[name]["hist"]
+        bin_edges = histograms[name]["bin_edges"]
+        width = 0.7 * (bin_edges[1] - bin_edges[0])
+        center = (bin_edges[:-1] + bin_edges[1:]) / 2
+        ax.bar(center, hist, align="center", width=width, label=name, alpha=0.7)
+        max_hist = max(max_hist, hist.max())
+
+    # Detach axes from the plot
+    ax.spines["left"].set_position(("data", -0.015))
+    ax.spines["bottom"].set_position(("data", -0.015 * max_hist))
+
+    if threshold is not None:
+        # Adds threshold line (dotted red)
+        ax.axvline(
+            threshold,  # type: ignore
+            color="red",
+            lw=2,
+            alpha=0.75,
+            ls="dotted",
+            label="threshold",
+        )
 
     # Adds a nice legend
     ax.legend(
-        title="Max F1-scores",
         fancybox=True,
         framealpha=0.7,
     )
@@ -188,12 +202,9 @@ def _score_plot(
 def run_binary(
     name: str,
     predictions: Iterable[BinaryPrediction],
+    binning: str | int,
     threshold_a_priori: float | None = None,
-) -> tuple[
-    dict[str, typing.Any],
-    dict[str, matplotlib.figure.Figure],
-    dict[str, typing.Any],
-]:
+) -> dict[str, typing.Any]:
-    """Run inference and calculates measures for binary classification.
+    """Run inference and calculate measures for binary classification.
 
     Parameters
@@ -202,6 +213,10 @@ def run_binary(
         The name of subset to load.
     predictions
         A list of predictions to consider for measurement.
+    binning
+        The binning algorithm to use for computing the bin widths and
+        distribution for histograms.  Choose from algorithms supported by
+        :py:func:`numpy.histogram`.
     threshold_a_priori
         A threshold to use, evaluated *a priori*, if must report single values.
         If this value is not provided, an *a posteriori* threshold is calculated
@@ -209,17 +224,13 @@ def run_binary(
 
     Returns
     -------
-    tuple[
-    dict[str, typing.Any],
-    dict[str, matplotlib.figure.Figure],
-    dict[str, typing.Any]]
+    dict[str, typing.Any]
-        A tuple containing the following entries:
+        A dictionary containing the following entries:
 
         * summary: A dictionary containing the performance summary on the
-          specified threshold.
-        * figures: A dictionary of generated standalone figures.
-        * curves: A dictionary containing curves that can potentially be combined
-          with other prediction lists to make aggregate plots.
+          specified threshold, general performance curves (under the key
+          ``curves``), and score histograms (under the key
+          ``score-histograms``).
     """
 
     y_scores = numpy.array([k[2] for k in predictions])  # likelihoods
@@ -253,40 +264,66 @@ def run_binary(
             y_labels, y_predictions, pos_label=pos_label
         ),
         average_precision_score=sklearn.metrics.average_precision_score(
-            y_labels, y_predictions, pos_label=pos_label
+            y_labels, y_scores, pos_label=pos_label
         ),
         specificity=sklearn.metrics.recall_score(
             y_labels, y_predictions, pos_label=neg_label
         ),
         auc_score=sklearn.metrics.roc_auc_score(
             y_labels,
-            y_predictions,
+            y_scores,
         ),
         accuracy=sklearn.metrics.accuracy_score(y_labels, y_predictions),
     )
 
-    # figures: score distributions
-    figures = dict(
-        scores=_score_plot(
-            y_labels,
-            y_scores,
-            f"Score distribution (split: {name})",
-            use_threshold,
+    # curves: ROC and precision recall
+    summary["curves"] = dict(
+        roc=dict(
+            zip(
+                ("fpr", "tpr", "thresholds"),
+                sklearn.metrics.roc_curve(
+                    y_labels, y_scores, pos_label=pos_label
+                ),
+            )
+        ),
+        precision_recall=dict(
+            zip(
+                ("precision", "recall", "thresholds"),
+                sklearn.metrics.precision_recall_curve(
+                    y_labels, y_scores, pos_label=pos_label
+                ),
+            ),
         ),
     )
 
-    # curves: ROC and precision recall
-    curves = dict(
-        roc=sklearn.metrics.roc_curve(y_labels, y_scores, pos_label=pos_label),
-        precision_recall=sklearn.metrics.precision_recall_curve(
-            y_labels, y_scores, pos_label=pos_label
+    # score histograms
+    # what works: <integer>, doane*, scott, stone, rice*, sturges*, sqrt
+    # what does not work: auto, fd
+    summary["score-histograms"] = dict(
+        positives=dict(
+            zip(
+                ("hist", "bin_edges"),
+                numpy.histogram(
+                    y_scores[y_labels == pos_label], bins=binning, range=(0, 1)
+                ),
+            )
+        ),
+        negatives=dict(
+            zip(
+                ("hist", "bin_edges"),
+                numpy.histogram(
+                    y_scores[y_labels == neg_label],
+                    bins=binning,
+                    range=(0, 1),
+                ),
+            )
         ),
     )
 
-    return summary, figures, curves
+    return summary
 
 
-def aggregate_summaries(
+def tabulate_results(
     data: typing.Mapping[str, typing.Mapping[str, typing.Any]], fmt: str
 ) -> str:
     """Tabulate summaries from multiple splits.
@@ -379,14 +416,16 @@ def aggregate_roc(
 
     legend = []
 
-    for name, (fpr, tpr, _) in data.items():
+    for name, elements in data.items():
         # plots roc curve
-        _auc = sklearn.metrics.auc(fpr, tpr)
+        _auc = sklearn.metrics.auc(elements["fpr"], elements["tpr"])
         label = f"{name} (AUC={_auc:.2f})"
         color = next(colorcycler)
         style = next(linecycler)
 
-        (line,) = ax.plot(fpr, tpr, color=color, linestyle=style)
+        (line,) = ax.plot(
+            elements["fpr"], elements["tpr"], color=color, linestyle=style
+        )
         legend.append((line, label))
 
     if len(legend) > 1:
@@ -516,13 +555,20 @@ def aggregate_pr(
         axes.set_title(title)
         legend = []
 
-        for name, (prec, recall, _) in data.items():
-            _ap = credible.curves.average_metric([prec, recall])
+        for name, elements in data.items():
+            _ap = credible.curves.average_metric(
+                (elements["precision"], elements["recall"])
+            )
             label = f"{name} (AP={_ap:.2f})"
             color = next(colorcycler)
             style = next(linecycler)
 
-            (line,) = axes.plot(recall, prec, color=color, linestyle=style)
+            (line,) = axes.plot(
+                elements["recall"],
+                elements["precision"],
+                color=color,
+                linestyle=style,
+            )
             legend.append((line, label))
 
         if len(legend) > 1:
@@ -535,3 +581,31 @@ def aggregate_pr(
             )
 
     return fig
+
+
+class NumpyJSONEncoder(json.JSONEncoder):
+    """Extends the standard JSON encoder to support Numpy arrays."""
+
+    def default(self, o: typing.Any) -> typing.Any:
+        """If input object is a ndarray it will be converted into a list.
+
+        Parameters
+        ----------
+        o
+            Input object to be JSON serialized.
+
+        Returns
+        -------
+            A serializable representation of object ``o``.
+        """
+
+        if isinstance(o, numpy.ndarray):
+            try:
+                retval = o.tolist()
+            except TypeError:
+                pass
+            else:
+                return retval
+
+        # Let the base class default method raise the TypeError
+        return super().default(o)
diff --git a/src/mednet/scripts/evaluate.py b/src/mednet/scripts/evaluate.py
index 49f8ad4e89e3f6cd41c69550e31a58c663dc5afb..f37a6d26a7bd8f74d742b85d63d74fbb9474bf1a 100644
--- a/src/mednet/scripts/evaluate.py
+++ b/src/mednet/scripts/evaluate.py
@@ -70,16 +70,41 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
     the highest F1-score on that set) and then applied to the subsequent
     sets.  This value is not used for multi-class classification tasks.""",
     default=0.5,
-    show_default=False,
+    show_default=True,
     required=True,
     type=click.STRING,
     cls=ResourceOption,
 )
+@click.option(
+    "--binning",
+    "-b",
+    help="""The binning algorithm to use for computing the bin widths and
+    distribution for histograms.  Choose from algorithms supported by
+    :py:func:`numpy.histogram`, or a simple integer indicating the number of
+    bins to have in the interval ``[0, 1]``.""",
+    default="50",
+    show_default=True,
+    required=True,
+    type=click.STRING,
+    cls=ResourceOption,
+)
+@click.option(
+    "--plot/--no-plot",
+    "-P",
+    help="""If set, then also produces figures containing the plots of
+    performance curves and score histograms.""",
+    required=True,
+    show_default=True,
+    default=True,
+    cls=ResourceOption,
+)
 @verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
 def evaluate(
     predictions: pathlib.Path,
     output: pathlib.Path,
     threshold: str | float,
+    binning: str,
+    plot: bool,
     **_,  # ignored
 ) -> None:  # numpydoc ignore=PR01
     """Evaluate predictions (from a model) on a classification task."""
@@ -87,15 +112,15 @@ def evaluate(
     import json
     import typing
 
-    import matplotlib.figure
-
     from matplotlib.backends.backend_pdf import PdfPages
 
     from ..engine.evaluator import (
+        NumpyJSONEncoder,
         aggregate_pr,
         aggregate_roc,
-        aggregate_summaries,
         run_binary,
+        score_plot,
+        tabulate_results,
     )
     from .utils import execution_metadata, save_json_with_backup
 
@@ -126,29 +151,30 @@ def evaluate(
                 or can not be converted to a float. Check your input."""
             )
 
-    results: dict[
-        str,
-        tuple[
-            dict[str, typing.Any],
-            dict[str, matplotlib.figure.Figure],
-            dict[str, typing.Any],
-        ],
-    ] = dict()
+    results: dict[str, dict[str, typing.Any]] = dict()
     for k, v in predict_data.items():
         logger.info(f"Analyzing split `{k}`...")
         results[k] = run_binary(
             name=k,
             predictions=v,
+            binning=int(binning) if binning.isnumeric() else binning,
             threshold_a_priori=use_threshold,
         )
 
-    data = {k: v[0] for k, v in results.items()}
+    # records full result analysis to a JSON file
     logger.info(f"Saving evaluation results at `{output}`...")
     with output.open("w") as f:
-        json.dump(data, f, indent=2)
+        json.dump(results, f, indent=2, cls=NumpyJSONEncoder)
 
     # dump evaluation results in RST format to screen and file
-    table = aggregate_summaries(data, fmt="rst")
+    table_data = {}
+    for k, v in results.items():
+        table_data[k] = {
+            kk: vv
+            for kk, vv in v.items()
+            if kk not in ("curves", "score-histograms")
+        }
+    table = tabulate_results(table_data, fmt="rst")
     click.echo(table)
 
     table_path = output.with_suffix(".rst")
@@ -162,20 +188,23 @@ def evaluate(
     figure_path = output.with_suffix(".pdf")
     logger.info(f"Saving evaluation figures at `{figure_path}`...")
 
-    with PdfPages(figure_path) as pdf:
-        pr_curves = {k: v[2]["precision_recall"] for k, v in results.items()}
-        pr_fig = aggregate_pr(pr_curves)
-        pdf.savefig(pr_fig)
-
-        roc_curves = {k: v[2]["roc"] for k, v in results.items()}
-        roc_fig = aggregate_roc(roc_curves)
-        pdf.savefig(roc_fig)
-
-        # order ready-to-save figures by type instead of split
-        figures = {k: v[1] for k, v in results.items()}
-        keys = next(iter(figures.values())).keys()
-        figures_by_type = {k: [v[k] for v in figures.values()] for k in keys}
-
-        for group_figures in figures_by_type.values():
-            for f in group_figures:
-                pdf.savefig(f)
+    if plot:
+        with PdfPages(figure_path) as pdf:
+            pr_curves = {
+                k: v["curves"]["precision_recall"] for k, v in results.items()
+            }
+            pr_fig = aggregate_pr(pr_curves)
+            pdf.savefig(pr_fig)
+
+            roc_curves = {k: v["curves"]["roc"] for k, v in results.items()}
+            roc_fig = aggregate_roc(roc_curves)
+            pdf.savefig(roc_fig)
+
+            # score plots
+            for k, v in results.items():
+                score_fig = score_plot(
+                    v["score-histograms"],
+                    f"Score distribution (split: {k})",
+                    v["threshold"],
+                )
+                pdf.savefig(score_fig)
diff --git a/tests/test_evaluator.py b/tests/test_evaluator.py
index 1fc4ae635cf426bed9ededb502530ed0d2acb6c0..8a557278b2abc552cd095eec505fcab146814897 100644
--- a/tests/test_evaluator.py
+++ b/tests/test_evaluator.py
@@ -26,3 +26,109 @@ def test_centered_maxf1():
 
     assert maxf1 == 1.0
     assert threshold == 0.4
+
+
+def test_run_binary_1():
+    from mednet.engine.evaluator import run_binary
+    from mednet.models.typing import BinaryPrediction
+
+    predictions: list[BinaryPrediction] = [
+        # (name, target, predicted-value)
+        ("s0", 0, 0.1),
+        ("s2", 0, 0.8),
+        ("s3", 1, 0.9),
+        ("s3", 1, 0.4),
+    ]
+
+    results = run_binary(
+        "test", predictions, binning=10, threshold_a_priori=0.5
+    )
+
+    assert results["num_samples"] == 4
+    assert numpy.isclose(results["threshold"], 0.5)
+    assert not results["threshold_a_posteriori"]
+    assert numpy.isclose(results["precision"], 1 / 2)  # tp / (tp + fp)
+    assert numpy.isclose(results["recall"], 1 / 2)  # tp / (tp + fn)
+    assert numpy.isclose(
+        results["f1_score"], 2 * (1 / 2 * 1 / 2) / (1 / 2 + 1 / 2)
+    )  # 2 * (prec. * recall) / (prec. + recall)
+    assert numpy.isclose(
+        results["accuracy"], (1 + 1) / (1 + 1 + 1 + 1)
+    )  # (tp + tn) / (tp + fn + tn + fp)
+    assert numpy.isclose(results["specificity"], 1 / 2)  # tn / (tn + fp)
+
+    # threshold table:
+    # threshold |  TNR  | 1-TNR |  TPR
+    # ----------+-------+-------+---------
+    #  < 0.1    |  0    |  1    |  1
+    #    0.1    |  0.5  |  0.5  |  1
+    #    0.4    |  0.5  |  0.5  |  0.5
+    #    0.8    |  1    |  0    |  0.5
+    #    0.9    |  1    |  0    |  0
+    #  > 0.9    |  1    |  0    |  0
+    assert numpy.isclose(results["auc_score"], 0.75)
+
+    # threshold table:
+    # threshold |  Prec.  |  Recall
+    # ----------+---------+----------
+    #  < 0.1    |  0.5    |  1
+    #    0.1    |  2/3    |  1
+    #    0.4    |  0.5    |  0.5
+    #    0.8    |  1      |  0.5
+    #    0.9    |  0      |  0
+    #  > 0.9    |  0      |  0
+    assert numpy.isclose(results["average_precision_score"], 0.8333333)
+
+
+def test_run_binary_2():
+    from mednet.engine.evaluator import run_binary
+    from mednet.models.typing import BinaryPrediction
+
+    predictions: list[BinaryPrediction] = [
+        # (name, target, predicted-value)
+        ("s0", 0, 0.1),
+        ("s2", 0, 0.8),
+        ("s3", 1, 0.9),
+        ("s3", 1, 0.4),
+    ]
+
+    # a change in the threshold should not affect auc and average precision scores
+    results = run_binary(
+        "test", predictions, binning=10, threshold_a_priori=0.3
+    )
+
+    assert results["num_samples"] == 4
+    assert numpy.isclose(results["threshold"], 0.3)
+    assert not results["threshold_a_posteriori"]
+
+    assert numpy.isclose(results["precision"], 2 / 3)  # tp / (tp + fp)
+    assert numpy.isclose(results["recall"], 2 / 2)  # tp / (tp + fn)
+    assert numpy.isclose(
+        results["f1_score"], 2 * (2 / 3 * 2 / 2) / (2 / 3 + 2 / 2)
+    )  # 2 * (prec. * recall) / (prec. + recall)
+    assert numpy.isclose(
+        results["accuracy"], (2 + 1) / (2 + 0 + 1 + 1)
+    )  # (tp + tn) / (tp + fn + tn + fp)
+    assert numpy.isclose(results["specificity"], 1 / (1 + 1))  # tn / (tn + fp)
+
+    # threshold table:
+    # threshold |  TNR  | 1-TNR |  TPR
+    # ----------+-------+-------+---------
+    #  < 0.1    |  0    |  1    |  1
+    #    0.1    |  0.5  |  0.5  |  1
+    #    0.4    |  0.5  |  0.5  |  0.5
+    #    0.8    |  1    |  0    |  0.5
+    #    0.9    |  1    |  0    |  0
+    #  > 0.9    |  1    |  0    |  0
+    assert numpy.isclose(results["auc_score"], 0.75)
+
+    # threshold table:
+    # threshold |  Prec.  |  Recall
+    # ----------+---------+----------
+    #  < 0.1    |  0.5    |  1
+    #    0.1    |  2/3    |  1
+    #    0.4    |  0.5    |  0.5
+    #    0.8    |  1      |  0.5
+    #    0.9    |  0      |  0
+    #  > 0.9    |  0      |  0
+    assert numpy.isclose(results["average_precision_score"], 0.8333333)