diff --git a/pyproject.toml b/pyproject.toml index 3241d527546436d0feeeefa31bc4d8ed5a48495c..a114328012d653f01d6815f4e0396aa9c65a1df9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,88 +77,41 @@ alexnet_pre = "ptbench.configs.models.alexnet_pretrained" densenet = "ptbench.configs.models.densenet" densenet_pre = "ptbench.configs.models.densenet_pretrained" # montgomery dataset (and cross-validation folds) -montgomery = "ptbench.data.montgomery.default:datamodule" -montgomery_f0 = "ptbench.data.montgomery.fold_0:datamodule" -montgomery_f1 = "ptbench.data.montgomery.fold_1:datamodule" -montgomery_f2 = "ptbench.data.montgomery.fold_2:datamodule" -montgomery_f3 = "ptbench.data.montgomery.fold_3:datamodule" -montgomery_f4 = "ptbench.data.montgomery.fold_4:datamodule" -montgomery_f5 = "ptbench.data.montgomery.fold_5:datamodule" -montgomery_f6 = "ptbench.data.montgomery.fold_6:datamodule" -montgomery_f7 = "ptbench.data.montgomery.fold_7:datamodule" -montgomery_f8 = "ptbench.data.montgomery.fold_8:datamodule" -montgomery_f9 = "ptbench.data.montgomery.fold_9:datamodule" -# extended montgomery dataset (with radiological signs) -montgomery_rs = "ptbench.configs.datasets.montgomery_RS.default" -montgomery_rs_f0 = "ptbench.configs.datasets.montgomery_RS.fold_0" -montgomery_rs_f1 = "ptbench.configs.datasets.montgomery_RS.fold_1" -montgomery_rs_f2 = "ptbench.configs.datasets.montgomery_RS.fold_2" -montgomery_rs_f3 = "ptbench.configs.datasets.montgomery_RS.fold_3" -montgomery_rs_f4 = "ptbench.configs.datasets.montgomery_RS.fold_4" -montgomery_rs_f5 = "ptbench.configs.datasets.montgomery_RS.fold_5" -montgomery_rs_f6 = "ptbench.configs.datasets.montgomery_RS.fold_6" -montgomery_rs_f7 = "ptbench.configs.datasets.montgomery_RS.fold_7" -montgomery_rs_f8 = "ptbench.configs.datasets.montgomery_RS.fold_8" -montgomery_rs_f9 = "ptbench.configs.datasets.montgomery_RS.fold_9" +montgomery = "ptbench.data.montgomery.datamodules:default" +montgomery_f0 = "ptbench.data.montgomery.datamodules:fold_0" +montgomery_f1 = "ptbench.data.montgomery.datamodules:fold_1" +montgomery_f2 = "ptbench.data.montgomery.datamodules:fold_2" +montgomery_f3 = "ptbench.data.montgomery.datamodules:fold_3" +montgomery_f4 = "ptbench.data.montgomery.datamodules:fold_4" +montgomery_f5 = "ptbench.data.montgomery.datamodules:fold_5" +montgomery_f6 = "ptbench.data.montgomery.datamodules:fold_6" +montgomery_f7 = "ptbench.data.montgomery.datamodules:fold_7" +montgomery_f8 = "ptbench.data.montgomery.datamodules:fold_8" +montgomery_f9 = "ptbench.data.montgomery.datamodules:fold_9" # shenzhen dataset (and cross-validation folds) -shenzhen = "ptbench.data.shenzhen.default:datamodule" -shenzhen_f0 = "ptbench.data.shenzhen.fold_0:datamodule" -shenzhen_f1 = "ptbench.data.shenzhen.fold_1:datamodule" -shenzhen_f2 = "ptbench.data.shenzhen.fold_2:datamodule" -shenzhen_f3 = "ptbench.data.shenzhen.fold_3:datamodule" -shenzhen_f4 = "ptbench.data.shenzhen.fold_4:datamodule" -shenzhen_f5 = "ptbench.data.shenzhen.fold_5:datamodule" -shenzhen_f6 = "ptbench.data.shenzhen.fold_6:datamodule" -shenzhen_f7 = "ptbench.data.shenzhen.fold_7:datamodule" -shenzhen_f8 = "ptbench.data.shenzhen.fold_8:datamodule" -shenzhen_f9 = "ptbench.data.shenzhen.fold_9:datamodule" -# extended shenzhen dataset (with radiological signs) -shenzhen_rs = "ptbench.data.shenzhen_RS.default" -shenzhen_rs_f0 = "ptbench.configs.datasets.shenzhen_RS.fold_0" -shenzhen_rs_f1 = "ptbench.configs.datasets.shenzhen_RS.fold_1" -shenzhen_rs_f2 = "ptbench.configs.datasets.shenzhen_RS.fold_2" -shenzhen_rs_f3 = "ptbench.configs.datasets.shenzhen_RS.fold_3" -shenzhen_rs_f4 = "ptbench.configs.datasets.shenzhen_RS.fold_4" -shenzhen_rs_f5 = "ptbench.configs.datasets.shenzhen_RS.fold_5" -shenzhen_rs_f6 = "ptbench.configs.datasets.shenzhen_RS.fold_6" -shenzhen_rs_f7 = "ptbench.configs.datasets.shenzhen_RS.fold_7" -shenzhen_rs_f8 = "ptbench.configs.datasets.shenzhen_RS.fold_8" -shenzhen_rs_f9 = "ptbench.configs.datasets.shenzhen_RS.fold_9" +shenzhen = "ptbench.data.shenzhen.datamodules:default" +shenzhen_f0 = "ptbench.data.shenzhen.datamodules:fold_0" +shenzhen_f1 = "ptbench.data.shenzhen.datamodules:fold_1" +shenzhen_f2 = "ptbench.data.shenzhen.datamodules:fold_2" +shenzhen_f3 = "ptbench.data.shenzhen.datamodules:fold_3" +shenzhen_f4 = "ptbench.data.shenzhen.datamodules:fold_4" +shenzhen_f5 = "ptbench.data.shenzhen.datamodules:fold_5" +shenzhen_f6 = "ptbench.data.shenzhen.datamodules:fold_6" +shenzhen_f7 = "ptbench.data.shenzhen.datamodules:fold_7" +shenzhen_f8 = "ptbench.data.shenzhen.datamodules:fold_8" +shenzhen_f9 = "ptbench.data.shenzhen.datamodules:fold_9" # indian dataset (and cross-validation folds) -indian = "ptbench.data.indian.default" -indian_rgb = "ptbench.data.indian.rgb" -indian_f0 = "ptbench.data.indian.fold_0" -indian_f1 = "ptbench.data.indian.fold_1" -indian_f2 = "ptbench.data.indian.fold_2" -indian_f3 = "ptbench.data.indian.fold_3" -indian_f4 = "ptbench.data.indian.fold_4" -indian_f5 = "ptbench.data.indian.fold_5" -indian_f6 = "ptbench.data.indian.fold_6" -indian_f7 = "ptbench.data.indian.fold_7" -indian_f8 = "ptbench.data.indian.fold_8" -indian_f9 = "ptbench.data.indian.fold_9" -indian_f0_rgb = "ptbench.data.indian.fold_0_rgb" -indian_f1_rgb = "ptbench.data.indian.fold_1_rgb" -indian_f2_rgb = "ptbench.data.indian.fold_2_rgb" -indian_f3_rgb = "ptbench.data.indian.fold_3_rgb" -indian_f4_rgb = "ptbench.data.indian.fold_4_rgb" -indian_f5_rgb = "ptbench.data.indian.fold_5_rgb" -indian_f6_rgb = "ptbench.data.indian.fold_6_rgb" -indian_f7_rgb = "ptbench.data.indian.fold_7_rgb" -indian_f8_rgb = "ptbench.data.indian.fold_8_rgb" -indian_f9_rgb = "ptbench.data.indian.fold_9_rgb" -# extended indian dataset (with radiological signs) -indian_rs = "ptbench.configs.datasets.indian_RS.default" -indian_rs_f0 = "ptbench.configs.datasets.indian_RS.fold_0" -indian_rs_f1 = "ptbench.configs.datasets.indian_RS.fold_1" -indian_rs_f2 = "ptbench.configs.datasets.indian_RS.fold_2" -indian_rs_f3 = "ptbench.configs.datasets.indian_RS.fold_3" -indian_rs_f4 = "ptbench.configs.datasets.indian_RS.fold_4" -indian_rs_f5 = "ptbench.configs.datasets.indian_RS.fold_5" -indian_rs_f6 = "ptbench.configs.datasets.indian_RS.fold_6" -indian_rs_f7 = "ptbench.configs.datasets.indian_RS.fold_7" -indian_rs_f8 = "ptbench.configs.datasets.indian_RS.fold_8" -indian_rs_f9 = "ptbench.configs.datasets.indian_RS.fold_9" +indian = "ptbench.data.indian.datamodules:default" +indian_f0 = "ptbench.data.indian.datamodules:fold_0" +indian_f1 = "ptbench.data.indian.datamodules:fold_1" +indian_f2 = "ptbench.data.indian.datamodules:fold_2" +indian_f3 = "ptbench.data.indian.datamodules:fold_3" +indian_f4 = "ptbench.data.indian.datamodules:fold_4" +indian_f5 = "ptbench.data.indian.datamodules:fold_5" +indian_f6 = "ptbench.data.indian.datamodules:fold_6" +indian_f7 = "ptbench.data.indian.datamodules:fold_7" +indian_f8 = "ptbench.data.indian.datamodules:fold_8" +indian_f9 = "ptbench.data.indian.datamodules:fold_9" # TBX11K simplified dataset split 1 (and cross-validation folds) tbx11k_simplified = "ptbench.data.tbx11k_simplified.default" tbx11k_simplified_rgb = "ptbench.data.tbx11k_simplified.rgb" @@ -182,18 +135,6 @@ tbx11k_simplified_f6_rgb = "ptbench.data.tbx11k_simplified.fold_6_rgb" tbx11k_simplified_f7_rgb = "ptbench.data.tbx11k_simplified.fold_7_rgb" tbx11k_simplified_f8_rgb = "ptbench.data.tbx11k_simplified.fold_8_rgb" tbx11k_simplified_f9_rgb = "ptbench.data.tbx11k_simplified.fold_9_rgb" -# extended TBX11K simplified dataset split 1 (with radiological signs) -tbx11k_simplified_rs = "ptbench.configs.datasets.tbx11k_simplified_RS.default" -tbx11k_simplified_rs_f0 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_0" -tbx11k_simplified_rs_f1 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_1" -tbx11k_simplified_rs_f2 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_2" -tbx11k_simplified_rs_f3 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_3" -tbx11k_simplified_rs_f4 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_4" -tbx11k_simplified_rs_f5 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_5" -tbx11k_simplified_rs_f6 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_6" -tbx11k_simplified_rs_f7 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_7" -tbx11k_simplified_rs_f8 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_8" -tbx11k_simplified_rs_f9 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_9" # TBX11K simplified dataset split 2 (and cross-validation folds) tbx11k_simplified_v2 = "ptbench.data.tbx11k_simplified_v2.default" tbx11k_simplified_v2_rgb = "ptbench.data.tbx11k_simplified_v2.rgb" @@ -217,18 +158,6 @@ tbx11k_simplified_v2_f6_rgb = "ptbench.data.tbx11k_simplified_v2.fold_6_rgb" tbx11k_simplified_v2_f7_rgb = "ptbench.data.tbx11k_simplified_v2.fold_7_rgb" tbx11k_simplified_v2_f8_rgb = "ptbench.data.tbx11k_simplified_v2.fold_8_rgb" tbx11k_simplified_v2_f9_rgb = "ptbench.data.tbx11k_simplified_v2.fold_9_rgb" -# extended TBX11K simplified dataset split 2 (with radiological signs) -tbx11k_simplified_v2_rs = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.default" -tbx11k_simplified_v2_rs_f0 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_0" -tbx11k_simplified_v2_rs_f1 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_1" -tbx11k_simplified_v2_rs_f2 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_2" -tbx11k_simplified_v2_rs_f3 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_3" -tbx11k_simplified_v2_rs_f4 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_4" -tbx11k_simplified_v2_rs_f5 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_5" -tbx11k_simplified_v2_rs_f6 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_6" -tbx11k_simplified_v2_rs_f7 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_7" -tbx11k_simplified_v2_rs_f8 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_8" -tbx11k_simplified_v2_rs_f9 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_9" # montgomery-shenzhen aggregated dataset mc_ch = "ptbench.data.mc_ch.default" mc_ch_rgb = "ptbench.data.mc_ch.rgb" @@ -252,19 +181,6 @@ mc_ch_f6_rgb = "ptbench.data.mc_ch.fold_6_rgb" mc_ch_f7_rgb = "ptbench.data.mc_ch.fold_7_rgb" mc_ch_f8_rgb = "ptbench.data.mc_ch.fold_8_rgb" mc_ch_f9_rgb = "ptbench.data.mc_ch.fold_9_rgb" -# extended montgomery-shenzhen aggregated dataset -# (with radiological signs) -mc_ch_rs = "ptbench.configs.datasets.mc_ch_RS.default" -mc_ch_rs_f0 = "ptbench.configs.datasets.mc_ch_RS.fold_0" -mc_ch_rs_f1 = "ptbench.configs.datasets.mc_ch_RS.fold_1" -mc_ch_rs_f2 = "ptbench.configs.datasets.mc_ch_RS.fold_2" -mc_ch_rs_f3 = "ptbench.configs.datasets.mc_ch_RS.fold_3" -mc_ch_rs_f4 = "ptbench.configs.datasets.mc_ch_RS.fold_4" -mc_ch_rs_f5 = "ptbench.configs.datasets.mc_ch_RS.fold_5" -mc_ch_rs_f6 = "ptbench.configs.datasets.mc_ch_RS.fold_6" -mc_ch_rs_f7 = "ptbench.configs.datasets.mc_ch_RS.fold_7" -mc_ch_rs_f8 = "ptbench.configs.datasets.mc_ch_RS.fold_8" -mc_ch_rs_f9 = "ptbench.configs.datasets.mc_ch_RS.fold_9" # montgomery-shenzhen-indian aggregated dataset mc_ch_in = "ptbench.data.mc_ch_in.default" mc_ch_in_rgb = "ptbench.data.mc_ch_in.rgb" @@ -288,19 +204,6 @@ mc_ch_in_f6_rgb = "ptbench.data.mc_ch_in.fold_6_rgb" mc_ch_in_f7_rgb = "ptbench.data.mc_ch_in.fold_7_rgb" mc_ch_in_f8_rgb = "ptbench.data.mc_ch_in.fold_8_rgb" mc_ch_in_f9_rgb = "ptbench.data.mc_ch_in.fold_9_rgb" -# extended montgomery-shenzhen-indian aggregated dataset -# (with radiological signs) -mc_ch_in_rs = "ptbench.configs.datasets.mc_ch_in_RS.default" -mc_ch_in_rs_f0 = "ptbench.configs.datasets.mc_ch_in_RS.fold_0" -mc_ch_in_rs_f1 = "ptbench.configs.datasets.mc_ch_in_RS.fold_1" -mc_ch_in_rs_f2 = "ptbench.configs.datasets.mc_ch_in_RS.fold_2" -mc_ch_in_rs_f3 = "ptbench.configs.datasets.mc_ch_in_RS.fold_3" -mc_ch_in_rs_f4 = "ptbench.configs.datasets.mc_ch_in_RS.fold_4" -mc_ch_in_rs_f5 = "ptbench.configs.datasets.mc_ch_in_RS.fold_5" -mc_ch_in_rs_f6 = "ptbench.configs.datasets.mc_ch_in_RS.fold_6" -mc_ch_in_rs_f7 = "ptbench.configs.datasets.mc_ch_in_RS.fold_7" -mc_ch_in_rs_f8 = "ptbench.configs.datasets.mc_ch_in_RS.fold_8" -mc_ch_in_rs_f9 = "ptbench.configs.datasets.mc_ch_in_RS.fold_9" # montgomery-shenzhen-indian-tbx11k aggregated dataset mc_ch_in_11k = "ptbench.data.mc_ch_in_11k.default" mc_ch_in_11k_rgb = "ptbench.data.mc_ch_in_11k.rgb" @@ -324,19 +227,6 @@ mc_ch_in_11k_f6_rgb = "ptbench.data.mc_ch_in_11k.fold_6_rgb" mc_ch_in_11k_f7_rgb = "ptbench.data.mc_ch_in_11k.fold_7_rgb" mc_ch_in_11k_f8_rgb = "ptbench.data.mc_ch_in_11k.fold_8_rgb" mc_ch_in_11k_f9_rgb = "ptbench.data.mc_ch_in_11k.fold_9_rgb" -# extended montgomery-shenzhen-indian-tbx11k aggregated dataset -# (with radiological signs) -mc_ch_in_11k_rs = "ptbench.configs.datasets.mc_ch_in_11k_RS.default" -mc_ch_in_11k_rs_f0 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_0" -mc_ch_in_11k_rs_f1 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_1" -mc_ch_in_11k_rs_f2 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_2" -mc_ch_in_11k_rs_f3 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_3" -mc_ch_in_11k_rs_f4 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_4" -mc_ch_in_11k_rs_f5 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_5" -mc_ch_in_11k_rs_f6 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_6" -mc_ch_in_11k_rs_f7 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_7" -mc_ch_in_11k_rs_f8 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_8" -mc_ch_in_11k_rs_f9 = "ptbench.configs.datasets.mc_ch_in_11k_RS.fold_9" # montgomery-shenzhen-indian-tbx11kv2 aggregated dataset mc_ch_in_11kv2 = "ptbench.data.mc_ch_in_11kv2.default" mc_ch_in_11kv2_rgb = "ptbench.data.mc_ch_in_11kv2.rgb" @@ -360,19 +250,6 @@ mc_ch_in_11kv2_f6_rgb = "ptbench.data.mc_ch_in_11kv2.fold_6_rgb" mc_ch_in_11kv2_f7_rgb = "ptbench.data.mc_ch_in_11kv2.fold_7_rgb" mc_ch_in_11kv2_f8_rgb = "ptbench.data.mc_ch_in_11kv2.fold_8_rgb" mc_ch_in_11kv2_f9_rgb = "ptbench.data.mc_ch_in_11kv2.fold_9_rgb" -# extended montgomery-shenzhen-indian-tbx11kv2 aggregated dataset -# (with radiological signs) -mc_ch_in_11kv2_rs = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.default" -mc_ch_in_11kv2_rs_f0 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_0" -mc_ch_in_11kv2_rs_f1 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_1" -mc_ch_in_11kv2_rs_f2 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_2" -mc_ch_in_11kv2_rs_f3 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_3" -mc_ch_in_11kv2_rs_f4 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_4" -mc_ch_in_11kv2_rs_f5 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_5" -mc_ch_in_11kv2_rs_f6 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_6" -mc_ch_in_11kv2_rs_f7 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_7" -mc_ch_in_11kv2_rs_f8 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_8" -mc_ch_in_11kv2_rs_f9 = "ptbench.configs.datasets.mc_ch_in_11kv2_RS.fold_9" # tbpoc dataset (and cross-validation folds) tbpoc_f0 = "ptbench.data.tbpoc.fold_0" tbpoc_f1 = "ptbench.data.tbpoc.fold_1" @@ -394,17 +271,6 @@ tbpoc_f6_rgb = "ptbench.data.tbpoc.fold_6_rgb" tbpoc_f7_rgb = "ptbench.data.tbpoc.fold_7_rgb" tbpoc_f8_rgb = "ptbench.data.tbpoc.fold_8_rgb" tbpoc_f9_rgb = "ptbench.data.tbpoc.fold_9_rgb" -# extended tbpoc dataset (with radiological signs) -tbpoc_rs_f0 = "ptbench.configs.datasets.tbpoc_RS.fold_0" -tbpoc_rs_f1 = "ptbench.configs.datasets.tbpoc_RS.fold_1" -tbpoc_rs_f2 = "ptbench.configs.datasets.tbpoc_RS.fold_2" -tbpoc_rs_f3 = "ptbench.configs.datasets.tbpoc_RS.fold_3" -tbpoc_rs_f4 = "ptbench.configs.datasets.tbpoc_RS.fold_4" -tbpoc_rs_f5 = "ptbench.configs.datasets.tbpoc_RS.fold_5" -tbpoc_rs_f6 = "ptbench.configs.datasets.tbpoc_RS.fold_6" -tbpoc_rs_f7 = "ptbench.configs.datasets.tbpoc_RS.fold_7" -tbpoc_rs_f8 = "ptbench.configs.datasets.tbpoc_RS.fold_8" -tbpoc_rs_f9 = "ptbench.configs.datasets.tbpoc_RS.fold_9" # hivtb dataset (and cross-validation folds) hivtb_f0 = "ptbench.data.hivtb.fold_0" hivtb_f1 = "ptbench.data.hivtb.fold_1" @@ -426,23 +292,9 @@ hivtb_f6_rgb = "ptbench.data.hivtb.fold_6_rgb" hivtb_f7_rgb = "ptbench.data.hivtb.fold_7_rgb" hivtb_f8_rgb = "ptbench.data.hivtb.fold_8_rgb" hivtb_f9_rgb = "ptbench.data.hivtb.fold_9_rgb" -# extended hivtb dataset (with radiological signs) -hivtb_rs_f0 = "ptbench.configs.datasets.hivtb_RS.fold_0" -hivtb_rs_f1 = "ptbench.configs.datasets.hivtb_RS.fold_1" -hivtb_rs_f2 = "ptbench.configs.datasets.hivtb_RS.fold_2" -hivtb_rs_f3 = "ptbench.configs.datasets.hivtb_RS.fold_3" -hivtb_rs_f4 = "ptbench.configs.datasets.hivtb_RS.fold_4" -hivtb_rs_f5 = "ptbench.configs.datasets.hivtb_RS.fold_5" -hivtb_rs_f6 = "ptbench.configs.datasets.hivtb_RS.fold_6" -hivtb_rs_f7 = "ptbench.configs.datasets.hivtb_RS.fold_7" -hivtb_rs_f8 = "ptbench.configs.datasets.hivtb_RS.fold_8" -hivtb_rs_f9 = "ptbench.configs.datasets.hivtb_RS.fold_9" # montgomery-shenzhen-indian-padchest aggregated dataset mc_ch_in_pc = "ptbench.data.mc_ch_in_pc.default" mc_ch_in_pc_rgb = "ptbench.data.mc_ch_in_pc.rgb" -# extended montgomery-shenzhen-indian-padchest aggregated dataset -# (with radiological signs) -mc_ch_in_pc_rs = "ptbench.configs.datasets.mc_ch_in_pc_RS.default" # NIH CXR14 (relabeled) nih_cxr14 = "ptbench.data.nih_cxr14_re.default" nih_cxr14_cm = "ptbench.data.nih_cxr14_re.cardiomegaly" @@ -454,8 +306,6 @@ padchest_tb_idiap = "ptbench.data.padchest.tb_idiap" padchest_no_tb_idiap = "ptbench.data.padchest.no_tb_idiap" padchest_tb_idiap_rgb = "ptbench.data.padchest.tb_idiap_rgb" padchest_cm_idiap = "ptbench.data.padchest.cardiomegaly_idiap" -# extended PadChestTB dataset (with radiological signs) -padchest_tb_idiap_rs = "ptbench.configs.datasets.padchest_RS.tb_idiap" [tool.setuptools] zip-safe = true diff --git a/src/ptbench/data/hivtb/fold_0_rgb.py b/src/ptbench/data/hivtb/fold_0_rgb.py deleted file mode 100644 index 3a5e13cb473c81a4b04ada39b7504551e952c6f2..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_0_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 0) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_0", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_1_rgb.py b/src/ptbench/data/hivtb/fold_1_rgb.py deleted file mode 100644 index 4b5af9ecbb5180957d08697d9dbed005d201be9c..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_1_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 1) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_1", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_2_rgb.py b/src/ptbench/data/hivtb/fold_2_rgb.py deleted file mode 100644 index bd72b8147b5277539efe92c5c4310234f563a49f..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_2_rgb.py +++ /dev/null @@ -1,47 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 2) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_2", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_3_rgb.py b/src/ptbench/data/hivtb/fold_3_rgb.py deleted file mode 100644 index 6c6a8d678afb30fefb0909de3979bada403e8c71..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_3_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 3) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_3", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_4_rgb.py b/src/ptbench/data/hivtb/fold_4_rgb.py deleted file mode 100644 index 99a56b4108e362f0469a06e5f2c3ad6b7d7d35da..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_4_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 4) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_4", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_5_rgb.py b/src/ptbench/data/hivtb/fold_5_rgb.py deleted file mode 100644 index 2033c03ba93c859bf99e5d74b844ecb2d39c08ac..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_5_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 5) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_5", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_6_rgb.py b/src/ptbench/data/hivtb/fold_6_rgb.py deleted file mode 100644 index 16bc6df5069847263cf05154836fb9919bdd2a86..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_6_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 6) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_6", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_7_rgb.py b/src/ptbench/data/hivtb/fold_7_rgb.py deleted file mode 100644 index 292ef81e0511ff576c3ea23650421a100a81bd27..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_7_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 7) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_7", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_8_rgb.py b/src/ptbench/data/hivtb/fold_8_rgb.py deleted file mode 100644 index eca72655c656c1262137130afea68cbd890577e0..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_8_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 8) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_8", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb/fold_9_rgb.py b/src/ptbench/data/hivtb/fold_9_rgb.py deleted file mode 100644 index 3a12fc23aa92245d070196ac0cfe07e09f204ffe..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb/fold_9_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for TB detection (cross validation fold 9) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_9", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/hivtb_RS/__init__.py b/src/ptbench/data/hivtb_RS/__init__.py deleted file mode 100644 index 92b13d42016eaa95ed5011b7dceac89a6b363454..0000000000000000000000000000000000000000 --- a/src/ptbench/data/hivtb_RS/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""HIV-TB dataset for computer-aided diagnosis (only BMP files) - -* Reference: [HIV-TB-2019]_ -* Original resolution (height x width or width x height): 2048 x 2500 -* Split reference: none -* Stratified kfold protocol: - - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) -""" - - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) -"""Extended HIV-TB dataset object.""" diff --git a/src/ptbench/data/hivtb_RS/fold_0.json.bz2 b/src/ptbench/data/hivtb_RS/fold_0.json.bz2 deleted file mode 100644 index d838a546d30d2a0ad73235e88ba84e45c048bcda..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_1.json.bz2 b/src/ptbench/data/hivtb_RS/fold_1.json.bz2 deleted file mode 100644 index 0184404e1cb0c50136b8c08c42347bc4a77dbdcc..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_2.json.bz2 b/src/ptbench/data/hivtb_RS/fold_2.json.bz2 deleted file mode 100644 index 915c359fd3c9115e468e97078e9ca10da6acfcb1..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_3.json.bz2 b/src/ptbench/data/hivtb_RS/fold_3.json.bz2 deleted file mode 100644 index 6d334b4319a8d1e2e197529a14c7c5e29f30b735..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_4.json.bz2 b/src/ptbench/data/hivtb_RS/fold_4.json.bz2 deleted file mode 100644 index 2706c244fe5e794fddeae00929d732fdb64101b7..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_5.json.bz2 b/src/ptbench/data/hivtb_RS/fold_5.json.bz2 deleted file mode 100644 index 4bae23a17294083649e5b185848d979202c52bf4..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_6.json.bz2 b/src/ptbench/data/hivtb_RS/fold_6.json.bz2 deleted file mode 100644 index c8366325a5c34a3aec4d73f8016a598d3ddf61b5..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_7.json.bz2 b/src/ptbench/data/hivtb_RS/fold_7.json.bz2 deleted file mode 100644 index c16ed2114154e50447092191125206d73ed6d681..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_8.json.bz2 b/src/ptbench/data/hivtb_RS/fold_8.json.bz2 deleted file mode 100644 index 21718aa2f288ed50775af7587cf9b61e14bdcb20..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/hivtb_RS/fold_9.json.bz2 b/src/ptbench/data/hivtb_RS/fold_9.json.bz2 deleted file mode 100644 index 30072f4eecd9f4e8c163c4f12dcf84e9f6638760..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/hivtb_RS/fold_9.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian/default.py b/src/ptbench/data/indian/datamodules.py similarity index 100% rename from src/ptbench/data/indian/default.py rename to src/ptbench/data/indian/datamodules.py diff --git a/src/ptbench/data/indian/fold_0_rgb.py b/src/ptbench/data/indian/fold_0_rgb.py deleted file mode 100644 index 7f0ded1ece831dfb64ebf0ce47a8ea0c37132990..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_0_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 0, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_0", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_1_rgb.py b/src/ptbench/data/indian/fold_1_rgb.py deleted file mode 100644 index bb0160a1ac6f5c74be093d9330579503bb675205..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_1_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 1, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_1", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_2_rgb.py b/src/ptbench/data/indian/fold_2_rgb.py deleted file mode 100644 index ff40668a7aebf7196b1082c6753db6931b13aceb..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_2_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 2, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_2", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_3_rgb.py b/src/ptbench/data/indian/fold_3_rgb.py deleted file mode 100644 index b4f58041d85fa345afe66d93712a9a53780844b4..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_3_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 3, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_3", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_4_rgb.py b/src/ptbench/data/indian/fold_4_rgb.py deleted file mode 100644 index 41ccb27cf09de3abee93a08ab8c43a3ad580ae4d..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_4_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 4, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_4", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_5_rgb.py b/src/ptbench/data/indian/fold_5_rgb.py deleted file mode 100644 index 5dd71156847c85f927f8fa137f7b7ddf9e51c42a..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_5_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 5, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_5", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_6_rgb.py b/src/ptbench/data/indian/fold_6_rgb.py deleted file mode 100644 index b32d3dedffd2768845d89bde9b083368c7296b2d..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_6_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 6, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_6", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_7_rgb.py b/src/ptbench/data/indian/fold_7_rgb.py deleted file mode 100644 index 7d5eb33daeda23eb8eb19fc48855d68aa46a57cd..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_7_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 7, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_7", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_8_rgb.py b/src/ptbench/data/indian/fold_8_rgb.py deleted file mode 100644 index 5c228a285dcca15171755830311cda9701125a50..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_8_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 8, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_8", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian/fold_9_rgb.py b/src/ptbench/data/indian/fold_9_rgb.py deleted file mode 100644 index 220938488b729cd17fac1adbb51ca592af557d84..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/fold_9_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (cross validation fold 9, RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_9", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/indian_RS/__init__.py b/src/ptbench/data/indian_RS/__init__.py deleted file mode 100644 index d41dee4f86b62a5b55be79272f5dd1fa0be09894..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian_RS/__init__.py +++ /dev/null @@ -1,52 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian collection dataset for computer-aided diagnosis (extended with -DensenetRS predictions) - -The Indian collection database has been established to foster research -in computer-aided diagnosis of pulmonary diseases with a special -focus on pulmonary tuberculosis (TB). - -* Reference: [INDIAN-2013]_ -* Original resolution (height x width or width x height): more than 1024 x 1024 -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -""" - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) -"""Extended Indian dataset object.""" diff --git a/src/ptbench/data/indian_RS/default.json.bz2 b/src/ptbench/data/indian_RS/default.json.bz2 deleted file mode 100644 index eee4672a70e75c32fee791ba141f2cf03209d93c..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/default.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_0.json.bz2 b/src/ptbench/data/indian_RS/fold_0.json.bz2 deleted file mode 100644 index 39c2a5a7bc81c6e3d8d772ff553f8699a562dd73..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_1.json.bz2 b/src/ptbench/data/indian_RS/fold_1.json.bz2 deleted file mode 100644 index 67438356e58d26b54a720953dd018f578277c168..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_2.json.bz2 b/src/ptbench/data/indian_RS/fold_2.json.bz2 deleted file mode 100644 index 7a9891cad73590ee1b23a4d37fcd0d550f4185fb..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_3.json.bz2 b/src/ptbench/data/indian_RS/fold_3.json.bz2 deleted file mode 100644 index e13acf5d58d38614841314fbf07c6b38ae6b667b..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_4.json.bz2 b/src/ptbench/data/indian_RS/fold_4.json.bz2 deleted file mode 100644 index 4547afc36de31349c6bbeec585a93a1e9066e8db..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_5.json.bz2 b/src/ptbench/data/indian_RS/fold_5.json.bz2 deleted file mode 100644 index aa53c7738c49af2e2f2d29b5956dc616bbe651c5..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_6.json.bz2 b/src/ptbench/data/indian_RS/fold_6.json.bz2 deleted file mode 100644 index 12df78407977f764a1f6c0fe9153ef7909b59c99..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_7.json.bz2 b/src/ptbench/data/indian_RS/fold_7.json.bz2 deleted file mode 100644 index 396748c4a15e42e4d541ae16eaed8d6d9db87651..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_8.json.bz2 b/src/ptbench/data/indian_RS/fold_8.json.bz2 deleted file mode 100644 index 00c1adab35c39fbe7ff20a54dbdea41d4429441d..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/indian_RS/fold_9.json.bz2 b/src/ptbench/data/indian_RS/fold_9.json.bz2 deleted file mode 100644 index a868b5522be5112f922e7b6c2cbd6a5131ce9e79..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/indian_RS/fold_9.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/mc_ch/fold_0_rgb.py b/src/ptbench/data/mc_ch/fold_0_rgb.py deleted file mode 100644 index 21cadfd4ab64859e18dec6d331079e30d632f505..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_0_rgb.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 0, RGB)""" - - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_0_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_0_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_1_rgb.py b/src/ptbench/data/mc_ch/fold_1_rgb.py deleted file mode 100644 index f83ac9a6c4c7d53d3b0a9d53cfb61321ed195e30..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_1_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 1, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_1_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_1_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_2_rgb.py b/src/ptbench/data/mc_ch/fold_2_rgb.py deleted file mode 100644 index 067c030e2aedb3e31e244929d018f08dac00f17d..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_2_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 2, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_2_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_2_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_3_rgb.py b/src/ptbench/data/mc_ch/fold_3_rgb.py deleted file mode 100644 index 482b5e62fa46d8e939a0682d96db458d6beba27b..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_3_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 3, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_3_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_3_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_4_rgb.py b/src/ptbench/data/mc_ch/fold_4_rgb.py deleted file mode 100644 index a6b638cd09b9eac182172469eb671e4da854b5e6..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_4_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 4, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_4_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_4_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_5_rgb.py b/src/ptbench/data/mc_ch/fold_5_rgb.py deleted file mode 100644 index 48bf3935ee99b9cd18f4bd055f1009cc8b13f9fc..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_5_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 5, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_5_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_5_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_6_rgb.py b/src/ptbench/data/mc_ch/fold_6_rgb.py deleted file mode 100644 index 00b8d5a1a136566dc372090cb71b9daeaee2d6cb..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_6_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 6, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_6_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_6_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_7_rgb.py b/src/ptbench/data/mc_ch/fold_7_rgb.py deleted file mode 100644 index a1c3a8f257280f3c4e1871920fd2bafb1fb2a8e2..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_7_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 7, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_7_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_7_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_8_rgb.py b/src/ptbench/data/mc_ch/fold_8_rgb.py deleted file mode 100644 index c91e8a322653fd7c8f63d7064da382f3a3253f1e..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_8_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 8, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_8_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_8_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/fold_9_rgb.py b/src/ptbench/data/mc_ch/fold_9_rgb.py deleted file mode 100644 index db847b2256407ad1d4f5542e04100578f02b18f6..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/fold_9_rgb.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen datasets (cross -validation fold 9, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.fold_9_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_9_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_0_rgb.py b/src/ptbench/data/mc_ch_in/fold_0_rgb.py deleted file mode 100644 index 7d7a1fed0a26c70dc4e771efcb05210e15414b34..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_0_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 0, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_0_rgb import datamodule as indian_datamodule -from ..montgomery.fold_0_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_0_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_1_rgb.py b/src/ptbench/data/mc_ch_in/fold_1_rgb.py deleted file mode 100644 index c7d194711aca1000896a05240ee02fcfda3ad331..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_1_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 1, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_1_rgb import datamodule as indian_datamodule -from ..montgomery.fold_1_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_1_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_2_rgb.py b/src/ptbench/data/mc_ch_in/fold_2_rgb.py deleted file mode 100644 index f2bdebffb373f788e10ccae760bdd487b632066e..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_2_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 2, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_2_rgb import datamodule as indian_datamodule -from ..montgomery.fold_2_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_2_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_3_rgb.py b/src/ptbench/data/mc_ch_in/fold_3_rgb.py deleted file mode 100644 index 4646b1c8c7cb36739d79cce1a8e9cc7c4b66962a..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_3_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 3, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_3_rgb import datamodule as indian_datamodule -from ..montgomery.fold_3_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_3_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_4_rgb.py b/src/ptbench/data/mc_ch_in/fold_4_rgb.py deleted file mode 100644 index 118fd6234cf7a649c55011a48859c5b426b92734..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_4_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 4, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_4_rgb import datamodule as indian_datamodule -from ..montgomery.fold_4_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_4_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_5_rgb.py b/src/ptbench/data/mc_ch_in/fold_5_rgb.py deleted file mode 100644 index adcc5a6e4063fb3e3a834cb206a3af6ee3b4ec4e..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_5_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 5, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_5_rgb import datamodule as indian_datamodule -from ..montgomery.fold_5_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_5_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_6_rgb.py b/src/ptbench/data/mc_ch_in/fold_6_rgb.py deleted file mode 100644 index 3f5e8f823221488f980285f828f306eb7461cf2a..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_6_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 6, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_6_rgb import datamodule as indian_datamodule -from ..montgomery.fold_6_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_6_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_7_rgb.py b/src/ptbench/data/mc_ch_in/fold_7_rgb.py deleted file mode 100644 index b4949143d72cd8a4d481ffd94fd1d65586cc1b5e..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_7_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 7, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_7_rgb import datamodule as indian_datamodule -from ..montgomery.fold_7_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_7_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_8_rgb.py b/src/ptbench/data/mc_ch_in/fold_8_rgb.py deleted file mode 100644 index 1f2b28dc06d3866902c7922bb2c04dd39ac3ad18..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_8_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 8, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_8_rgb import datamodule as indian_datamodule -from ..montgomery.fold_8_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_8_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/fold_9_rgb.py b/src/ptbench/data/mc_ch_in/fold_9_rgb.py deleted file mode 100644 index 0c79e285ec619cbf3dbfc090ed886637261f8081..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/fold_9_rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian datasets -(cross validation fold 9, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_9_rgb import datamodule as indian_datamodule -from ..montgomery.fold_9_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_9_rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_0_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_0_rgb.py deleted file mode 100644 index 7a03d33958cb790d62e4ac83eb13844be3ca25bf..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_0_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 0, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_0_rgb import datamodule as indian_datamodule -from ..montgomery.fold_0_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_0_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_0_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_1_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_1_rgb.py deleted file mode 100644 index 1fdea4b18d3721053845cf0bf7c830cd2bf6fed6..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_1_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 1, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_1_rgb import datamodule as indian_datamodule -from ..montgomery.fold_1_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_1_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_1_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_2_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_2_rgb.py deleted file mode 100644 index b4bd35f00113478bcb61cc566af0aa8eb86c0027..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_2_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 2, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_2_rgb import datamodule as indian_datamodule -from ..montgomery.fold_2_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_2_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_2_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_3_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_3_rgb.py deleted file mode 100644 index 28cce948da793f97b3fa654cf41df72eee7d6392..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_3_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 3, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_3_rgb import datamodule as indian_datamodule -from ..montgomery.fold_3_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_3_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_3_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_4_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_4_rgb.py deleted file mode 100644 index bbd6065ee32d93774ea56a538a3bfa160e23b049..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_4_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 4, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_4_rgb import datamodule as indian_datamodule -from ..montgomery.fold_4_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_4_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_4_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_5_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_5_rgb.py deleted file mode 100644 index 6cf1a1fe4b6926d6848f3582fc5681e6057e3948..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_5_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 5, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_5_rgb import datamodule as indian_datamodule -from ..montgomery.fold_5_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_5_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_5_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_6_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_6_rgb.py deleted file mode 100644 index de6abc58b0a4871627ed2973f0b999a0d620d2f9..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_6_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 6, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_6_rgb import datamodule as indian_datamodule -from ..montgomery.fold_6_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_5_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_6_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_7_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_7_rgb.py deleted file mode 100644 index f163fe7ffaa1a82014b2fedb03dd89a626e541b9..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_7_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 7, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_7_rgb import datamodule as indian_datamodule -from ..montgomery.fold_7_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_7_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_7_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_8_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_8_rgb.py deleted file mode 100644 index 503854beeac9e900c67b1f9bb230032832d25cf1..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_8_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 8, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_8_rgb import datamodule as indian_datamodule -from ..montgomery.fold_8_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_8_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_8_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/fold_9_rgb.py b/src/ptbench/data/mc_ch_in_11k/fold_9_rgb.py deleted file mode 100644 index 39bcadec8792861461d8a8f65908a14ba793c6ee..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/fold_9_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 9, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_9_rgb import datamodule as indian_datamodule -from ..montgomery.fold_9_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_9_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.fold_9_rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_0_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_0_rgb.py deleted file mode 100644 index c852a9a92ee5faf6a2accf83c3faa4169e829447..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_0_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 0, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_0_rgb import datamodule as indian_datamodule -from ..montgomery.fold_0_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_0_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_0_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_1_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_1_rgb.py deleted file mode 100644 index 8f0c9174ffb8a7307fed77996fe0bbad655ad34c..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_1_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 1, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_1_rgb import datamodule as indian_datamodule -from ..montgomery.fold_1_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_1_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_1_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_2_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_2_rgb.py deleted file mode 100644 index 5ecaa2a7e5a8ae7972a06eb1fa68c39cb575bba3..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_2_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 2, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_2_rgb import datamodule as indian_datamodule -from ..montgomery.fold_2_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_2_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_2_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_3_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_3_rgb.py deleted file mode 100644 index 267d128e5dee412b933b350bde8ff8d60e6f771e..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_3_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 3, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_3_rgb import datamodule as indian_datamodule -from ..montgomery.fold_3_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_3_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_3_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_4_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_4_rgb.py deleted file mode 100644 index 9bdc8c23043961b780f8b53d4349ce9cd4c7e3fd..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_4_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 4, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_4_rgb import datamodule as indian_datamodule -from ..montgomery.fold_4_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_4_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_4_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_5_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_5_rgb.py deleted file mode 100644 index ce3ffd479d9264bd17188e89cfc8716f59caf25a..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_5_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 5, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_5_rgb import datamodule as indian_datamodule -from ..montgomery.fold_5_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_5_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_5_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_6_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_6_rgb.py deleted file mode 100644 index bc892b7964c8565d70577b514be426be85073645..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_6_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 6, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_6_rgb import datamodule as indian_datamodule -from ..montgomery.fold_6_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_5_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_6_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_7_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_7_rgb.py deleted file mode 100644 index 0fe1f28289a5a09f5fd321de5d8cd65c57fef4e0..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_7_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 7, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_7_rgb import datamodule as indian_datamodule -from ..montgomery.fold_7_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_7_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_7_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_8_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_8_rgb.py deleted file mode 100644 index a330b2e6d12847bc6406d87e5d48ad626f802c77..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_8_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 8, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_8_rgb import datamodule as indian_datamodule -from ..montgomery.fold_8_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_8_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_8_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/fold_9_rgb.py b/src/ptbench/data/mc_ch_in_11kv2/fold_9_rgb.py deleted file mode 100644 index 93296414df6d566de6a48556074b185c045cac08..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/fold_9_rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (cross validation fold 9, RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.fold_9_rgb import datamodule as indian_datamodule -from ..montgomery.fold_9_rgb import datamodule as mc_datamodule -from ..shenzhen.fold_9_rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.fold_9_rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/montgomery_RS/__init__.py b/src/ptbench/data/montgomery_RS/__init__.py deleted file mode 100644 index baf5f7c129c3e675e88e18d938567d4607da7346..0000000000000000000000000000000000000000 --- a/src/ptbench/data/montgomery_RS/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Extended Montgomery dataset for computer-aided diagnosis (extended with -DensenetRS predictions) - -The Montgomery database has been established to foster research -in computer-aided diagnosis of pulmonary diseases with a special -focus on pulmonary tuberculosis (TB). - -* Reference: [MONTGOMERY-SHENZHEN-2014]_ -* Original resolution (height x width or width x height): 4020 x 4892 -* Split reference: none -* Protocol ``default``: - - * Training samples: 64% of TB and healthy CXR (including labels) - * Validation samples: 16% of TB and healthy CXR (including labels) - * Test samples: 20% of TB and healthy CXR (including labels) -""" - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) -"""Extended Montgomery dataset object.""" diff --git a/src/ptbench/data/montgomery_RS/default.json.bz2 b/src/ptbench/data/montgomery_RS/default.json.bz2 deleted file mode 100644 index 6778ba85d1f5f4e3cf24106cd3b5e4e9f2eb773e..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/default.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_0.json.bz2 b/src/ptbench/data/montgomery_RS/fold_0.json.bz2 deleted file mode 100644 index 64a08eb460d9a9a0c0603f07ce13f9f8aa6e701c..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_1.json.bz2 b/src/ptbench/data/montgomery_RS/fold_1.json.bz2 deleted file mode 100644 index e6fe85434104b17feb89417c569c8d0bfb72efd6..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_2.json.bz2 b/src/ptbench/data/montgomery_RS/fold_2.json.bz2 deleted file mode 100644 index dd70b025928281ae7da04704cf4a968cb95ac62b..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_3.json.bz2 b/src/ptbench/data/montgomery_RS/fold_3.json.bz2 deleted file mode 100644 index baece7b34d1e2c87e2a64c65cb487b0f9642f643..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_4.json.bz2 b/src/ptbench/data/montgomery_RS/fold_4.json.bz2 deleted file mode 100644 index f0f539961197c11838344be3c89658b380fdb526..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_5.json.bz2 b/src/ptbench/data/montgomery_RS/fold_5.json.bz2 deleted file mode 100644 index 5e445f0c1bf698b56750219c8217490381d17a13..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_6.json.bz2 b/src/ptbench/data/montgomery_RS/fold_6.json.bz2 deleted file mode 100644 index 8924bcf525716bf1219f44a930f5eb4f4a0183ad..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_7.json.bz2 b/src/ptbench/data/montgomery_RS/fold_7.json.bz2 deleted file mode 100644 index aa1ee218264b68a02fcca4551214a069eb87c5b5..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_8.json.bz2 b/src/ptbench/data/montgomery_RS/fold_8.json.bz2 deleted file mode 100644 index 57371061aa85541e11b0f08a11cd3631d89e78e1..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/montgomery_RS/fold_9.json.bz2 b/src/ptbench/data/montgomery_RS/fold_9.json.bz2 deleted file mode 100644 index 19b93f40b16241030c47c6c1de18cfe6b7ae4379..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/montgomery_RS/fold_9.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/padchest_RS/__init__.py b/src/ptbench/data/padchest_RS/__init__.py deleted file mode 100644 index 310b7556c0cb3b772a8ab203c20202fe7110fa6c..0000000000000000000000000000000000000000 --- a/src/ptbench/data/padchest_RS/__init__.py +++ /dev/null @@ -1,57 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Padchest TB dataset for computer-aided diagnosis. - -A large chest x-ray image dataset with multi-label annotated reports. -This dataset includes more than 160,000 images from 67,000 patients that were -interpreted and reported by radiologists at Hospital San Juan (Spain) from 2009 -to 2017, covering six different position views and additional information on -image acquisition and patient demography. - -We keep only "PA" images here and only the "Tuberculosis" subset with an -equivalent number of "normal" images. - -* Reference: [PADCHEST-2019]_ -* Original resolution: variable, original size -* Labels: [PADCHEST-2019]_ -* Split reference: 64%/16%/20% -* Protocol ``default``: - - * Training samples: 160 - * Validation samples: 40 - * Test samples: 50 - -* Protocol `ìdiap``: - * Images path adapted to Idiap infrastructure - -* Labels: DensenetRS predictions -""" - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("tb_idiap.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) -"""Padchest dataset object.""" diff --git a/src/ptbench/data/padchest_RS/tb_idiap.json.bz2 b/src/ptbench/data/padchest_RS/tb_idiap.json.bz2 deleted file mode 100644 index b95db5cd01b9e027c433ac773f7cfc0efb8b182b..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/padchest_RS/tb_idiap.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/__init__.py b/src/ptbench/data/shenzhen_RS/__init__.py deleted file mode 100644 index 2a671c9bd8074cae4206c9884431d7fad6b262d0..0000000000000000000000000000000000000000 --- a/src/ptbench/data/shenzhen_RS/__init__.py +++ /dev/null @@ -1,59 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Shenzhen dataset for computer-aided diagnosis (extended with DensenetRS -predictions) - -The standard digital image database for Tuberculosis is created by the -National Library of Medicine, Maryland, USA in collaboration with Shenzhen -No.3 People’s Hospital, Guangdong Medical College, Shenzhen, China. -The Chest X-rays are from out-patient clinics, and were captured as part of -the daily routine using Philips DR Digital Diagnose systems. - -* Reference: [MONTGOMERY-SHENZHEN-2014]_ -* Original resolution (height x width or width x height): 3000 x 3000 or less -* Split reference: none -* Protocol ``default``: - - * Training samples: 64% of TB and healthy CXR (including labels) - * Validation samples: 16% of TB and healthy CXR (including labels) - * Test samples: 20% of TB and healthy CXR (including labels) -""" - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) -"""Extended Shenzhen dataset object.""" diff --git a/src/ptbench/data/shenzhen_RS/default.json.bz2 b/src/ptbench/data/shenzhen_RS/default.json.bz2 deleted file mode 100644 index e8e1f1e8a21a04fe40d4d7f30d65385f3f636b76..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/default.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_0.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_0.json.bz2 deleted file mode 100644 index b94a506c94c718c98501c55f3758c89b1e2956a9..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_1.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_1.json.bz2 deleted file mode 100644 index 1a104f3ddbf3ee2d31e6d3c2edf8bf27e13c3fe9..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_2.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_2.json.bz2 deleted file mode 100644 index a55e96049a92d643ee30facbca4a6ebb078fcb7a..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_3.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_3.json.bz2 deleted file mode 100644 index fb3d72b429c1cf5ebcd50f5f2cad0970ad375e34..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_4.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_4.json.bz2 deleted file mode 100644 index e02b73be779a170c99516df16535baec69c81285..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_5.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_5.json.bz2 deleted file mode 100644 index b8d4b459d3a750ef068fe7807af7c3fa19047961..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_6.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_6.json.bz2 deleted file mode 100644 index 646ceb6eb02cd84ebf0b46f45c435ea8aa656cda..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_7.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_7.json.bz2 deleted file mode 100644 index 68e2772ab452274b899bdb9c75d2f60286665a59..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_8.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_8.json.bz2 deleted file mode 100644 index a96cfdba868dac206aad2e35552407c446bbbd71..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/shenzhen_RS/fold_9.json.bz2 b/src/ptbench/data/shenzhen_RS/fold_9.json.bz2 deleted file mode 100644 index f322aeb6dababfebe99989d870646d96c5c23d3c..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/shenzhen_RS/fold_9.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc/fold_0_rgb.py b/src/ptbench/data/tbpoc/fold_0_rgb.py deleted file mode 100644 index 9ecd78c5da10d2b02023ee2b5425c4644ceeca4b..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_0_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 0) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_0", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_1_rgb.py b/src/ptbench/data/tbpoc/fold_1_rgb.py deleted file mode 100644 index 7c149f9347cd5be1275edf205d2cb723cb0b504f..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_1_rgb.py +++ /dev/null @@ -1,47 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 1) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_1", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_2_rgb.py b/src/ptbench/data/tbpoc/fold_2_rgb.py deleted file mode 100644 index 54ad328ef037545af552d33079b0789a65b8c511..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_2_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 2) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_2", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_3_rgb.py b/src/ptbench/data/tbpoc/fold_3_rgb.py deleted file mode 100644 index c17e062502c12a662a2cce5a4db301bbd16f7a35..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_3_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 3) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_3", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_4_rgb.py b/src/ptbench/data/tbpoc/fold_4_rgb.py deleted file mode 100644 index a5f9f1190a64da105d09d61d20d084ba11aae1b9..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_4_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 4) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_4", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_5_rgb.py b/src/ptbench/data/tbpoc/fold_5_rgb.py deleted file mode 100644 index 956c6b9f4eff0e31264b655700ce7ad12d595e1a..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_5_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 5) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_5", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_6_rgb.py b/src/ptbench/data/tbpoc/fold_6_rgb.py deleted file mode 100644 index fd781c26ccf775e79f45754d74066b53ae252095..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_6_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 6) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_6", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_7_rgb.py b/src/ptbench/data/tbpoc/fold_7_rgb.py deleted file mode 100644 index 1d5a1165f7601a7e32f1002ded58ff0c7e2493bb..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_7_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 7) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_7", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_8_rgb.py b/src/ptbench/data/tbpoc/fold_8_rgb.py deleted file mode 100644 index bbbf25feb10e311d60d621d5003a9166f9029596..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_8_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 8) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_8", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc/fold_9_rgb.py b/src/ptbench/data/tbpoc/fold_9_rgb.py deleted file mode 100644 index ac17ba4dcd4f141928836f2a570bc39ff777f6b3..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc/fold_9_rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for TB detection (cross validation fold 9) - -* Split reference: none (stratified kfolding) -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_9", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbpoc_RS/__init__.py b/src/ptbench/data/tbpoc_RS/__init__.py deleted file mode 100644 index 709a53b4a06c1fcaed5aa280e160204afede2b98..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbpoc_RS/__init__.py +++ /dev/null @@ -1,51 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TB-POC dataset for computer-aided diagnosis. - -* Reference: [TB-POC-2018]_ -* Original resolution (height x width or width x height): 2048 x 2500 -* Split reference: none -* Stratified kfold protocol: - - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) -""" - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) -"""Extended TB-POC dataset object.""" diff --git a/src/ptbench/data/tbpoc_RS/fold_0.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_0.json.bz2 deleted file mode 100644 index 4f0705a54e307220a14ef6f4f940228446a8431c..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_1.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_1.json.bz2 deleted file mode 100644 index 8cc4ea183eae2b24d958da23b77d38aab36e7fcf..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_2.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_2.json.bz2 deleted file mode 100644 index e9c35e9e941c389d2d49bf48b0b2c4639d90c81c..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_3.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_3.json.bz2 deleted file mode 100644 index b83873dbf4b30005860fdaf3a7a98e8b0bf95337..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_4.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_4.json.bz2 deleted file mode 100644 index 65ae5ff6e3f658a1f2ee04caede04460a1104535..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_5.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_5.json.bz2 deleted file mode 100644 index 1c8da15fcfadbdd3a49a9923081fa4fee61540c6..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_6.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_6.json.bz2 deleted file mode 100644 index db56dde9614d9f51f6a95e238fa084252d39d036..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_7.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_7.json.bz2 deleted file mode 100644 index 7ab696d89d3f947f24e471b6e1317368da6a2927..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_8.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_8.json.bz2 deleted file mode 100644 index 121fee6ae28b6f7199347d6c25115dfed2397adf..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbpoc_RS/fold_9.json.bz2 b/src/ptbench/data/tbpoc_RS/fold_9.json.bz2 deleted file mode 100644 index 1f9d0e9e00366f8fa8e0b57edd2491501caf4bc3..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbpoc_RS/fold_9.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified/fold_0_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_0_rgb.py deleted file mode 100644 index 349b226e8680ae4e13541381dda768e9e28132a7..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_0_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_0", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_1_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_1_rgb.py deleted file mode 100644 index 439dff09c9d8780b11a3369954ea7008a55af78d..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_1_rgb.py +++ /dev/null @@ -1,50 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_1", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_2_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_2_rgb.py deleted file mode 100644 index 60a6322f582365a68f12731f594179153ae94c37..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_2_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_2", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_3_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_3_rgb.py deleted file mode 100644 index 3a2975ed639e59a6f7e778e1744185303824a98f..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_3_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_3", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_4_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_4_rgb.py deleted file mode 100644 index 021aa7a740b4aaab6eb267c2549ed50be96cf384..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_4_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_4", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_5_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_5_rgb.py deleted file mode 100644 index 07e4209ecadeeba09c7d9a67006678d6c1fe4ecf..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_5_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_5", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_6_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_6_rgb.py deleted file mode 100644 index e4e2b1fc6e9d83b39acc2e5a378872ac75b60978..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_6_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_6", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_7_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_7_rgb.py deleted file mode 100644 index 0412530eb47a1319cdee0c2bb2386267265e6676..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_7_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_7", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_8_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_8_rgb.py deleted file mode 100644 index c88b51d94d03d40b60108c0e390b96740af55e64..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_8_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_8", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified/fold_9_rgb.py b/src/ptbench/data/tbx11k_simplified/fold_9_rgb.py deleted file mode 100644 index 20cb6d49a5bb3275728e964b0d72f44c57443d59..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/fold_9_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_9", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_RS/__init__.py b/src/ptbench/data/tbx11k_simplified_RS/__init__.py deleted file mode 100644 index 8598a8c5b1a11f1a8a1866e5e4d04b83c27925ff..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_RS/__init__.py +++ /dev/null @@ -1,62 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Extended TBX11K simplified dataset for computer-aided diagnosis (extended -with DensenetRS predictions) - -The TBX11K database has been established to foster research -in computer-aided diagnosis of pulmonary diseases with a special -focus on tuberculosis (aTB). The dataset was specifically -designed to be used with CNNs. It contains 11,000 chest X-ray -images, each of a unique patient. They were labeled by expert -radiologists with 5 - 10+ years of experience. Possible labels -are: "healthy", "active TB", "latent TB", and "sick & non-tb". -The version of the dataset used in this benchmark is a simplified. - -* Reference: [TBX11K-SIMPLIFIED-2020]_ -* Original (released) resolution (height x width or width x height): 512 x 512 -* Split reference: none -* Protocol ``default``: - - * Training samples: 62.5% of TB and healthy CXR (including labels) - * Validation samples: 15.9% of TB and healthy CXR (including labels) - * Test samples: 21.6% of TB and healthy CXR (including labels) -""" - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) -"""Extended TBX11K simplified dataset object.""" diff --git a/src/ptbench/data/tbx11k_simplified_RS/default.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/default.json.bz2 deleted file mode 100644 index 5c192dec44e60c3fb36606cd60d0fa3505d8a96b..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/default.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_0.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_0.json.bz2 deleted file mode 100644 index 4c5f8107c702ece1c017efdbea53de24f4a635c8..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_1.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_1.json.bz2 deleted file mode 100644 index 649066593b98927a38a29a4d9fedb29a8e6f74fc..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_2.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_2.json.bz2 deleted file mode 100644 index 1d92d9b8ac56fd1d6ac7e652f586563e95d74ada..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_3.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_3.json.bz2 deleted file mode 100644 index ec4b689bebf364d8c95851a2e9887aee72ee63a7..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_4.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_4.json.bz2 deleted file mode 100644 index 490352c09dae504c2a60c9d662ca4c8d7692b29c..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_5.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_5.json.bz2 deleted file mode 100644 index d20b871321b8445588a561ce833d9ca03ce925de..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_6.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_6.json.bz2 deleted file mode 100644 index 8eb8360de34f36258ed341f0bb3e89c0bc4b966d..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_7.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_7.json.bz2 deleted file mode 100644 index 3c9753344bd607e9dda547eb8c5bf4cf5536b4f3..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_8.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_8.json.bz2 deleted file mode 100644 index 433d4de4b5805dba15aed5d036ee8558b0249226..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_RS/fold_9.json.bz2 b/src/ptbench/data/tbx11k_simplified_RS/fold_9.json.bz2 deleted file mode 100644 index 824ea8aafaf7198c85726cb52d14243ee9267f00..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_RS/fold_9.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_0_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_0_rgb.py deleted file mode 100644 index 1ba0e2ac26cc63843b90d32b011a0eefc6a6d364..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_0_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 0, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_0", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_1_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_1_rgb.py deleted file mode 100644 index c543f62ef12c9bff50b1e457ea0aef2fde40f3f3..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_1_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 1, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_1", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_2_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_2_rgb.py deleted file mode 100644 index 39ba5a97cb95826b706e36cc356995ba5ecd95a4..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_2_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 2, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_2", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_3_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_3_rgb.py deleted file mode 100644 index 0aaab4af498453e58fec75a4dc887173c1494d46..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_3_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 3, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_3", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_4_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_4_rgb.py deleted file mode 100644 index b53968c6cc96d0735100a4928c1ce3d0e2ad51d3..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_4_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 4, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_4", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_5_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_5_rgb.py deleted file mode 100644 index a7ae16472f80f32b83dc31e8ffb9ed746dd0ed78..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_5_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 5, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_5", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_6_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_6_rgb.py deleted file mode 100644 index e46d97d57af3f676a6f390281f9d5e3cba8b8dbf..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_6_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 6, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_6", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_7_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_7_rgb.py deleted file mode 100644 index 4c595e735fb47d63dacefc6b9eb4f3ef7fe9a9fc..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_7_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 7, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_7", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_8_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_8_rgb.py deleted file mode 100644 index 70510ce5ee8c87cdff8190c0be875dd84e3d385c..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_8_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 8, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_8", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_9_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_9_rgb.py deleted file mode 100644 index 271e33480a84ff839cc997e3ae5b881e0595ae68..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/fold_9_rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 9, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_9", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/__init__.py b/src/ptbench/data/tbx11k_simplified_v2_RS/__init__.py deleted file mode 100644 index f2bcf8c2a53cd117f556c5854774d5ea81fb2f6c..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2_RS/__init__.py +++ /dev/null @@ -1,63 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Extended TBX11K simplified dataset for computer-aided diagnosis (extended -with DensenetRS predictions) - -The TBX11K database has been established to foster research -in computer-aided diagnosis of pulmonary diseases with a special -focus on tuberculosis (aTB). The dataset was specifically -designed to be used with CNNs. It contains 11,000 chest X-ray -images, each of a unique patient. They were labeled by expert -radiologists with 5 - 10+ years of experience. Possible labels -are: "healthy", "active TB", "latent TB", and "sick & non-tb". -The version of the dataset used in this benchmark is a simplified. - -* Reference: [TBX11K-SIMPLIFIED-2020]_ -* Original (released) resolution (height x width or width x height): 512 x 512 -* Split reference: none -* Protocol ``default``: - - * Training samples: 62.6% of CXR (including labels) - * Validation samples: 16% of CXR (including labels) - * Test samples: 21.4% of CXR (including labels) -""" - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) - -"""Extended TBX11K simplified dataset object.""" diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/default.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/default.json.bz2 deleted file mode 100644 index a9df1825d2349ebaf364884839a0fd3174e27ea2..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/default.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_0.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_0.json.bz2 deleted file mode 100644 index 0084bd7fa1149d2e1507e3333a97a714c37e554f..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_1.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_1.json.bz2 deleted file mode 100644 index e8084fcdfd515b3c0ef78cac625b35da6ff46e88..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_2.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_2.json.bz2 deleted file mode 100644 index 2328c509c0695443a43bc1bee2b9a3d7746282c0..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_3.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_3.json.bz2 deleted file mode 100644 index ae9712900b960ddf504c6b7544492d2c19a8c42d..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_4.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_4.json.bz2 deleted file mode 100644 index 86d9c595977c68f097e3711f005188e42090a5e4..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_5.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_5.json.bz2 deleted file mode 100644 index 84f479c72ad0667f8baa1ce666e540f6ee00762a..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_6.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_6.json.bz2 deleted file mode 100644 index 48d52e3b0ed6708953c6b591bf0d27ae64e324cb..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_7.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_7.json.bz2 deleted file mode 100644 index 8ac08974c73c84ef4356b884cf48a6e777e417e0..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_8.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_8.json.bz2 deleted file mode 100644 index 84dad2eb65508361e11d2f826494e25a0dbae6f8..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_9.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2_RS/fold_9.json.bz2 deleted file mode 100644 index 0c23b2ca6834ab68988d97a1ccd857f6d40c9b57..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v2_RS/fold_9.json.bz2 and /dev/null differ