diff --git a/pyproject.toml b/pyproject.toml index 27ef33fc79192665641364a77e2fbcdd613a5398..1aee2d658802aae55f37e7307f91cbb2448d5380 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -222,28 +222,28 @@ tbx11k_simplified_rs_f7 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_7" tbx11k_simplified_rs_f8 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_8" tbx11k_simplified_rs_f9 = "ptbench.configs.datasets.tbx11k_simplified_RS.fold_9" # TBX11K simplified dataset split 2 (and cross-validation folds) -tbx11k_simplified_v2 = "ptbench.configs.datasets.tbx11k_simplified_v2.default" -tbx11k_simplified_v2_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.rgb" -tbx11k_simplified_v2_f0 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_0" -tbx11k_simplified_v2_f1 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_1" -tbx11k_simplified_v2_f2 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_2" -tbx11k_simplified_v2_f3 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_3" -tbx11k_simplified_v2_f4 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_4" -tbx11k_simplified_v2_f5 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_5" -tbx11k_simplified_v2_f6 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_6" -tbx11k_simplified_v2_f7 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_7" -tbx11k_simplified_v2_f8 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_8" -tbx11k_simplified_v2_f9 = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_9" -tbx11k_simplified_v2_f0_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_0_rgb" -tbx11k_simplified_v2_f1_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_1_rgb" -tbx11k_simplified_v2_f2_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_2_rgb" -tbx11k_simplified_v2_f3_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_3_rgb" -tbx11k_simplified_v2_f4_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_4_rgb" -tbx11k_simplified_v2_f5_rgb = 
"ptbench.configs.datasets.tbx11k_simplified_v2.fold_5_rgb" -tbx11k_simplified_v2_f6_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_6_rgb" -tbx11k_simplified_v2_f7_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_7_rgb" -tbx11k_simplified_v2_f8_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_8_rgb" -tbx11k_simplified_v2_f9_rgb = "ptbench.configs.datasets.tbx11k_simplified_v2.fold_9_rgb" +tbx11k_simplified_v2 = "ptbench.data.tbx11k_simplified_v2.default" +tbx11k_simplified_v2_rgb = "ptbench.data.tbx11k_simplified_v2.rgb" +tbx11k_simplified_v2_f0 = "ptbench.data.tbx11k_simplified_v2.fold_0" +tbx11k_simplified_v2_f1 = "ptbench.data.tbx11k_simplified_v2.fold_1" +tbx11k_simplified_v2_f2 = "ptbench.data.tbx11k_simplified_v2.fold_2" +tbx11k_simplified_v2_f3 = "ptbench.data.tbx11k_simplified_v2.fold_3" +tbx11k_simplified_v2_f4 = "ptbench.data.tbx11k_simplified_v2.fold_4" +tbx11k_simplified_v2_f5 = "ptbench.data.tbx11k_simplified_v2.fold_5" +tbx11k_simplified_v2_f6 = "ptbench.data.tbx11k_simplified_v2.fold_6" +tbx11k_simplified_v2_f7 = "ptbench.data.tbx11k_simplified_v2.fold_7" +tbx11k_simplified_v2_f8 = "ptbench.data.tbx11k_simplified_v2.fold_8" +tbx11k_simplified_v2_f9 = "ptbench.data.tbx11k_simplified_v2.fold_9" +tbx11k_simplified_v2_f0_rgb = "ptbench.data.tbx11k_simplified_v2.fold_0_rgb" +tbx11k_simplified_v2_f1_rgb = "ptbench.data.tbx11k_simplified_v2.fold_1_rgb" +tbx11k_simplified_v2_f2_rgb = "ptbench.data.tbx11k_simplified_v2.fold_2_rgb" +tbx11k_simplified_v2_f3_rgb = "ptbench.data.tbx11k_simplified_v2.fold_3_rgb" +tbx11k_simplified_v2_f4_rgb = "ptbench.data.tbx11k_simplified_v2.fold_4_rgb" +tbx11k_simplified_v2_f5_rgb = "ptbench.data.tbx11k_simplified_v2.fold_5_rgb" +tbx11k_simplified_v2_f6_rgb = "ptbench.data.tbx11k_simplified_v2.fold_6_rgb" +tbx11k_simplified_v2_f7_rgb = "ptbench.data.tbx11k_simplified_v2.fold_7_rgb" +tbx11k_simplified_v2_f8_rgb = "ptbench.data.tbx11k_simplified_v2.fold_8_rgb" +tbx11k_simplified_v2_f9_rgb = 
"ptbench.data.tbx11k_simplified_v2.fold_9_rgb" # extended TBX11K simplified dataset split 2 (with radiological signs) tbx11k_simplified_v2_rs = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.default" tbx11k_simplified_v2_rs_f0 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_0" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/__init__.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/__init__.py deleted file mode 100644 index a1e6f50ca64a471417fef6ba47253cce9af9b12f..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - - -def _maker(protocol, RGB=False): - from torchvision import transforms - - from ....data.tbx11k_simplified_v2 import dataset as raw - from ....data.transforms import ElasticDeformation - from .. import make_dataset as mk - - post_transforms = [] - if RGB: - post_transforms = [ - transforms.Lambda(lambda x: x.convert("RGB")), - transforms.ToTensor(), - ] - - return mk( - [raw.subsets(protocol)], - [], - [ElasticDeformation(p=0.8)], - post_transforms, - ) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/default.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/default.py deleted file mode 100644 index f969c8554d7b67f35b2aea6c08a09d2aeb7340bf..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/default.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This 
configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("default") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0.py deleted file mode 100644 index 1805c0005fbad4f93dbb8f2b4225959cb0882832..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 0) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_0") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0_rgb.py deleted file mode 100644 index b232652f8d009bd5ec8b80108201b216d3ada961..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 0, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_0", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1.py deleted file mode 100644 index 8d8074a1281c4b528527e02a824892239d558c1a..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 1) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_1") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1_rgb.py deleted file mode 100644 index ec0421059f94bffb2fa769ad5990f31636e75a47..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 1, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_1", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2.py deleted file mode 100644 index 1962b0370b9a3c24fbe1cb81119760f5239d4f83..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 2) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_2") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2_rgb.py deleted file mode 100644 index 660073a4c13ae62dba576cffb61953e063bdc560..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 2, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_2", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3.py deleted file mode 100644 index 9872ef5831ffb43a61afce0764a91915aae603d6..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 3) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_3") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3_rgb.py deleted file mode 100644 index bb54f1d206ea1893f1c6f4bac516e7d6a00d3a7d..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 3, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_3", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4.py deleted file mode 100644 index 7cde75d8faf0d9834bdd9c4bbc0715b76cc5c530..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 4) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_4") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4_rgb.py deleted file mode 100644 index d5bf8d8255148537ee2e432e4102949ee2f81d15..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 4, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_4", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5.py deleted file mode 100644 index d9ca35bf50da984d837d1e4609cb5178a6c6d39c..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 5) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_5") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5_rgb.py deleted file mode 100644 index 983326689b110a2ccb0d78fcfa61cf6a950d2d36..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 5, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_5", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6.py deleted file mode 100644 index c8abb0658bf78ad2cede7f1c4329735595284138..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 6) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_6") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6_rgb.py deleted file mode 100644 index dabdb67452b3cf30e31ab2a70cc08f30d2afb003..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 6, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_6", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7.py deleted file mode 100644 index 67864aa6ccdc3448177f7139879b0eb05f6cd10d..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 7) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_7") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7_rgb.py deleted file mode 100644 index d37bda6e40f650bc313c90159838857f4140d395..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 7, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_7", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8.py deleted file mode 100644 index 2ac58e16e9e9ebbb9dbc73432605c10c7ad53371..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 8) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_8") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8_rgb.py deleted file mode 100644 index f6adcd4300305ab0b153041444f17b1eef2a7de0..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 8, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_8", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9.py deleted file mode 100644 index 1034cf8176641e8afeb27fc2e00acea306411265..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 9) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("fold_9") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9_rgb.py deleted file mode 100644 index 8fe3f1dbf75faa2537aabe297b41b82d4c9b4c3b..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 9, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_9", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/rgb.py deleted file mode 100644 index c327dad9578765a4542aa1a41ce16d36d6d49d6a..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v2/rgb.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol, converted in -RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from . 
import _maker - -dataset = _maker("default", RGB=True) diff --git a/src/ptbench/data/tbx11k_simplified_v2/__init__.py b/src/ptbench/data/tbx11k_simplified_v2/__init__.py index 7cf676b00d4570c8b2f6b42724f55eeb0eab2fc4..d6075e85d366510d8670af362a2b685167e3c8de 100644 --- a/src/ptbench/data/tbx11k_simplified_v2/__init__.py +++ b/src/ptbench/data/tbx11k_simplified_v2/__init__.py @@ -76,15 +76,36 @@ def _loader_bbox(context, sample): return make_delayed_bbox(sample, _raw_data_loader_bbox) -dataset = JSONDataset( +json_dataset = JSONDataset( protocols=_protocols, fieldnames=("data", "label"), loader=_loader, ) -dataset_with_bboxes = JSONDataset( +json_dataset_with_bboxes = JSONDataset( protocols=_protocols, fieldnames=("data", "label", "bboxes"), loader=_loader_bbox, ) """TBX11K simplified dataset object.""" + + +def _maker(protocol, RGB=False): + from torchvision import transforms + + from .. import make_dataset + from ..transforms import ElasticDeformation + + post_transforms = [] + if RGB: + post_transforms = [ + transforms.Lambda(lambda x: x.convert("RGB")), + transforms.ToTensor(), + ] + + return make_dataset( + [json_dataset.subsets(protocol)], + [], + [ElasticDeformation(p=0.8)], + post_transforms, + ) diff --git a/src/ptbench/data/tbx11k_simplified_v2/default.py b/src/ptbench/data/tbx11k_simplified_v2/default.py new file mode 100644 index 0000000000000000000000000000000000000000..17b738c59be7a4a482ab479673da3a06e68240ea --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/default.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (default protocol) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This 
configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class DefaultModule(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("default") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = DefaultModule diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_0.py b/src/ptbench/data/tbx11k_simplified_v2/fold_0.py new file mode 100644 index 0000000000000000000000000000000000000000..4751d17ec8266170aca6519cf8e917e90305590f --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_0.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 0) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_0") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_0_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_0_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..1ba0e2ac26cc63843b90d32b011a0eefc6a6d364 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_0_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 0, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_0", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_1.py b/src/ptbench/data/tbx11k_simplified_v2/fold_1.py new file mode 100644 index 0000000000000000000000000000000000000000..c70e2d194c26cfd9e44fb68bb1423485bf27d0b3 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_1.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 1) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_1") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_1_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_1_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..c543f62ef12c9bff50b1e457ea0aef2fde40f3f3 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_1_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 1, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_1", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_2.py b/src/ptbench/data/tbx11k_simplified_v2/fold_2.py new file mode 100644 index 0000000000000000000000000000000000000000..5af836ee4090df17b5e3cf9093f13bfc8bae433f --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_2.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 2) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_2") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_2_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_2_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..39ba5a97cb95826b706e36cc356995ba5ecd95a4 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_2_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 2, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_2", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_3.py b/src/ptbench/data/tbx11k_simplified_v2/fold_3.py new file mode 100644 index 0000000000000000000000000000000000000000..f5c6285bfac3bf5e4ab92e4f1b4a780e928b74bb --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_3.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 3) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_3") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_3_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_3_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..0aaab4af498453e58fec75a4dc887173c1494d46 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_3_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 3, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_3", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_4.py b/src/ptbench/data/tbx11k_simplified_v2/fold_4.py new file mode 100644 index 0000000000000000000000000000000000000000..796cf8f6bc94562923d29d22a0d0299215682cb8 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_4.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 4) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_4") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_4_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_4_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..b53968c6cc96d0735100a4928c1ce3d0e2ad51d3 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_4_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 4, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_4", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_5.py b/src/ptbench/data/tbx11k_simplified_v2/fold_5.py new file mode 100644 index 0000000000000000000000000000000000000000..7adba117d4edf6984048c7321269d6d47163fa3b --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_5.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 5) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_5") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_5_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_5_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..a7ae16472f80f32b83dc31e8ffb9ed746dd0ed78 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_5_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 5, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_5", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_6.py b/src/ptbench/data/tbx11k_simplified_v2/fold_6.py new file mode 100644 index 0000000000000000000000000000000000000000..0a5199e1b9f21a2ddc058afe119ad816030e3a0b --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_6.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 6) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_6") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_6_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_6_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..e46d97d57af3f676a6f390281f9d5e3cba8b8dbf --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_6_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 6, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_6", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_7.py b/src/ptbench/data/tbx11k_simplified_v2/fold_7.py new file mode 100644 index 0000000000000000000000000000000000000000..f34d1d59883fd8ddf3c09eaac350b8782e45ca5f --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_7.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 7) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_7") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_7_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_7_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..4c595e735fb47d63dacefc6b9eb4f3ef7fe9a9fc --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_7_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 7, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_7", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_8.py b/src/ptbench/data/tbx11k_simplified_v2/fold_8.py new file mode 100644 index 0000000000000000000000000000000000000000..eb4a94aa48d8f7b4960ef134a5ad737ff0d88d4e --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_8.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 8) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_8") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_8_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_8_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..70510ce5ee8c87cdff8190c0be875dd84e3d385c --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_8_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 8, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_8", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_9.py b/src/ptbench/data/tbx11k_simplified_v2/fold_9.py new file mode 100644 index 0000000000000000000000000000000000000000..b15c1bf02990d9aae52a6c1c2b769ef54abf2321 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_9.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 9) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_9") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_9_rgb.py b/src/ptbench/data/tbx11k_simplified_v2/fold_9_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..271e33480a84ff839cc997e3ae5b881e0595ae68 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/fold_9_rgb.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 9, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class Fold0Module(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("fold_9", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = Fold0Module diff --git a/src/ptbench/data/tbx11k_simplified_v2/rgb.py b/src/ptbench/data/tbx11k_simplified_v2/rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..13ac3a9f9cbdbbdb55c43026e54692909ac8299c --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/rgb.py @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (default protocol, converted to +RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", + 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . 
import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class DefaultModule(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("default", RGB=True) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = DefaultModule  # alias bound to the class itself (not an instance); its setup() builds the "default" split with RGB=True