diff --git a/doc/api.rst b/doc/api.rst index bcb3f2032ccef8e25f1087ad6f8582060a9e41b5..0114d9ff62b83dfc2178ade21426a7a2cb334dfe 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -57,8 +57,6 @@ Direct data-access through iterators. ptbench.data.tbx11k_simplified_RS ptbench.data.tbx11k_simplified_v2 ptbench.data.tbx11k_simplified_v2_RS - ptbench.data.tbx11k_simplified_v3 - ptbench.data.tbx11k_simplified_v3_RS .. _ptbench.api.models: diff --git a/doc/config.rst b/doc/config.rst index 4e402bca1d96513b36cc860e44d4b58c68214ffc..828bba07a4c3db58cdc4854c7ab1c4e407f21f17 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -80,9 +80,6 @@ if applicable. Use these datasets for training and evaluating your models. ptbench.configs.datasets.tbx11k_simplified_v2.default ptbench.configs.datasets.tbx11k_simplified_v2.rgb ptbench.configs.datasets.tbx11k_simplified_v2_RS.default - ptbench.configs.datasets.tbx11k_simplified_v3.default - ptbench.configs.datasets.tbx11k_simplified_v3.rgb - ptbench.configs.datasets.tbx11k_simplified_v3_RS.default .. _ptbench.configs.datasets.folds: @@ -133,9 +130,6 @@ datasets. Nine other folds are available for every configuration (from 1 to ptbench.configs.datasets.tbx11k_simplified_v2.fold_0 ptbench.configs.datasets.tbx11k_simplified_v2.fold_0_rgb ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_0 - ptbench.configs.datasets.tbx11k_simplified_v3.fold_0 - ptbench.configs.datasets.tbx11k_simplified_v3.fold_0_rgb - ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_0 .. include:: links.rst diff --git a/pyproject.toml b/pyproject.toml index 609a0a08be71c96e5f338c59aeb4f8ad289ae8aa..4f6bf7f730c48448d1edd4e0382028de8c1fd978 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -256,41 +256,6 @@ tbx11k_simplified_v2_rs_f6 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.f tbx11k_simplified_v2_rs_f7 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_7" tbx11k_simplified_v2_rs_f8 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_8" tbx11k_simplified_v2_rs_f9 = "ptbench.configs.datasets.tbx11k_simplified_v2_RS.fold_9" -# TBX11K simplified dataset split 3 (and cross-validation folds) -tbx11k_simplified_v3 = "ptbench.configs.datasets.tbx11k_simplified_v3.default" -tbx11k_simplified_v3_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.rgb" -tbx11k_simplified_v3_f0 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_0" -tbx11k_simplified_v3_f1 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_1" -tbx11k_simplified_v3_f2 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_2" -tbx11k_simplified_v3_f3 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_3" -tbx11k_simplified_v3_f4 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_4" -tbx11k_simplified_v3_f5 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_5" -tbx11k_simplified_v3_f6 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_6" -tbx11k_simplified_v3_f7 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_7" -tbx11k_simplified_v3_f8 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_8" -tbx11k_simplified_v3_f9 = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_9" -tbx11k_simplified_v3_f0_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_0_rgb" -tbx11k_simplified_v3_f1_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_1_rgb" -tbx11k_simplified_v3_f2_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_2_rgb" -tbx11k_simplified_v3_f3_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_3_rgb" -tbx11k_simplified_v3_f4_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_4_rgb" -tbx11k_simplified_v3_f5_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_5_rgb" -tbx11k_simplified_v3_f6_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_6_rgb" -tbx11k_simplified_v3_f7_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_7_rgb" -tbx11k_simplified_v3_f8_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_8_rgb" -tbx11k_simplified_v3_f9_rgb = "ptbench.configs.datasets.tbx11k_simplified_v3.fold_9_rgb" -# extended TBX11K simplified dataset split 3 (with radiological signs) -tbx11k_simplified_v3_rs = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.default" -tbx11k_simplified_v3_rs_f0 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_0" -tbx11k_simplified_v3_rs_f1 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_1" -tbx11k_simplified_v3_rs_f2 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_2" -tbx11k_simplified_v3_rs_f3 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_3" -tbx11k_simplified_v3_rs_f4 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_4" -tbx11k_simplified_v3_rs_f5 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_5" -tbx11k_simplified_v3_rs_f6 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_6" -tbx11k_simplified_v3_rs_f7 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_7" -tbx11k_simplified_v3_rs_f8 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_8" -tbx11k_simplified_v3_rs_f9 = "ptbench.configs.datasets.tbx11k_simplified_v3_RS.fold_9" # montgomery-shenzhen aggregated dataset mc_ch = "ptbench.configs.datasets.mc_ch.default" mc_ch_rgb = "ptbench.configs.datasets.mc_ch.rgb" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/__init__.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/__init__.py deleted file mode 100644 index bed38616924b7f736f5d33d12c7bdf7911dc590f..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - - -def _maker(protocol, RGB=False): - from torchvision import transforms - - from ....data.tbx11k_simplified_v3 import dataset as raw - from ....data.transforms import ElasticDeformation - from .. import make_dataset as mk - - post_transforms = [] - if RGB: - post_transforms = [ - transforms.Lambda(lambda x: x.convert("RGB")), - transforms.ToTensor(), - ] - - return mk( - [raw.subsets(protocol)], - [], - [ElasticDeformation(p=0.8)], - post_transforms, - ) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/default.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/default.py deleted file mode 100644 index 3805a14219167fbcc1cd0becfa28d592c422813d..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/default.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("default") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0.py deleted file mode 100644 index bed7342dd6f28a83ca3c5a4e7984551a153344d7..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 0) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_0") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0_rgb.py deleted file mode 100644 index ed2f6d174d87521d3fe2ad12e211e0e32bab68e2..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 0, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_0", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1.py deleted file mode 100644 index 92cc9ce31f3f296a52d9ba51eae87d39740cb160..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 1) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_1") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1_rgb.py deleted file mode 100644 index 9380ec5b3910f463fea0cab48ea3cdb738647302..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 1, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_1", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2.py deleted file mode 100644 index 460c96df367fa982dad4dca111c41ad5118f11f1..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 2) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_2") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2_rgb.py deleted file mode 100644 index 69641e3acc24e1de79b09832eb7b8340879cfc4a..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 2, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_2", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3.py deleted file mode 100644 index c799c7cb54044be807bc3cc615658c39ca47667c..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 3) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_3") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3_rgb.py deleted file mode 100644 index 816eeca4c7eeae7ffc30bd58c6f527aec999b179..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 3, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_3", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4.py deleted file mode 100644 index acceda91f4caa5773b5f0b478bfc5c1e9e4dc1d8..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 4) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_4") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4_rgb.py deleted file mode 100644 index 07811e9c185f2c8165cc0e156dce5bb3292656c1..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 4, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_4", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5.py deleted file mode 100644 index 20b048d5be87e947db7622ed34c1aaa30f166084..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 5) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_5") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5_rgb.py deleted file mode 100644 index 6b851bdcdffb0029b2dd70fe74a4c9afffaff826..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 5, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_5", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6.py deleted file mode 100644 index e99afb4c0ab99ac8de31522181910e89b6f61b3e..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 6) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_6") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6_rgb.py deleted file mode 100644 index 01a94e08e348358983eb9bdd40f155818733555e..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 6, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_6", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7.py deleted file mode 100644 index 53e5af59d4bc1cdc145af9756c9a8f494f2519cb..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 7) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_7") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7_rgb.py deleted file mode 100644 index 546d63584886094cf6bd7a5267e0e131197faa3a..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 7, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_7", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8.py deleted file mode 100644 index d59887df9ce9849fca1483d8c3c4adb97bf7d0b5..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 8) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_8") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8_rgb.py deleted file mode 100644 index 243c1455a3d2a06e14b535b303b8a7bbfe70e8fa..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 8, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_8", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9.py deleted file mode 100644 index 2f9a45f01f31a9f1c87d8b70aa9e5859cee83b04..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 9) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_9") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9_rgb.py deleted file mode 100644 index ef3d504550a094970ad49d16b307e1d0ae0782db..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9_rgb.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 9, RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_9", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/rgb.py deleted file mode 100644 index 1c274f79252bff0a06bc52edff8e1590b265fccd..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3/rgb.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol, converted in -RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("default", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/__init__.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/__init__.py deleted file mode 100644 index 176a28448003e2107060b089d1e9b0418c49296a..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - - -def _maker(protocol): - from ....data.tbx11k_simplified_v3_RS import dataset as raw - from .. import make_dataset as mk - - return mk([raw.subsets(protocol)]) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/default.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/default.py deleted file mode 100644 index 7c62887d8a4eb2d66e9ad287cfa0fb20020c97b3..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/default.py +++ /dev/null @@ -1,18 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) (extended with -DensenetRS predictions) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("default") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_0.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_0.py deleted file mode 100644 index bed7342dd6f28a83ca3c5a4e7984551a153344d7..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_0.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 0) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_0") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_1.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_1.py deleted file mode 100644 index 92cc9ce31f3f296a52d9ba51eae87d39740cb160..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_1.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 1) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_1") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_2.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_2.py deleted file mode 100644 index 460c96df367fa982dad4dca111c41ad5118f11f1..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_2.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 2) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_2") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_3.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_3.py deleted file mode 100644 index c799c7cb54044be807bc3cc615658c39ca47667c..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_3.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 3) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_3") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_4.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_4.py deleted file mode 100644 index acceda91f4caa5773b5f0b478bfc5c1e9e4dc1d8..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_4.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 4) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_4") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_5.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_5.py deleted file mode 100644 index 20b048d5be87e947db7622ed34c1aaa30f166084..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_5.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 5) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_5") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_6.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_6.py deleted file mode 100644 index e99afb4c0ab99ac8de31522181910e89b6f61b3e..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_6.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 6) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_6") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_7.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_7.py deleted file mode 100644 index 53e5af59d4bc1cdc145af9756c9a8f494f2519cb..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_7.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 7) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_7") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_8.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_8.py deleted file mode 100644 index d59887df9ce9849fca1483d8c3c4adb97bf7d0b5..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_8.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 8) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_8") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_9.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_9.py deleted file mode 100644 index 2f9a45f01f31a9f1c87d8b70aa9e5859cee83b04..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/tbx11k_simplified_v3_RS/fold_9.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (cross validation fold 9) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", -* and "active TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details -""" - -from . import _maker - -dataset = _maker("fold_9") diff --git a/src/ptbench/data/tbx11k_simplified_v3/__init__.py b/src/ptbench/data/tbx11k_simplified_v3/__init__.py deleted file mode 100644 index 7cf676b00d4570c8b2f6b42724f55eeb0eab2fc4..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v3/__init__.py +++ /dev/null @@ -1,90 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11K simplified dataset for computer-aided diagnosis. - -The TBX11K database has been established to foster research -in computer-aided diagnosis of pulmonary diseases with a special -focus on tuberculosis (aTB). The dataset was specifically -designed to be used with CNNs. It contains 11,000 chest X-ray -images, each of a unique patient. They were labeled by expert -radiologists with 5 - 10+ years of experience. Possible labels -are: "healthy", "active TB", "latent TB", and "sick & non-tb". -The version of the dataset used in this benchmark is a simplified. - -* Reference: [TBX11K-SIMPLIFIED-2020]_ -* Original (released) resolution (height x width or width x height): 512 x 512 -* Split reference: none -* Protocol ``default``: - - * Training samples: 62.6% of CXR (including labels) - * Validation samples: 16% of CXR (including labels) - * Test samples: 21.4% of CXR (including labels) -""" - -import importlib.resources -import os - -from ...utils.rc import load_rc -from ..dataset import JSONDataset -from ..loader import load_pil_baw, make_delayed, make_delayed_bbox - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - -_datadir = load_rc().get( - "datadir.tbx11k_simplified", os.path.realpath(os.curdir) -) - - -def _raw_data_loader(sample): - return dict( - data=load_pil_baw(os.path.join(_datadir, sample["data"])), # type: ignore - label=sample["label"], - ) - - -def _raw_data_loader_bbox(sample): - return dict( - data=load_pil_baw(os.path.join(_datadir, sample["data"])), # type: ignore - label=sample["label"], - bboxes=sample["bboxes"], - ) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we return delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader) - - -def _loader_bbox(context, sample): - # "context" is ignored in this case - database is homogeneous - # we return delayed samples to avoid loading all images at once - return make_delayed_bbox(sample, _raw_data_loader_bbox) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("data", "label"), - loader=_loader, -) - -dataset_with_bboxes = JSONDataset( - protocols=_protocols, - fieldnames=("data", "label", "bboxes"), - loader=_loader_bbox, -) -"""TBX11K simplified dataset object.""" diff --git a/src/ptbench/data/tbx11k_simplified_v3/default.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/default.json.bz2 deleted file mode 100644 index 53b1c72281098faa727ce9b578210ae588e204c0..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/default.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_0.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_0.json.bz2 deleted file mode 100644 index 881297d6973db88dc2d6d1e19eb0c29f9062ed8a..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_1.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_1.json.bz2 deleted file mode 100644 index d85439be416a48142310799e6bf32b286224dd6a..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_2.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_2.json.bz2 deleted file mode 100644 index fb8c89af52076546d2a4da4831eb5f650c93cf18..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_3.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_3.json.bz2 deleted file mode 100644 index 0a8cc88fd74d7847785ea4a74cd7ced8c72fa89f..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_4.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_4.json.bz2 deleted file mode 100644 index 1ab03dfa21c6d8fe60a910343c4544f98e723634..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_5.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_5.json.bz2 deleted file mode 100644 index 17467ae2146adc756ebe7843e234aea18c5b9b14..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_6.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_6.json.bz2 deleted file mode 100644 index 2359d5e1ee6f14491715842f81a244acd083f7da..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_7.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_7.json.bz2 deleted file mode 100644 index 4cd04d5fd74a65704de41da44b5e638fa609a7fb..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_8.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_8.json.bz2 deleted file mode 100644 index fd8e62f18bf68ef535dc8e13f1291f9bdd5ad800..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_9.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_9.json.bz2 deleted file mode 100644 index 7018d30fae5a4a50f7aca57961274c3e0ef30987..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3/fold_9.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/__init__.py b/src/ptbench/data/tbx11k_simplified_v3_RS/__init__.py deleted file mode 100644 index f2bcf8c2a53cd117f556c5854774d5ea81fb2f6c..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v3_RS/__init__.py +++ /dev/null @@ -1,63 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Extended TBX11K simplified dataset for computer-aided diagnosis (extended -with DensenetRS predictions) - -The TBX11K database has been established to foster research -in computer-aided diagnosis of pulmonary diseases with a special -focus on tuberculosis (aTB). The dataset was specifically -designed to be used with CNNs. It contains 11,000 chest X-ray -images, each of a unique patient. They were labeled by expert -radiologists with 5 - 10+ years of experience. Possible labels -are: "healthy", "active TB", "latent TB", and "sick & non-tb". -The version of the dataset used in this benchmark is a simplified. - -* Reference: [TBX11K-SIMPLIFIED-2020]_ -* Original (released) resolution (height x width or width x height): 512 x 512 -* Split reference: none -* Protocol ``default``: - - * Training samples: 62.6% of CXR (including labels) - * Validation samples: 16% of CXR (including labels) - * Test samples: 21.4% of CXR (including labels) -""" - -import importlib.resources - -from ..dataset import JSONDataset -from ..loader import make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - - -def _raw_data_loader(sample): - return dict(data=sample["data"], label=sample["label"]) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader, key=sample["filename"]) - - -dataset = JSONDataset( - protocols=_protocols, - fieldnames=("filename", "label", "data"), - loader=_loader, -) - -"""Extended TBX11K simplified dataset object.""" diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/default.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/default.json.bz2 deleted file mode 100644 index cc25eb3513cb826148474b6099c17e01c0337db9..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/default.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_0.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_0.json.bz2 deleted file mode 100644 index e3ed3611aba3587a53794658da76249462f11057..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_0.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_1.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_1.json.bz2 deleted file mode 100644 index 66980d95ee4551812530722d3ae6d734f874c061..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_1.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_2.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_2.json.bz2 deleted file mode 100644 index 423b892e1f47432b7c4e514815abc791be1613e0..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_2.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_3.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_3.json.bz2 deleted file mode 100644 index f9ab24097cbcd4d81e3e329e0bb51f6876b1df87..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_3.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_4.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_4.json.bz2 deleted file mode 100644 index bca9274bbd055bf821243c5739c08041b3013c25..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_4.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_5.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_5.json.bz2 deleted file mode 100644 index c1ad523698e9f9d2fdc2d61028558a0196f23216..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_5.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_6.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_6.json.bz2 deleted file mode 100644 index 518c12aa21e138241f5aa5a5d1829f2103a5a789..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_6.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_7.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_7.json.bz2 deleted file mode 100644 index f9d53add2a73dfb115581763061d8f3675d5767d..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_7.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_8.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_8.json.bz2 deleted file mode 100644 index 96667c33ee29d13ee056b4e0164a092b5d1b1149..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_8.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_9.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3_RS/fold_9.json.bz2 deleted file mode 100644 index 7d78901b8ec9e3efdf1d57bc9ca92d9d591de562..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/tbx11k_simplified_v3_RS/fold_9.json.bz2 and /dev/null differ diff --git a/tests/test_11k_v3.py b/tests/test_11k_v3.py deleted file mode 100644 index 848b48757d6dc745f83f03d994732ea4b11ca2b0..0000000000000000000000000000000000000000 --- a/tests/test_11k_v3.py +++ /dev/null @@ -1,270 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Tests for TBX11K simplified dataset split 3.""" - -import pytest - - -def test_protocol_consistency(): - from ptbench.data.tbx11k_simplified_v3 import dataset - - # Default protocol - subset = dataset.subsets("default") - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 5241 - for s in subset["train"]: - assert s.key.startswith("images/") - - assert "validation" in subset - assert len(subset["validation"]) == 1335 - for s in subset["validation"]: - assert s.key.startswith("images/") - - assert "test" in subset - assert len(subset["test"]) == 1793 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - # Cross-validation fold 0-8 - for f in range(9): - subset = dataset.subsets("fold_" + str(f)) - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 6003 - for s in subset["train"]: - assert s.key.startswith("images/") - - assert "validation" in subset - assert len(subset["validation"]) == 1529 - for s in subset["validation"]: - assert s.key.startswith("images/") - - assert "test" in subset - assert len(subset["test"]) == 837 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - # Cross-validation fold 9 - subset = dataset.subsets("fold_9") - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 6003 - for s in subset["train"]: - assert s.key.startswith("images/") - - assert "validation" in subset - assert len(subset["validation"]) == 1530 - for s in subset["validation"]: - assert s.key.startswith("images/") - - assert "test" in subset - assert len(subset["test"]) == 836 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - -def test_protocol_consistency_bbox(): - from ptbench.data.tbx11k_simplified_v3 import dataset_with_bboxes - - # Default protocol - subset = dataset_with_bboxes.subsets("default") - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 5241 - for s in subset["train"]: - assert s.key.startswith("images/") - - assert "validation" in subset - assert len(subset["validation"]) == 1335 - for s in subset["validation"]: - assert s.key.startswith("images/") - - assert "test" in subset - assert len(subset["test"]) == 1793 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - # Check bounding boxes - for s in subset["train"]: - assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':") - - # Cross-validation fold 0-8 - for f in range(9): - subset = dataset_with_bboxes.subsets("fold_" + str(f)) - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 6003 - for s in subset["train"]: - assert s.key.startswith("images/") - - assert "validation" in subset - assert len(subset["validation"]) == 1529 - for s in subset["validation"]: - assert s.key.startswith("images/") - - assert "test" in subset - assert len(subset["test"]) == 837 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - # Check bounding boxes - for s in subset["train"]: - assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':") - - # Cross-validation fold 9 - subset = dataset_with_bboxes.subsets("fold_9") - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 6003 - for s in subset["train"]: - assert s.key.startswith("images/") - - assert "validation" in subset - assert len(subset["validation"]) == 1530 - for s in subset["validation"]: - assert s.key.startswith("images/") - - assert "test" in subset - assert len(subset["test"]) == 836 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - # Check bounding boxes - for s in subset["train"]: - assert s.bboxes == "none" or s.bboxes[0].startswith("{'xmin':") - - -@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v3") -def test_loading(): - from ptbench.data.tbx11k_simplified_v3 import dataset - - def _check_sample(s): - data = s.data - assert isinstance(data, dict) - assert len(data) == 2 - - assert "data" in data - assert data["data"].size == (512, 512) - - assert data["data"].mode == "L" # Check colors - - assert "label" in data - assert data["label"] in [0, 1, 2, 3] # Check labels - - limit = 30 # use this to limit testing to first images only, else None - - subset = dataset.subsets("default") - for s in subset["train"][:limit]: - _check_sample(s) - - -@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v3") -def test_loading_bbox(): - from ptbench.data.tbx11k_simplified_v3 import dataset_with_bboxes - - def _check_sample(s): - data = s.data - assert isinstance(data, dict) - assert len(data) == 3 - - assert "data" in data - assert data["data"].size == (512, 512) - - assert data["data"].mode == "L" # Check colors - - assert "label" in data - assert data["label"] in [0, 1, 2, 3] # Check labels - - assert "bboxes" in data - assert data["bboxes"] == "none" or data["bboxes"][0].startswith( - "{'xmin':" - ) - - limit = 30 # use this to limit testing to first images only, else None - - subset = dataset_with_bboxes.subsets("default") - for s in subset["train"][:limit]: - _check_sample(s) - - -@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v3") -def test_check(): - from ptbench.data.tbx11k_simplified_v3 import dataset - - assert dataset.check() == 0 - - -@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified_v3") -def test_check_bbox(): - from ptbench.data.tbx11k_simplified_v3 import dataset_with_bboxes - - assert dataset_with_bboxes.check() == 0 diff --git a/tests/test_11k_v3_RS.py b/tests/test_11k_v3_RS.py deleted file mode 100644 index 76a83499d6147a44df5e6deb2c1db0dde49554d6..0000000000000000000000000000000000000000 --- a/tests/test_11k_v3_RS.py +++ /dev/null @@ -1,117 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Tests for Extended TBX11K simplified dataset split 3.""" - -import pytest - - -def test_protocol_consistency(): - from ptbench.data.tbx11k_simplified_v3_RS import dataset - - # Default protocol - subset = dataset.subsets("default") - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 5241 - - assert "validation" in subset - assert len(subset["validation"]) == 1335 - - assert "test" in subset - assert len(subset["test"]) == 1793 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - # Cross-validation fold 0-8 - for f in range(9): - subset = dataset.subsets("fold_" + str(f)) - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 6003 - for s in subset["train"]: - assert s.key.startswith("images/") - - assert "validation" in subset - assert len(subset["validation"]) == 1529 - for s in subset["validation"]: - assert s.key.startswith("images/") - - assert "test" in subset - assert len(subset["test"]) == 837 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - # Cross-validation fold 9 - subset = dataset.subsets("fold_9") - assert len(subset) == 3 - - assert "train" in subset - assert len(subset["train"]) == 6003 - for s in subset["train"]: - assert s.key.startswith("images/") - - assert "validation" in subset - assert len(subset["validation"]) == 1530 - for s in subset["validation"]: - assert s.key.startswith("images/") - - assert "test" in subset - assert len(subset["test"]) == 836 - for s in subset["test"]: - assert s.key.startswith("images/") - - # Check labels - for s in subset["train"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["validation"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - for s in subset["test"]: - assert s.label in [0.0, 1.0, 2.0, 3.0] - - -@pytest.mark.skip_if_rc_var_not_set("datadir.tbx11k_simplified") -def test_loading(): - from ptbench.data.tbx11k_simplified_v3_RS import dataset - - def _check_sample(s): - data = s.data - assert isinstance(data, dict) - assert len(data) == 2 - - assert "data" in data - assert len(data["data"]) == 14 # Check radiological signs - - assert "label" in data - assert data["label"] in [0, 1, 2, 3] # Check labels - - limit = 30 # use this to limit testing to first images only, else None - - subset = dataset.subsets("default") - for s in subset["train"][:limit]: - _check_sample(s)