diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/default.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/default.py index 6aa1b81b16f4049b3998f434ee084eef2d11704b..b18326d9d61dfbb9898bc5c27a5acb7b7b182063 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/default.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/default.py @@ -2,7 +2,8 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (default protocol) (extended with +DensenetRS predictions) * Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_0.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_0.py index 10c7f549de385770c6585612ebd2b2bafb9310ee..90fc01324ebb5ee500cb511c6a2f4eef6fd79a1a 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_0.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_0.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 0) * Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_1.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_1.py index ec8d22e871a1678553d37392a6cd4193d084130d..3c76739126d646cb55bce31ababe5edf5c20def0 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_1.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_1.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 1) * Split reference: first 62.5% of TB and healthy CXR 
for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_2.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_2.py index 77493339ddc90f708f0dbc7de00dbb88f3bd9ccf..1168b8acebefc27832a9cd20d3e2cab15d30ca75 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_2.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_2.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 2) * Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_3.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_3.py index 4906ed7bf1310f3333bd64899c5645554238e3d9..7388e88fd7ee3ba969d2a46e27a3ce32076f1647 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_3.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_3.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 3) * Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_4.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_4.py index 5179e359319e9face3b4e8b32eb8d5a86eb619b0..2da0d69a4c1c25e1a40b1e638b06e16068d68688 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_4.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_4.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 4) * Split reference: first 62.5% of TB 
and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_5.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_5.py index 4ec01f68557b0cd016f9f6f5780c1fbb8db86a7b..c50320b5b6628a29476b683e7c143071028281cf 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_5.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_5.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 5) * Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_6.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_6.py index f88776c9d0fb877e48cb555984bd2724a68af3ac..8d3a1cea5a0507361f72cd4361408027444e5848 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_6.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_6.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 6) * Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_7.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_7.py index 756dc9c877ec3f14950b5406c7ecaa7036b8d9c0..5b1d08fc038899f0d78de6f0fc6dfac0d1c169b8 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_7.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_7.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 7) * Split reference: 
first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_8.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_8.py index 1b267cd5dfe004bf11217a0ac61489b9de8f720d..ccf8f5829a24740c2e4b2a7f08dfbbf29ff863a0 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_8.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_8.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 8) * Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_9.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_9.py index 74696cf6f8fc31d0bfcbc3c4e63bcf51571cdb2f..b2da70361bc7187e6686ad9467e1fb74340c415b 100644 --- a/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_9.py +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS/fold_9.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11k simplified dataset for TB detection (default protocol) +"""TBX11k simplified dataset for TB detection (cross validation fold 9) * Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for * "validation", 21.6% for "test" diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/__init__.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..c7cb9fb8c67ad9b3376d4c33d6dc8c77f09a1fdd --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/__init__.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + + +def _maker(protocol): + from ....data.tbx11k_simplified_RS_v2 import 
dataset as raw + from .. import make_dataset as mk + + return mk([raw.subsets(protocol)]) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/default.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/default.py new file mode 100644 index 0000000000000000000000000000000000000000..cae4cc852f720b87af3b5227286245d1dd7a9a72 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/default.py @@ -0,0 +1,18 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (default protocol) (extended with +DensenetRS predictions) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("default") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_0.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_0.py new file mode 100644 index 0000000000000000000000000000000000000000..99cb5aeb3cdece50dce136fc0a41c4322ee08ba2 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_0.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 0) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_0") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_1.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_1.py new file mode 100644 index 0000000000000000000000000000000000000000..dac2d72251980720aabaf313318cd944afdf0f3d --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_1.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 1) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_1") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_2.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_2.py new file mode 100644 index 0000000000000000000000000000000000000000..d702c569f0cc88f2447d0180c2c040361cbb3334 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_2.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 2) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_2") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_3.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_3.py new file mode 100644 index 0000000000000000000000000000000000000000..5ea7185e53540c5cad781972051088691b815632 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_3.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 3) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_3") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_4.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_4.py new file mode 100644 index 0000000000000000000000000000000000000000..8386d9e4ddfd3f6499ec44d277a983588dfb3108 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_4.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 4) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_4") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_5.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_5.py new file mode 100644 index 0000000000000000000000000000000000000000..e10f550f7addb5e3b6dd0dd2e8fba84f602d3877 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_5.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 5) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_5") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_6.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_6.py new file mode 100644 index 0000000000000000000000000000000000000000..4e23e586c953bbee55ba08799a841e0e5b65a9c0 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_6.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 6) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_6") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_7.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_7.py new file mode 100644 index 0000000000000000000000000000000000000000..8999a16e1f17202d539e38b6f276224b9e4e0129 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_7.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 7) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_7") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_8.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_8.py new file mode 100644 index 0000000000000000000000000000000000000000..2bdb4201077a65aaa95dc3ced7bbb301f199c642 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_8.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 8) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_8") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_9.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_9.py new file mode 100644 index 0000000000000000000000000000000000000000..599998512441d19c1300aedbf73f504457df347c --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v2/fold_9.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 9) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_9") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/__init__.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a21cf7f43ee8d328a2d1dd615a18ad6842c8ea23 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/__init__.py @@ -0,0 +1,10 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + + +def _maker(protocol): + from ....data.tbx11k_simplified_RS_v3 import dataset as raw + from .. 
import make_dataset as mk + + return mk([raw.subsets(protocol)]) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/default.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/default.py new file mode 100644 index 0000000000000000000000000000000000000000..088f9186c589017e1dcf6305cb50aa350eacc399 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/default.py @@ -0,0 +1,18 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (default protocol) (extended with +DensenetRS predictions) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("default") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_0.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_0.py new file mode 100644 index 0000000000000000000000000000000000000000..20c7316f0a8a71864e10f9c5bcc99bb2eab32795 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_0.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 0) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_0") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_1.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_1.py new file mode 100644 index 0000000000000000000000000000000000000000..4d3c1961cb323f47d8422fb2b1b1183140208022 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_1.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 1) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_1") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_2.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_2.py new file mode 100644 index 0000000000000000000000000000000000000000..bc8088a01469bca888bc3b3a4ec5a94a42e33851 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_2.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 2) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_2") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_3.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_3.py new file mode 100644 index 0000000000000000000000000000000000000000..3fba1cda757a7d84f16b9b6b69c072fa59fef2b6 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_3.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 3) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_3") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_4.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_4.py new file mode 100644 index 0000000000000000000000000000000000000000..d5917fefec6682c5d10066fa9a75fef9afd6a899 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_4.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 4) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_4") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_5.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_5.py new file mode 100644 index 0000000000000000000000000000000000000000..2cf02786cbc0d4efc2fed4bd01d23d747d5fb33e --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_5.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 5) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_5") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_6.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_6.py new file mode 100644 index 0000000000000000000000000000000000000000..c11e5b2f1b677145512fd6e13262018252d1420d --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_6.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 6) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_6") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_7.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_7.py new file mode 100644 index 0000000000000000000000000000000000000000..d9cbb56661ac46c1b54dbd980bebbf922abbfd61 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_7.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 7) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_7") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_8.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_8.py new file mode 100644 index 0000000000000000000000000000000000000000..5592912a4a8fbb96498d984e0640b32e198332dd --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_8.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 8) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_8") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_9.py b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_9.py new file mode 100644 index 0000000000000000000000000000000000000000..79410450c17a194d70572ddb2c72590ae91b536e --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_RS_v3/fold_9.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 9) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_9") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/__init__.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a1e6f50ca64a471417fef6ba47253cce9af9b12f --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/__init__.py @@ -0,0 +1,25 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + + +def _maker(protocol, RGB=False): + from torchvision import transforms + + from ....data.tbx11k_simplified_v2 import dataset as raw + from ....data.transforms import ElasticDeformation + from .. 
import make_dataset as mk + + post_transforms = [] + if RGB: + post_transforms = [ + transforms.Lambda(lambda x: x.convert("RGB")), + transforms.ToTensor(), + ] + + return mk( + [raw.subsets(protocol)], + [], + [ElasticDeformation(p=0.8)], + post_transforms, + ) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/default.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/default.py new file mode 100644 index 0000000000000000000000000000000000000000..19000aef6faa723fc7cb20fc6ba01c1190f237be --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/default.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (default protocol) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("default") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0.py new file mode 100644 index 0000000000000000000000000000000000000000..99cb5aeb3cdece50dce136fc0a41c4322ee08ba2 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 0) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_0") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..473ed248c201a271a6f8a4a95d695dc5fa17159e --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_0_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 0, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_0", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1.py new file mode 100644 index 0000000000000000000000000000000000000000..dac2d72251980720aabaf313318cd944afdf0f3d --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 1) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_1") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..6b982365ca92acd63903a8968bcfe844e5dd0803 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_1_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 1, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_1", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2.py new file mode 100644 index 0000000000000000000000000000000000000000..d702c569f0cc88f2447d0180c2c040361cbb3334 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 2) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_2") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..2fe57611ae38d6d1ab799de0aa0b61e02ee10f68 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_2_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 2, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_2", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3.py new file mode 100644 index 0000000000000000000000000000000000000000..5ea7185e53540c5cad781972051088691b815632 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 3) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_3") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..31fc772ed8ee9d6cb38a1a806b8bf440e5552ee9 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_3_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 3, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_3", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4.py new file mode 100644 index 0000000000000000000000000000000000000000..8386d9e4ddfd3f6499ec44d277a983588dfb3108 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 4) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_4") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..aea87b63c9880036f1dde238c4ecb03802739f9b --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_4_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 4, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_4", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5.py new file mode 100644 index 0000000000000000000000000000000000000000..e10f550f7addb5e3b6dd0dd2e8fba84f602d3877 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 5) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_5") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..e7494300c8365308ccbc6308421a3302a6efeac9 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_5_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 5, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_5", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6.py new file mode 100644 index 0000000000000000000000000000000000000000..4e23e586c953bbee55ba08799a841e0e5b65a9c0 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 6) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_6") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..60768cef2c892fa2583437bfcc159eb8cae11352 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_6_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 6, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_6", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7.py new file mode 100644 index 0000000000000000000000000000000000000000..8999a16e1f17202d539e38b6f276224b9e4e0129 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 7) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_7") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..73115436d06db5ef692e8ccd1255d6c2fcd1740b --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_7_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 7, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_7", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8.py new file mode 100644 index 0000000000000000000000000000000000000000..2bdb4201077a65aaa95dc3ced7bbb301f199c642 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 8) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_8") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..c223d6507ff45eb6a550da2c74641ed1fdc9fedb --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_8_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 8, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_8", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9.py new file mode 100644 index 0000000000000000000000000000000000000000..599998512441d19c1300aedbf73f504457df347c --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 9) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_9") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..b232998fae1a86fad74f837c878dda35133f5c1c --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/fold_9_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 9, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_9", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v2/rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v2/rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..1af86d940eb0c89639abacf5379f0374431c93cf --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v2/rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (default protocol, converted to RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of non-TB and active TB samples +* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v2` for dataset details +""" + +from . import _maker + +dataset = _maker("default", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/__init__.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bed38616924b7f736f5d33d12c7bdf7911dc590f --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/__init__.py @@ -0,0 +1,25 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + + +def _maker(protocol, RGB=False): + from torchvision import transforms + + from ....data.tbx11k_simplified_v3 import dataset as raw + from ....data.transforms import ElasticDeformation + from .. 
import make_dataset as mk + + post_transforms = [] + if RGB: + post_transforms = [ + transforms.Lambda(lambda x: x.convert("RGB")), + transforms.ToTensor(), + ] + + return mk( + [raw.subsets(protocol)], + [], + [ElasticDeformation(p=0.8)], + post_transforms, + ) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/default.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/default.py new file mode 100644 index 0000000000000000000000000000000000000000..7208fc7d89144fe610fe4084fc04af25e4880043 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/default.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (default protocol) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("default") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0.py new file mode 100644 index 0000000000000000000000000000000000000000..20c7316f0a8a71864e10f9c5bcc99bb2eab32795 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 0) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_0") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..ce57f3bc6c965074283372cc625771810edced67 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_0_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 0, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_0", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1.py new file mode 100644 index 0000000000000000000000000000000000000000..4d3c1961cb323f47d8422fb2b1b1183140208022 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 1) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_1") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..227af63411e06c4a55231742a1dc8b42d227a042 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_1_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 1, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_1", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2.py new file mode 100644 index 0000000000000000000000000000000000000000..bc8088a01469bca888bc3b3a4ec5a94a42e33851 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 2) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_2") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..1ee2b3304a9dd4d2b025872f6a5d59a367e67be7 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_2_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 2, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_2", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3.py new file mode 100644 index 0000000000000000000000000000000000000000..3fba1cda757a7d84f16b9b6b69c072fa59fef2b6 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 3) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_3") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..75ac8cd374c11becbee1a3cb44321b7b64cd79e8 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_3_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 3, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_3", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4.py new file mode 100644 index 0000000000000000000000000000000000000000..d5917fefec6682c5d10066fa9a75fef9afd6a899 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 4) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_4") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..130539f97a62a0bd94b5ae4895578d2b589e26fb --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_4_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 4, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_4", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5.py new file mode 100644 index 0000000000000000000000000000000000000000..2cf02786cbc0d4efc2fed4bd01d23d747d5fb33e --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 5) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_5") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..ab5dc438c29d3582211563d6214688995ecc8afd --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_5_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 5, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_5", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6.py new file mode 100644 index 0000000000000000000000000000000000000000..c11e5b2f1b677145512fd6e13262018252d1420d --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 6) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_6") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..4785cfc3aedb75fc2580bbf98e8050550bc97fba --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_6_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 6, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_simplified_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_6", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7.py new file mode 100644 index 0000000000000000000000000000000000000000..d9cbb56661ac46c1b54dbd980bebbf922abbfd61 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 7) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_7") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..6d914cee001d07aac35c63af97ff1e19256b4f10 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_7_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 7, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_7", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8.py new file mode 100644 index 0000000000000000000000000000000000000000..5592912a4a8fbb96498d984e0640b32e198332dd --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 8) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_8") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..6db0e80294fb0e5cea97c32cb61497ce9d81fe47 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_8_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 8, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_8", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9.py new file mode 100644 index 0000000000000000000000000000000000000000..79410450c17a194d70572ddb2c72590ae91b536e --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 9) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("fold_9") diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9_rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..82af3049c91c52479876228ceaea230460634cd6 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/fold_9_rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (cross validation fold 9, RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . 
import _maker + +dataset = _maker("fold_9", RGB=True) diff --git a/src/ptbench/configs/datasets/tbx11k_simplified_v3/rgb.py b/src/ptbench/configs/datasets/tbx11k_simplified_v3/rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..9729d5100fe03ca7befc900892485bf5c9220ed7 --- /dev/null +++ b/src/ptbench/configs/datasets/tbx11k_simplified_v3/rgb.py @@ -0,0 +1,17 @@ +# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11k simplified dataset for TB detection (default protocol, converted in RGB) + +* Split reference: first 62.6% of CXR for "train", 16% for "validation", +* 21.4% for "test" +* This split consists of the 4 labels "healthy", "latent TB", "sick & non-TB", +* and "active TB" +* This configuration resolution: 512 x 512 (default) +* See :py:mod:`ptbench.data.tbx11k_v3` for dataset details +""" + +from . import _maker + +dataset = _maker("default", RGB=True) diff --git a/src/ptbench/data/tbx11k_simplified/__init__.py b/src/ptbench/data/tbx11k_simplified/__init__.py index 0b47442003653f438592fa001ab005a363da086c..e5910379ee8065c7f571a69a1bc60c2d3ef94e60 100644 --- a/src/ptbench/data/tbx11k_simplified/__init__.py +++ b/src/ptbench/data/tbx11k_simplified/__init__.py @@ -2,7 +2,7 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TBX11K simplified dataset for computer-aided diagnosis. +"""TBX11K simplified dataset for computer-aided diagnosis The TBX11K database has been established to foster research in computer-aided diagnosis of pulmonary diseases with a special @@ -14,7 +14,7 @@ are: "healthy", "active TB", "latent TB", and "sick & non-tb". The version of the dataset used in this benchmark is a simplified. 
* Reference: [TBX11K-SIMPLIFIED-2020]_ -* Original resolution (height x width or width x height): 4020 x 4892 +* Original (released) resolution (height x width or width x height): 512 x 512 * Split reference: none * Protocol ``default``: diff --git a/src/ptbench/data/tbx11k_simplified_RS_v2/__init__.py b/src/ptbench/data/tbx11k_simplified_RS_v2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bec7223465a8b7265c497d42d604b95cfef68457 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_RS_v2/__init__.py @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Extended TBX11K simplified dataset for computer-aided diagnosis (extended +with DensenetRS predictions) + +The TBX11K database has been established to foster research +in computer-aided diagnosis of pulmonary diseases with a special +focus on tuberculosis (aTB). The dataset was specifically +designed to be used with CNNs. It contains 11,000 chest X-ray +images, each of a unique patient. They were labeled by expert +radiologists with 5 - 10+ years of experience. Possible labels +are: "healthy", "active TB", "latent TB", and "sick & non-tb". +The version of the dataset used in this benchmark is a simplified. 
+ +* Reference: [TBX11K-SIMPLIFIED-2020]_ +* Original (released) resolution (height x width or width x height): 512 x 512 +* Split reference: none +* Protocol ``default``: + + * Training samples: 62.6% of CXR (including labels) + * Validation samples: 16% of CXR (including labels) + * Test samples: 21.4% of CXR (including labels) +""" + +import importlib.resources + +from ..dataset import JSONDataset +from ..loader import make_delayed + +_protocols = [ + importlib.resources.files(__name__).joinpath("default.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), +] + + +def _raw_data_loader(sample): + return dict(data=sample["data"], label=sample["label"]) + + +def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we return delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader, key=sample["filename"]) + + +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("filename", "label", "data"), + loader=_loader, +) +"""Extended TBX11K simplified dataset object.""" diff --git a/src/ptbench/data/tbx11k_simplified_RS_v3/__init__.py b/src/ptbench/data/tbx11k_simplified_RS_v3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bec7223465a8b7265c497d42d604b95cfef68457 --- /dev/null +++ 
b/src/ptbench/data/tbx11k_simplified_RS_v3/__init__.py @@ -0,0 +1,62 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Extended TBX11K simplified dataset for computer-aided diagnosis (extended +with DensenetRS predictions) + +The TBX11K database has been established to foster research +in computer-aided diagnosis of pulmonary diseases with a special +focus on tuberculosis (aTB). The dataset was specifically +designed to be used with CNNs. It contains 11,000 chest X-ray +images, each of a unique patient. They were labeled by expert +radiologists with 5 - 10+ years of experience. Possible labels +are: "healthy", "active TB", "latent TB", and "sick & non-tb". +The version of the dataset used in this benchmark is a simplified. + +* Reference: [TBX11K-SIMPLIFIED-2020]_ +* Original (released) resolution (height x width or width x height): 512 x 512 +* Split reference: none +* Protocol ``default``: + + * Training samples: 62.6% of CXR (including labels) + * Validation samples: 16% of CXR (including labels) + * Test samples: 21.4% of CXR (including labels) +""" + +import importlib.resources + +from ..dataset import JSONDataset +from ..loader import make_delayed + +_protocols = [ + importlib.resources.files(__name__).joinpath("default.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), + # importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), + # 
importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), +] + + +def _raw_data_loader(sample): + return dict(data=sample["data"], label=sample["label"]) + + +def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we return delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader, key=sample["filename"]) + + +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("filename", "label", "data"), + loader=_loader, +) +"""Extended TBX11K simplified dataset object.""" diff --git a/src/ptbench/data/tbx11k_simplified_v2/__init__.py b/src/ptbench/data/tbx11k_simplified_v2/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..bf06074dc67890e9b5e843fac543964b9f032778 --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v2/__init__.py @@ -0,0 +1,70 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11K simplified dataset for computer-aided diagnosis + +The TBX11K database has been established to foster research +in computer-aided diagnosis of pulmonary diseases with a special +focus on tuberculosis (aTB). The dataset was specifically +designed to be used with CNNs. It contains 11,000 chest X-ray +images, each of a unique patient. They were labeled by expert +radiologists with 5 - 10+ years of experience. Possible labels +are: "healthy", "active TB", "latent TB", and "sick & non-tb". +The version of the dataset used in this benchmark is a simplified. 
+ +* Reference: [TBX11K-SIMPLIFIED-2020]_ +* Original (released) resolution (height x width or width x height): 512 x 512 +* Split reference: none +* Protocol ``default``: + + * Training samples: 62.6% of CXR (including labels) + * Validation samples: 16% of CXR (including labels) + * Test samples: 21.4% of CXR (including labels) +""" + +import importlib.resources +import os + +from ...utils.rc import load_rc +from ..dataset import JSONDataset +from ..loader import load_pil_baw, make_delayed + +_protocols = [ + importlib.resources.files(__name__).joinpath("default.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), +] + +_datadir = load_rc().get( + "datadir.tbx11k_simplified_v2", os.path.realpath(os.curdir) +) + + +def _raw_data_loader(sample): + return dict( + data=load_pil_baw(os.path.join(_datadir, sample["data"])), # type: ignore + label=sample["label"], + ) + + +def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we return delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader) + + +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("data", "label"), + loader=_loader, +) +"""TBX11K simplified dataset object.""" diff --git a/src/ptbench/data/tbx11k_simplified_v2/default.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/default.json.bz2 new file 
mode 100644 index 0000000000000000000000000000000000000000..35dfaa9db2b0155f617489ab15672fd59b50182a Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/default.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_0.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_0.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..5e1cb71cf40feb4e7ebeca09e496d5dd82d40f62 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_0.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_1.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_1.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..ec40d777e0f96eb15dc290d7497fb16faa94b64c Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_1.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_2.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_2.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..26577c6d61cdd450c283085ab244a9457fa21aba Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_2.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_3.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_3.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f325642aea39e1651f715fc5894fdb6d2704a7ee Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_3.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_4.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_4.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..5ad8447d9d5871801cdbcffb32b33c8517cda2eb Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_4.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_5.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_5.json.bz2 new file mode 100644 index 
0000000000000000000000000000000000000000..616fb3e137ff1a1c7859ca8da81251905726a189 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_5.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_6.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_6.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..3b39872196ed10c2311c1da832639b695e5e00e8 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_6.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_7.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_7.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..4365591d11874f6d3ecf09e7bc7a22cccd61d25e Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_7.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_8.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_8.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..3fc324f8c8928db0a341211d9a751c94f8f2078a Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_8.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v2/fold_9.json.bz2 b/src/ptbench/data/tbx11k_simplified_v2/fold_9.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..571da49ae1811ed93d0346a12e9f44ca3c1ee9b5 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v2/fold_9.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/__init__.py b/src/ptbench/data/tbx11k_simplified_v3/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e6adab57a7dcce33f089339c24ed50fdc9ce2eea --- /dev/null +++ b/src/ptbench/data/tbx11k_simplified_v3/__init__.py @@ -0,0 +1,70 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""TBX11K simplified dataset for computer-aided diagnosis + +The TBX11K database 
has been established to foster research +in computer-aided diagnosis of pulmonary diseases with a special +focus on tuberculosis (aTB). The dataset was specifically +designed to be used with CNNs. It contains 11,000 chest X-ray +images, each of a unique patient. They were labeled by expert +radiologists with 5 - 10+ years of experience. Possible labels +are: "healthy", "active TB", "latent TB", and "sick & non-tb". +The version of the dataset used in this benchmark is a simplified. + +* Reference: [TBX11K-SIMPLIFIED-2020]_ +* Original (released) resolution (height x width or width x height): 512 x 512 +* Split reference: none +* Protocol ``default``: + + * Training samples: 62.6% of CXR (including labels) + * Validation samples: 16% of CXR (including labels) + * Test samples: 21.4% of CXR (including labels) +""" + +import importlib.resources +import os + +from ...utils.rc import load_rc +from ..dataset import JSONDataset +from ..loader import load_pil_baw, make_delayed + +_protocols = [ + importlib.resources.files(__name__).joinpath("default.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), + importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), +] + +_datadir = load_rc().get( + "datadir.tbx11k_simplified_v3", os.path.realpath(os.curdir) +) + + +def _raw_data_loader(sample): + return dict( + data=load_pil_baw(os.path.join(_datadir, sample["data"])), # type: ignore + label=sample["label"], + ) + + 
+def _loader(context, sample): + # "context" is ignored in this case - database is homogeneous + # we return delayed samples to avoid loading all images at once + return make_delayed(sample, _raw_data_loader) + + +dataset = JSONDataset( + protocols=_protocols, + fieldnames=("data", "label"), + loader=_loader, +) +"""TBX11K simplified dataset object.""" diff --git a/src/ptbench/data/tbx11k_simplified_v3/default.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/default.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..7a65587786d98af86654bd90be946abd68becd6a Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/default.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_0.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_0.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..9e68e54e2e2b822d9962f8e87cf2e40f830acbc2 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_0.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_1.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_1.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..199672c075e1482bc598235b150a05a933eecfc0 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_1.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_2.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_2.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..088d989c764808d1430676360a5b66f393157fe0 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_2.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_3.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_3.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a6319e583e74c79284ca37b7822cc6ccfe8eba6e Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_3.json.bz2 differ diff --git 
a/src/ptbench/data/tbx11k_simplified_v3/fold_4.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_4.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..1fa5cf8234d3c130adbf45f24c32f200a781f018 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_4.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_5.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_5.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..685ceb0019c18f95e0b0b806be6d0488375d3f96 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_5.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_6.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_6.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..5f425f6229f9f760a6165056291b546234e33e6c Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_6.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_7.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_7.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..37b6670bbb585c49be067281636a5a55aec70220 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_7.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_8.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_8.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..d99506cf3dc59eb2b1952819c7ad4262bbbaec20 Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_8.json.bz2 differ diff --git a/src/ptbench/data/tbx11k_simplified_v3/fold_9.json.bz2 b/src/ptbench/data/tbx11k_simplified_v3/fold_9.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a9361b6513ae6e610ae6d06e736a775736fbfe1e Binary files /dev/null and b/src/ptbench/data/tbx11k_simplified_v3/fold_9.json.bz2 differ