diff --git a/pyproject.toml b/pyproject.toml index a114328012d653f01d6815f4e0396aa9c65a1df9..f75f61b5d48abd0e41fd0896428d47e1dc370d43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,7 +114,6 @@ indian_f8 = "ptbench.data.indian.datamodules:fold_8" indian_f9 = "ptbench.data.indian.datamodules:fold_9" # TBX11K simplified dataset split 1 (and cross-validation folds) tbx11k_simplified = "ptbench.data.tbx11k_simplified.default" -tbx11k_simplified_rgb = "ptbench.data.tbx11k_simplified.rgb" tbx11k_simplified_f0 = "ptbench.data.tbx11k_simplified.fold_0" tbx11k_simplified_f1 = "ptbench.data.tbx11k_simplified.fold_1" tbx11k_simplified_f2 = "ptbench.data.tbx11k_simplified.fold_2" @@ -125,19 +124,8 @@ tbx11k_simplified_f6 = "ptbench.data.tbx11k_simplified.fold_6" tbx11k_simplified_f7 = "ptbench.data.tbx11k_simplified.fold_7" tbx11k_simplified_f8 = "ptbench.data.tbx11k_simplified.fold_8" tbx11k_simplified_f9 = "ptbench.data.tbx11k_simplified.fold_9" -tbx11k_simplified_f0_rgb = "ptbench.data.tbx11k_simplified.fold_0_rgb" -tbx11k_simplified_f1_rgb = "ptbench.data.tbx11k_simplified.fold_1_rgb" -tbx11k_simplified_f2_rgb = "ptbench.data.tbx11k_simplified.fold_2_rgb" -tbx11k_simplified_f3_rgb = "ptbench.data.tbx11k_simplified.fold_3_rgb" -tbx11k_simplified_f4_rgb = "ptbench.data.tbx11k_simplified.fold_4_rgb" -tbx11k_simplified_f5_rgb = "ptbench.data.tbx11k_simplified.fold_5_rgb" -tbx11k_simplified_f6_rgb = "ptbench.data.tbx11k_simplified.fold_6_rgb" -tbx11k_simplified_f7_rgb = "ptbench.data.tbx11k_simplified.fold_7_rgb" -tbx11k_simplified_f8_rgb = "ptbench.data.tbx11k_simplified.fold_8_rgb" -tbx11k_simplified_f9_rgb = "ptbench.data.tbx11k_simplified.fold_9_rgb" # TBX11K simplified dataset split 2 (and cross-validation folds) tbx11k_simplified_v2 = "ptbench.data.tbx11k_simplified_v2.default" -tbx11k_simplified_v2_rgb = "ptbench.data.tbx11k_simplified_v2.rgb" tbx11k_simplified_v2_f0 = "ptbench.data.tbx11k_simplified_v2.fold_0" tbx11k_simplified_v2_f1 = "ptbench.data.tbx11k_simplified_v2.fold_1" tbx11k_simplified_v2_f2 = "ptbench.data.tbx11k_simplified_v2.fold_2" @@ -148,19 +136,8 @@ tbx11k_simplified_v2_f6 = "ptbench.data.tbx11k_simplified_v2.fold_6" tbx11k_simplified_v2_f7 = "ptbench.data.tbx11k_simplified_v2.fold_7" tbx11k_simplified_v2_f8 = "ptbench.data.tbx11k_simplified_v2.fold_8" tbx11k_simplified_v2_f9 = "ptbench.data.tbx11k_simplified_v2.fold_9" -tbx11k_simplified_v2_f0_rgb = "ptbench.data.tbx11k_simplified_v2.fold_0_rgb" -tbx11k_simplified_v2_f1_rgb = "ptbench.data.tbx11k_simplified_v2.fold_1_rgb" -tbx11k_simplified_v2_f2_rgb = "ptbench.data.tbx11k_simplified_v2.fold_2_rgb" -tbx11k_simplified_v2_f3_rgb = "ptbench.data.tbx11k_simplified_v2.fold_3_rgb" -tbx11k_simplified_v2_f4_rgb = "ptbench.data.tbx11k_simplified_v2.fold_4_rgb" -tbx11k_simplified_v2_f5_rgb = "ptbench.data.tbx11k_simplified_v2.fold_5_rgb" -tbx11k_simplified_v2_f6_rgb = "ptbench.data.tbx11k_simplified_v2.fold_6_rgb" -tbx11k_simplified_v2_f7_rgb = "ptbench.data.tbx11k_simplified_v2.fold_7_rgb" -tbx11k_simplified_v2_f8_rgb = "ptbench.data.tbx11k_simplified_v2.fold_8_rgb" -tbx11k_simplified_v2_f9_rgb = "ptbench.data.tbx11k_simplified_v2.fold_9_rgb" # montgomery-shenzhen aggregated dataset mc_ch = "ptbench.data.mc_ch.default" -mc_ch_rgb = "ptbench.data.mc_ch.rgb" mc_ch_f0 = "ptbench.data.mc_ch.fold_0" mc_ch_f1 = "ptbench.data.mc_ch.fold_1" mc_ch_f2 = "ptbench.data.mc_ch.fold_2" @@ -171,19 +148,8 @@ mc_ch_f6 = "ptbench.data.mc_ch.fold_6" mc_ch_f7 = "ptbench.data.mc_ch.fold_7" mc_ch_f8 = "ptbench.data.mc_ch.fold_8" mc_ch_f9 = "ptbench.data.mc_ch.fold_9" -mc_ch_f0_rgb = "ptbench.data.mc_ch.fold_0_rgb" -mc_ch_f1_rgb = "ptbench.data.mc_ch.fold_1_rgb" -mc_ch_f2_rgb = "ptbench.data.mc_ch.fold_2_rgb" -mc_ch_f3_rgb = "ptbench.data.mc_ch.fold_3_rgb" -mc_ch_f4_rgb = "ptbench.data.mc_ch.fold_4_rgb" -mc_ch_f5_rgb = "ptbench.data.mc_ch.fold_5_rgb" -mc_ch_f6_rgb = "ptbench.data.mc_ch.fold_6_rgb" -mc_ch_f7_rgb = "ptbench.data.mc_ch.fold_7_rgb" -mc_ch_f8_rgb = "ptbench.data.mc_ch.fold_8_rgb" -mc_ch_f9_rgb = "ptbench.data.mc_ch.fold_9_rgb" # montgomery-shenzhen-indian aggregated dataset mc_ch_in = "ptbench.data.mc_ch_in.default" -mc_ch_in_rgb = "ptbench.data.mc_ch_in.rgb" mc_ch_in_f0 = "ptbench.data.mc_ch_in.fold_0" mc_ch_in_f1 = "ptbench.data.mc_ch_in.fold_1" mc_ch_in_f2 = "ptbench.data.mc_ch_in.fold_2" @@ -194,19 +160,8 @@ mc_ch_in_f6 = "ptbench.data.mc_ch_in.fold_6" mc_ch_in_f7 = "ptbench.data.mc_ch_in.fold_7" mc_ch_in_f8 = "ptbench.data.mc_ch_in.fold_8" mc_ch_in_f9 = "ptbench.data.mc_ch_in.fold_9" -mc_ch_in_f0_rgb = "ptbench.data.mc_ch_in.fold_0_rgb" -mc_ch_in_f1_rgb = "ptbench.data.mc_ch_in.fold_1_rgb" -mc_ch_in_f2_rgb = "ptbench.data.mc_ch_in.fold_2_rgb" -mc_ch_in_f3_rgb = "ptbench.data.mc_ch_in.fold_3_rgb" -mc_ch_in_f4_rgb = "ptbench.data.mc_ch_in.fold_4_rgb" -mc_ch_in_f5_rgb = "ptbench.data.mc_ch_in.fold_5_rgb" -mc_ch_in_f6_rgb = "ptbench.data.mc_ch_in.fold_6_rgb" -mc_ch_in_f7_rgb = "ptbench.data.mc_ch_in.fold_7_rgb" -mc_ch_in_f8_rgb = "ptbench.data.mc_ch_in.fold_8_rgb" -mc_ch_in_f9_rgb = "ptbench.data.mc_ch_in.fold_9_rgb" # montgomery-shenzhen-indian-tbx11k aggregated dataset mc_ch_in_11k = "ptbench.data.mc_ch_in_11k.default" -mc_ch_in_11k_rgb = "ptbench.data.mc_ch_in_11k.rgb" mc_ch_in_11k_f0 = "ptbench.data.mc_ch_in_11k.fold_0" mc_ch_in_11k_f1 = "ptbench.data.mc_ch_in_11k.fold_1" mc_ch_in_11k_f2 = "ptbench.data.mc_ch_in_11k.fold_2" @@ -217,19 +172,8 @@ mc_ch_in_11k_f6 = "ptbench.data.mc_ch_in_11k.fold_6" mc_ch_in_11k_f7 = "ptbench.data.mc_ch_in_11k.fold_7" mc_ch_in_11k_f8 = "ptbench.data.mc_ch_in_11k.fold_8" mc_ch_in_11k_f9 = "ptbench.data.mc_ch_in_11k.fold_9" -mc_ch_in_11k_f0_rgb = "ptbench.data.mc_ch_in_11k.fold_0_rgb" -mc_ch_in_11k_f1_rgb = "ptbench.data.mc_ch_in_11k.fold_1_rgb" -mc_ch_in_11k_f2_rgb = "ptbench.data.mc_ch_in_11k.fold_2_rgb" -mc_ch_in_11k_f3_rgb = "ptbench.data.mc_ch_in_11k.fold_3_rgb" -mc_ch_in_11k_f4_rgb = "ptbench.data.mc_ch_in_11k.fold_4_rgb" -mc_ch_in_11k_f5_rgb = "ptbench.data.mc_ch_in_11k.fold_5_rgb" -mc_ch_in_11k_f6_rgb = "ptbench.data.mc_ch_in_11k.fold_6_rgb" -mc_ch_in_11k_f7_rgb = "ptbench.data.mc_ch_in_11k.fold_7_rgb" -mc_ch_in_11k_f8_rgb = "ptbench.data.mc_ch_in_11k.fold_8_rgb" -mc_ch_in_11k_f9_rgb = "ptbench.data.mc_ch_in_11k.fold_9_rgb" # montgomery-shenzhen-indian-tbx11kv2 aggregated dataset mc_ch_in_11kv2 = "ptbench.data.mc_ch_in_11kv2.default" -mc_ch_in_11kv2_rgb = "ptbench.data.mc_ch_in_11kv2.rgb" mc_ch_in_11kv2_f0 = "ptbench.data.mc_ch_in_11kv2.fold_0" mc_ch_in_11kv2_f1 = "ptbench.data.mc_ch_in_11kv2.fold_1" mc_ch_in_11kv2_f2 = "ptbench.data.mc_ch_in_11kv2.fold_2" @@ -240,16 +184,6 @@ mc_ch_in_11kv2_f6 = "ptbench.data.mc_ch_in_11kv2.fold_6" mc_ch_in_11kv2_f7 = "ptbench.data.mc_ch_in_11kv2.fold_7" mc_ch_in_11kv2_f8 = "ptbench.data.mc_ch_in_11kv2.fold_8" mc_ch_in_11kv2_f9 = "ptbench.data.mc_ch_in_11kv2.fold_9" -mc_ch_in_11kv2_f0_rgb = "ptbench.data.mc_ch_in_11kv2.fold_0_rgb" -mc_ch_in_11kv2_f1_rgb = "ptbench.data.mc_ch_in_11kv2.fold_1_rgb" -mc_ch_in_11kv2_f2_rgb = "ptbench.data.mc_ch_in_11kv2.fold_2_rgb" -mc_ch_in_11kv2_f3_rgb = "ptbench.data.mc_ch_in_11kv2.fold_3_rgb" -mc_ch_in_11kv2_f4_rgb = "ptbench.data.mc_ch_in_11kv2.fold_4_rgb" -mc_ch_in_11kv2_f5_rgb = "ptbench.data.mc_ch_in_11kv2.fold_5_rgb" -mc_ch_in_11kv2_f6_rgb = "ptbench.data.mc_ch_in_11kv2.fold_6_rgb" -mc_ch_in_11kv2_f7_rgb = "ptbench.data.mc_ch_in_11kv2.fold_7_rgb" -mc_ch_in_11kv2_f8_rgb = "ptbench.data.mc_ch_in_11kv2.fold_8_rgb" -mc_ch_in_11kv2_f9_rgb = "ptbench.data.mc_ch_in_11kv2.fold_9_rgb" # tbpoc dataset (and cross-validation folds) tbpoc_f0 = "ptbench.data.tbpoc.fold_0" tbpoc_f1 = "ptbench.data.tbpoc.fold_1" @@ -261,16 +195,6 @@ tbpoc_f6 = "ptbench.data.tbpoc.fold_6" tbpoc_f7 = "ptbench.data.tbpoc.fold_7" tbpoc_f8 = "ptbench.data.tbpoc.fold_8" tbpoc_f9 = "ptbench.data.tbpoc.fold_9" -tbpoc_f0_rgb = "ptbench.data.tbpoc.fold_0_rgb" -tbpoc_f1_rgb = "ptbench.data.tbpoc.fold_1_rgb" -tbpoc_f2_rgb = "ptbench.data.tbpoc.fold_2_rgb" -tbpoc_f3_rgb = "ptbench.data.tbpoc.fold_3_rgb" -tbpoc_f4_rgb = "ptbench.data.tbpoc.fold_4_rgb" -tbpoc_f5_rgb = "ptbench.data.tbpoc.fold_5_rgb" -tbpoc_f6_rgb = "ptbench.data.tbpoc.fold_6_rgb" -tbpoc_f7_rgb = "ptbench.data.tbpoc.fold_7_rgb" -tbpoc_f8_rgb = "ptbench.data.tbpoc.fold_8_rgb" -tbpoc_f9_rgb = "ptbench.data.tbpoc.fold_9_rgb" # hivtb dataset (and cross-validation folds) hivtb_f0 = "ptbench.data.hivtb.fold_0" hivtb_f1 = "ptbench.data.hivtb.fold_1" @@ -282,19 +206,8 @@ hivtb_f6 = "ptbench.data.hivtb.fold_6" hivtb_f7 = "ptbench.data.hivtb.fold_7" hivtb_f8 = "ptbench.data.hivtb.fold_8" hivtb_f9 = "ptbench.data.hivtb.fold_9" -hivtb_f0_rgb = "ptbench.data.hivtb.fold_0_rgb" -hivtb_f1_rgb = "ptbench.data.hivtb.fold_1_rgb" -hivtb_f2_rgb = "ptbench.data.hivtb.fold_2_rgb" -hivtb_f3_rgb = "ptbench.data.hivtb.fold_3_rgb" -hivtb_f4_rgb = "ptbench.data.hivtb.fold_4_rgb" -hivtb_f5_rgb = "ptbench.data.hivtb.fold_5_rgb" -hivtb_f6_rgb = "ptbench.data.hivtb.fold_6_rgb" -hivtb_f7_rgb = "ptbench.data.hivtb.fold_7_rgb" -hivtb_f8_rgb = "ptbench.data.hivtb.fold_8_rgb" -hivtb_f9_rgb = "ptbench.data.hivtb.fold_9_rgb" # montgomery-shenzhen-indian-padchest aggregated dataset mc_ch_in_pc = "ptbench.data.mc_ch_in_pc.default" -mc_ch_in_pc_rgb = "ptbench.data.mc_ch_in_pc.rgb" # NIH CXR14 (relabeled) nih_cxr14 = "ptbench.data.nih_cxr14_re.default" nih_cxr14_cm = "ptbench.data.nih_cxr14_re.cardiomegaly" @@ -304,7 +217,6 @@ nih_cxr14_pc_idiap = "ptbench.data.nih_cxr14_re_pc.idiap" padchest_idiap = "ptbench.data.padchest.idiap" padchest_tb_idiap = "ptbench.data.padchest.tb_idiap" padchest_no_tb_idiap = "ptbench.data.padchest.no_tb_idiap" -padchest_tb_idiap_rgb = "ptbench.data.padchest.tb_idiap_rgb" padchest_cm_idiap = "ptbench.data.padchest.cardiomegaly_idiap" [tool.setuptools] diff --git a/src/ptbench/data/indian/rgb.py b/src/ptbench/data/indian/rgb.py deleted file mode 100644 index f8df027ddf1c67870aa76445932441b1e0c2587d..0000000000000000000000000000000000000000 --- a/src/ptbench/data/indian/rgb.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (default protocol, converted in RGB) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("default", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch/rgb.py b/src/ptbench/data/mc_ch/rgb.py deleted file mode 100644 index 53778be115606d49140fba2ce63eb12c469ac6d2..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch/rgb.py +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery and Shenzhen (RGB) datasets.""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..montgomery.rgb import datamodule as mc_datamodule -from ..shenzhen.rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"]] - ) - self.dataset["train"] = ConcatDataset([mc["train"], ch["train"]]) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"]] - ) - self.dataset["test"] = ConcatDataset([mc["test"], ch["test"]]) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in/rgb.py b/src/ptbench/data/mc_ch_in/rgb.py deleted file mode 100644 index 26c44a2e722d0d83a62f3693768d44ec680ba2e9..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in/rgb.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen and Indian (RGB) -datasets.""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.rgb import datamodule as indian_datamodule -from ..montgomery.rgb import datamodule as mc_datamodule -from ..shenzhen.rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [mc["__train__"], ch["__train__"], indian["__train__"]] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [mc["__valid__"], ch["__valid__"], indian["__valid__"]] - ) - self.dataset["validation"] = ConcatDataset( - [mc["validation"], ch["validation"], indian["validation"]] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11k/rgb.py b/src/ptbench/data/mc_ch_in_11k/rgb.py deleted file mode 100644 index a8482e9802b73bcd175ac7fc55a40a561055d8f0..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11k/rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.rgb import datamodule as indian_datamodule -from ..montgomery.rgb import datamodule as mc_datamodule -from ..shenzhen.rgb import datamodule as ch_datamodule -from ..tbx11k_simplified.rgb import datamodule as tbx11k_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11k = get_dataset_from_module( - tbx11k_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11k["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11k["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11k["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11k["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11k["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_11kv2/rgb.py b/src/ptbench/data/mc_ch_in_11kv2/rgb.py deleted file mode 100644 index c99b8fe902afb572dd3dae0dd2766b8fd9d961cb..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_11kv2/rgb.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and the default -TBX11K-simplified datasets (RGB)""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.rgb import datamodule as indian_datamodule -from ..montgomery.rgb import datamodule as mc_datamodule -from ..shenzhen.rgb import datamodule as ch_datamodule -from ..tbx11k_simplified_v2.rgb import datamodule as tbx11kv2_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - tbx11kv2 = get_dataset_from_module( - tbx11kv2_datamodule, stage, **module_args - ) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - tbx11kv2["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], tbx11kv2["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - tbx11kv2["__valid__"], - ] - ) - self.dataset["validation"] = ConcatDataset( - [ - mc["validation"], - ch["validation"], - indian["validation"], - tbx11kv2["validation"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], tbx11kv2["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/mc_ch_in_pc/rgb.py b/src/ptbench/data/mc_ch_in_pc/rgb.py deleted file mode 100644 index 47b6845d00b5330a4ca140d26e0ae2d2ef357fe4..0000000000000000000000000000000000000000 --- a/src/ptbench/data/mc_ch_in_pc/rgb.py +++ /dev/null @@ -1,91 +0,0 @@ -# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and Padchest -(RGB) datasets.""" - -from clapper.logging import setup -from torch.utils.data.dataset import ConcatDataset - -from .. import return_subsets -from ..base_datamodule import BaseDataModule, get_dataset_from_module -from ..indian.rgb import datamodule as indian_datamodule -from ..montgomery.rgb import datamodule as mc_datamodule -from ..padchest.tb_idiap_rgb import datamodule as pc_datamodule -from ..shenzhen.rgb import datamodule as ch_datamodule - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - self.train_batch_size = train_batch_size - self.predict_batch_size = predict_batch_size - self.drop_incomplete_batch = drop_incomplete_batch - self.multiproc_kwargs = multiproc_kwargs - - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - # Instantiate other datamodules and get their datasets - - module_args = { - "train_batch_size": self.train_batch_size, - "predict_batch_size": self.predict_batch_size, - "drop_incomplete_batch": self.drop_incomplete_batch, - "multiproc_kwargs": self.multiproc_kwargs, - } - - mc = get_dataset_from_module(mc_datamodule, stage, **module_args) - ch = get_dataset_from_module(ch_datamodule, stage, **module_args) - indian = get_dataset_from_module( - indian_datamodule, stage, **module_args - ) - pc = get_dataset_from_module(pc_datamodule, stage, **module_args) - - # Combine datasets - self.dataset = {} - self.dataset["__train__"] = ConcatDataset( - [ - mc["__train__"], - ch["__train__"], - indian["__train__"], - pc["__train__"], - ] - ) - self.dataset["train"] = ConcatDataset( - [mc["train"], ch["train"], indian["train"], pc["train"]] - ) - self.dataset["__valid__"] = ConcatDataset( - [ - mc["__valid__"], - ch["__valid__"], - indian["__valid__"], - pc["__valid__"], - ] - ) - self.dataset["test"] = ConcatDataset( - [mc["test"], ch["test"], indian["test"], pc["test"]] - ) - - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/padchest/tb_idiap_rgb.py b/src/ptbench/data/padchest/tb_idiap_rgb.py deleted file mode 100644 index f13876f725a0610ca2babbadb4011d56c2b8599f..0000000000000000000000000000000000000000 --- a/src/ptbench/data/padchest/tb_idiap_rgb.py +++ /dev/null @@ -1,50 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Padchest tuberculosis (idiap protocol, rgb) dataset for computer-aided -diagnosis. - -The 125 healthy images are the first 125 padchest images with the following -parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" - -* Split reference: first 80% of TB and healthy CXR for "train", rest for "test" -* See :py:mod:`ptbench.data.padchest` for dataset details -* This configuration resolution: 224 x 224 (default) -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("tb_idiap", resize_size=256, cc_size=224) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/tbx11k_simplified/rgb.py b/src/ptbench/data/tbx11k_simplified/rgb.py deleted file mode 100644 index 4a0b72fbf2d4d5641275353eb75b59817e0dfd7b..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified/rgb.py +++ /dev/null @@ -1,49 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol) - -* Split reference: first 62.5% of TB and healthy CXR for "train" 15.9% for -* "validation", 21.6% for "test" -* This split only consists of healthy and active TB samples -* "Latent TB" or "sick & non-TB" samples are not included in this configuration -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("default", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/tbx11k_simplified_v2/rgb.py b/src/ptbench/data/tbx11k_simplified_v2/rgb.py deleted file mode 100644 index 13ac3a9f9cbdbbdb55c43026e54692909ac8299c..0000000000000000000000000000000000000000 --- a/src/ptbench/data/tbx11k_simplified_v2/rgb.py +++ /dev/null @@ -1,50 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""TBX11k simplified dataset for TB detection (default protocol, converted in -RGB) - -* Split reference: first 62.6% of CXR for "train", 16% for "validation", -* 21.4% for "test" -* This split consists of non-TB and active TB samples -* "healthy", "latent TB", and "sick & non-TB" samples are all merged under the label "non-TB" -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.tbx11k_simplified_v2` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("default", RGB=True) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule