diff --git a/pyproject.toml b/pyproject.toml index e478819245b56d510124f3958ee177765353e219..5fc0981beca79e63840a613fd2cb1d088aa27af2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -476,11 +476,11 @@ nih_cxr14_cm = "ptbench.configs.datasets.nih_cxr14_re.cardiomegaly" # NIH CXR14 / PadChest aggregated dataset nih_cxr14_pc_idiap = "ptbench.configs.datasets.nih_cxr14_re_pc.idiap" # PadChest -padchest_idiap = "ptbench.configs.datasets.padchest.idiap" -padchest_tb_idiap = "ptbench.configs.datasets.padchest.tb_idiap" -padchest_no_tb_idiap = "ptbench.configs.datasets.padchest.no_tb_idiap" -padchest_tb_idiap_rgb = "ptbench.configs.datasets.padchest.tb_idiap_rgb" -padchest_cm_idiap = "ptbench.configs.datasets.padchest.cardiomegaly_idiap" +padchest_idiap = "ptbench.data.padchest.idiap" +padchest_tb_idiap = "ptbench.data.padchest.tb_idiap" +padchest_no_tb_idiap = "ptbench.data.padchest.no_tb_idiap" +padchest_tb_idiap_rgb = "ptbench.data.padchest.tb_idiap_rgb" +padchest_cm_idiap = "ptbench.data.padchest.cardiomegaly_idiap" # extended PadChestTB dataset (with radiological signs) padchest_tb_idiap_rs = "ptbench.configs.datasets.padchest_RS.tb_idiap" diff --git a/src/ptbench/configs/datasets/padchest/__init__.py b/src/ptbench/configs/datasets/padchest/__init__.py deleted file mode 100644 index 4ae4ebfb139353d7740cc49c9a741b07cbc20757..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/padchest/__init__.py +++ /dev/null @@ -1,26 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - - -def _maker(protocol, resize_size=512, cc_size=512, RGB=True): - import torchvision.transforms as transforms - - from ....data.padchest import dataset as raw - from ....data.transforms import SingleAutoLevel16to8 - from .. import make_dataset as mk - - post_transforms = [] - if not RGB: - post_transforms = [transforms.Lambda(lambda x: x.convert("L"))] - - return mk( - [raw.subsets(protocol)], - [ - SingleAutoLevel16to8(), - transforms.Resize(resize_size), - transforms.CenterCrop(cc_size), - ], - [transforms.RandomHorizontalFlip()], - post_transforms, - ) diff --git a/src/ptbench/configs/datasets/padchest/cardiomegaly_idiap.py b/src/ptbench/configs/datasets/padchest/cardiomegaly_idiap.py deleted file mode 100644 index a12a6b402f29570d800ed347d62a13161cd174b1..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/padchest/cardiomegaly_idiap.py +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Padchest cardiomegaly (idiap protocol) dataset for computer-aided diagnosis. - -The first 40 images with cardiomegaly. -parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" - -* Split reference: first 100% of cardiomegaly for "train" -* See :py:mod:`ptbench.data.padchest` for dataset details -* This configuration resolution: 512 x 512 (default) -""" - -from . import _maker - -dataset = _maker("cardiomegaly_idiap", RGB=False) diff --git a/src/ptbench/configs/datasets/padchest/idiap.py b/src/ptbench/configs/datasets/padchest/idiap.py deleted file mode 100644 index b6ba25a3b287164bf45c1cdd4fecf43821ad2f33..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/padchest/idiap.py +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Padchest (idiap protocol) dataset for computer-aided diagnosis. - -* See :py:mod:`ptbench.data.padchest` for dataset details -* This configuration resolution: 512 x 512 (default) -""" - -from . import _maker - -dataset = _maker("idiap") diff --git a/src/ptbench/configs/datasets/padchest/no_tb_idiap.py b/src/ptbench/configs/datasets/padchest/no_tb_idiap.py deleted file mode 100644 index 333b95a878d28ec291d99c0a3ed3ca17b735d191..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/padchest/no_tb_idiap.py +++ /dev/null @@ -1,24 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Padchest tuberculosis (no TB idiap protocol) dataset for computer-aided -diagnosis. - -* Protocol ``no TB idiap``: - - * Training samples: 20'126 - * Validation samples: 1'500 - * Test samples: 0 - -* Images path adapted to Idiap infrastructure - -* Labels: - cardiomegaly, emphysema, effusion, hernia, infiltration, - mass, nodule, atelectasis, pneumothorax, pleural thickening, pneumonia, - fibrosis, edema and consolidation -""" - -from . import _maker - -dataset = _maker("no_tb_idiap") diff --git a/src/ptbench/configs/datasets/padchest/tb_idiap.py b/src/ptbench/configs/datasets/padchest/tb_idiap.py deleted file mode 100644 index fd3690a57c5d20e7a7c9e0f8f255c1af76362d5d..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/padchest/tb_idiap.py +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Padchest tuberculosis (idiap protocol) dataset for computer-aided diagnosis. - -The 125 healthy images are the first 125 padchest images with the following -parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" - -* Split reference: first 80% of TB and healthy CXR for "train", rest for "test" -* See :py:mod:`ptbench.data.padchest` for dataset details -* This configuration resolution: 512 x 512 (default) -""" - -from . import _maker - -dataset = _maker("tb_idiap", RGB=False) diff --git a/src/ptbench/configs/datasets/padchest/tb_idiap_rgb.py b/src/ptbench/configs/datasets/padchest/tb_idiap_rgb.py deleted file mode 100644 index 44bc0d9abf33c563dc1f2fc2daeb68ad7bd06283..0000000000000000000000000000000000000000 --- a/src/ptbench/configs/datasets/padchest/tb_idiap_rgb.py +++ /dev/null @@ -1,18 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Padchest tuberculosis (idiap protocol, rgb) dataset for computer-aided -diagnosis. - -The 125 healthy images are the first 125 padchest images with the following -parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" - -* Split reference: first 80% of TB and healthy CXR for "train", rest for "test" -* See :py:mod:`ptbench.data.padchest` for dataset details -* This configuration resolution: 224 x 224 (default) -""" - -from . import _maker - -dataset = _maker("tb_idiap", resize_size=256, cc_size=224) diff --git a/src/ptbench/data/padchest/__init__.py b/src/ptbench/data/padchest/__init__.py index e52b6dd6f44ea7077faf8214bb3319a2917f5b4e..af1dd3ec9f8cbaa1b3e32f5195363592602d94ac 100644 --- a/src/ptbench/data/padchest/__init__.py +++ b/src/ptbench/data/padchest/__init__.py @@ -226,6 +226,7 @@ import importlib.resources import os from ...utils.rc import load_rc +from .. import make_dataset from ..dataset import JSONDataset from ..loader import load_pil, make_delayed @@ -252,9 +253,30 @@ def _loader(context, sample): return make_delayed(sample, _raw_data_loader) -dataset = JSONDataset( +json_dataset = JSONDataset( protocols=_protocols, fieldnames=("data", "label"), loader=_loader, ) """Padchest dataset object.""" + + +def _maker(protocol, resize_size=512, cc_size=512, RGB=True): + import torchvision.transforms as transforms + + from ..transforms import SingleAutoLevel16to8 + + post_transforms = [] + if not RGB: + post_transforms = [transforms.Lambda(lambda x: x.convert("L"))] + + return make_dataset( + [json_dataset.subsets(protocol)], + [ + SingleAutoLevel16to8(), + transforms.Resize(resize_size), + transforms.CenterCrop(cc_size), + ], + [transforms.RandomHorizontalFlip()], + post_transforms, + ) diff --git a/src/ptbench/data/padchest/cardiomegaly_idiap.py b/src/ptbench/data/padchest/cardiomegaly_idiap.py new file mode 100644 index 0000000000000000000000000000000000000000..25eaa6e8fd0ba15db3a46f4d14366bcf5bc91667 --- /dev/null +++ b/src/ptbench/data/padchest/cardiomegaly_idiap.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Padchest cardiomegaly (idiap protocol) dataset for computer-aided diagnosis. + +The first 40 images with cardiomegaly. +parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" + +* Split reference: first 100% of cardiomegaly for "train" +* See :py:mod:`ptbench.data.padchest` for dataset details +* This configuration resolution: 512 x 512 (default) +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class DefaultModule(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("cardiomegaly_idiap", RGB=False) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = DefaultModule diff --git a/src/ptbench/data/padchest/idiap.py b/src/ptbench/data/padchest/idiap.py new file mode 100644 index 0000000000000000000000000000000000000000..d91b552f39123cfc6e808dfe3bc8bab097448497 --- /dev/null +++ b/src/ptbench/data/padchest/idiap.py @@ -0,0 +1,45 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Padchest (idiap protocol) dataset for computer-aided diagnosis. + +* See :py:mod:`ptbench.data.padchest` for dataset details +* This configuration resolution: 512 x 512 (default) +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class DefaultModule(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("idiap") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = DefaultModule diff --git a/src/ptbench/data/padchest/no_tb_idiap.py b/src/ptbench/data/padchest/no_tb_idiap.py new file mode 100644 index 0000000000000000000000000000000000000000..78155abf3c5a44de88920da1833ddfa09b6bb244 --- /dev/null +++ b/src/ptbench/data/padchest/no_tb_idiap.py @@ -0,0 +1,56 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Padchest tuberculosis (no TB idiap protocol) dataset for computer-aided +diagnosis. + +* Protocol ``no TB idiap``: + + * Training samples: 20'126 + * Validation samples: 1'500 + * Test samples: 0 + +* Images path adapted to Idiap infrastructure + +* Labels: + cardiomegaly, emphysema, effusion, hernia, infiltration, + mass, nodule, atelectasis, pneumothorax, pleural thickening, pneumonia, + fibrosis, edema and consolidation +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class DefaultModule(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("no_tb_idiap") + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = DefaultModule diff --git a/src/ptbench/data/padchest/tb_idiap.py b/src/ptbench/data/padchest/tb_idiap.py new file mode 100644 index 0000000000000000000000000000000000000000..51428935d728d243b34eec4e54c2d1fcc911224b --- /dev/null +++ b/src/ptbench/data/padchest/tb_idiap.py @@ -0,0 +1,49 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Padchest tuberculosis (idiap protocol) dataset for computer-aided diagnosis. + +The 125 healthy images are the first 125 padchest images with the following +parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" + +* Split reference: first 80% of TB and healthy CXR for "train", rest for "test" +* See :py:mod:`ptbench.data.padchest` for dataset details +* This configuration resolution: 512 x 512 (default) +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class DefaultModule(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("tb_idiap", RGB=False) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = DefaultModule diff --git a/src/ptbench/data/padchest/tb_idiap_rgb.py b/src/ptbench/data/padchest/tb_idiap_rgb.py new file mode 100644 index 0000000000000000000000000000000000000000..f13876f725a0610ca2babbadb4011d56c2b8599f --- /dev/null +++ b/src/ptbench/data/padchest/tb_idiap_rgb.py @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Padchest tuberculosis (idiap protocol, rgb) dataset for computer-aided +diagnosis. + +The 125 healthy images are the first 125 padchest images with the following +parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" + +* Split reference: first 80% of TB and healthy CXR for "train", rest for "test" +* See :py:mod:`ptbench.data.padchest` for dataset details +* This configuration resolution: 224 x 224 (default) +""" + +from clapper.logging import setup + +from .. import return_subsets +from ..base_datamodule import BaseDataModule +from . import _maker + +logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") + + +class DefaultModule(BaseDataModule): + def __init__( + self, + train_batch_size=1, + predict_batch_size=1, + drop_incomplete_batch=False, + multiproc_kwargs=None, + ): + super().__init__( + train_batch_size=train_batch_size, + predict_batch_size=predict_batch_size, + drop_incomplete_batch=drop_incomplete_batch, + multiproc_kwargs=multiproc_kwargs, + ) + + def setup(self, stage: str): + self.dataset = _maker("tb_idiap", resize_size=256, cc_size=224) + ( + self.train_dataset, + self.validation_dataset, + self.extra_validation_datasets, + self.predict_dataset, + ) = return_subsets(self.dataset) + + +datamodule = DefaultModule