diff --git a/pyproject.toml b/pyproject.toml
index a9b1abf1f2b2b220a92d517f61cdf29d456bbde7..e43899d2dbad33e10c8b4eb67a333f12547aca55 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -471,10 +471,10 @@ mc_ch_in_pc_rgb = "ptbench.data.mc_ch_in_pc.rgb"
 # (with radiological signs)
 mc_ch_in_pc_rs = "ptbench.configs.datasets.mc_ch_in_pc_RS.default"
 # NIH CXR14 (relabeled)
-nih_cxr14 = "ptbench.configs.datasets.nih_cxr14_re.default"
-nih_cxr14_cm = "ptbench.configs.datasets.nih_cxr14_re.cardiomegaly"
+nih_cxr14 = "ptbench.data.nih_cxr14_re.default"
+nih_cxr14_cm = "ptbench.data.nih_cxr14_re.cardiomegaly"
 # NIH CXR14 / PadChest aggregated dataset
-nih_cxr14_pc_idiap = "ptbench.configs.datasets.nih_cxr14_re_pc.idiap"
+nih_cxr14_pc_idiap = "ptbench.data.nih_cxr14_re_pc.idiap"
 # PadChest
 padchest_idiap = "ptbench.data.padchest.idiap"
 padchest_tb_idiap = "ptbench.data.padchest.tb_idiap"
diff --git a/src/ptbench/configs/datasets/nih_cxr14_re/__init__.py b/src/ptbench/configs/datasets/nih_cxr14_re/__init__.py
deleted file mode 100644
index d0e7117c0ef42e4b84eae245d5982cc595a4c2f0..0000000000000000000000000000000000000000
--- a/src/ptbench/configs/datasets/nih_cxr14_re/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-
-def _maker(protocol, size=512):
-    import torchvision.transforms as transforms
-
-    from ....data.nih_cxr14_re import dataset as raw
-    from .. import make_dataset as mk
-
-    # ImageNet normalization
-    normalize = transforms.Normalize(
-        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
-    )
-
-    return mk(
-        [raw.subsets(protocol)],
-        [transforms.Resize((size, size))],
-        [transforms.RandomHorizontalFlip()],
-        [transforms.ToTensor(), normalize],
-    )
diff --git a/src/ptbench/configs/datasets/nih_cxr14_re/cardiomegaly.py b/src/ptbench/configs/datasets/nih_cxr14_re/cardiomegaly.py
deleted file mode 100644
index 0a63a6fd7b00888d323b2fc6a8f68bfcd0a69f83..0000000000000000000000000000000000000000
--- a/src/ptbench/configs/datasets/nih_cxr14_re/cardiomegaly.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""NIH CXR14 dataset for computer-aided diagnosis.
-
-First 40 images with cardiomegaly.
-
-* See :py:mod:`ptbench.data.nih_cxr14_re` for split details
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.nih_cxr14_re` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("cardiomegaly")
diff --git a/src/ptbench/configs/datasets/nih_cxr14_re/default.py b/src/ptbench/configs/datasets/nih_cxr14_re/default.py
deleted file mode 100644
index c1e472d0bd71c0501f6040116fa13e1310544c0e..0000000000000000000000000000000000000000
--- a/src/ptbench/configs/datasets/nih_cxr14_re/default.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
-#
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-"""NIH CXR14 (relabeled) dataset for computer-aided diagnosis (default
-protocol)
-
-* See :py:mod:`ptbench.data.nih_cxr14_re` for split details
-* This configuration resolution: 512 x 512 (default)
-* See :py:mod:`ptbench.data.nih_cxr14_re` for dataset details
-"""
-
-from . import _maker
-
-dataset = _maker("default")
diff --git a/src/ptbench/data/nih_cxr14_re/__init__.py b/src/ptbench/data/nih_cxr14_re/__init__.py
index 8dc3f2232bbe38f9f9219045a10534124830471a..27d1903c5a25a1ccc99520867ad407b3186a3694 100644
--- a/src/ptbench/data/nih_cxr14_re/__init__.py
+++ b/src/ptbench/data/nih_cxr14_re/__init__.py
@@ -72,9 +72,35 @@ def _loader(context, sample):
     return make_delayed(sample, _raw_data_loader)
 
 
-dataset = JSONDataset(
+json_dataset = JSONDataset(
     protocols=_protocols,
     fieldnames=("data", "label"),
     loader=_loader,
 )
 """NIH CXR14 (relabeled) dataset object."""
+
+
+def _maker(protocol: str, size: int = 512):
+    """Build the transformed subsets of one NIH CXR14 (relabeled) protocol.
+
+    ``protocol`` is passed to ``json_dataset.subsets()``; ``size`` is the
+    edge length of the square every image is resized to.  The subsets are
+    handed to ``make_dataset`` together with the resize, random-flip and
+    tensor-conversion/normalization transform lists, and its result is
+    returned.
+    """
+    import torchvision.transforms as transforms
+
+    from .. import make_dataset
+
+    # ImageNet normalization
+    normalize = transforms.Normalize(
+        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
+    )
+
+    return make_dataset(
+        [json_dataset.subsets(protocol)],
+        [transforms.Resize((size, size))],
+        [transforms.RandomHorizontalFlip()],
+        [transforms.ToTensor(), normalize],
+    )
diff --git a/src/ptbench/data/nih_cxr14_re/cardiomegaly.py b/src/ptbench/data/nih_cxr14_re/cardiomegaly.py
new file mode 100644
index 0000000000000000000000000000000000000000..1904ebfa60dade4ff59f770da7f1310a099c798b
--- /dev/null
+++ b/src/ptbench/data/nih_cxr14_re/cardiomegaly.py
@@ -0,0 +1,57 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""NIH CXR14 dataset for computer-aided diagnosis.
+
+First 40 images with cardiomegaly.
+
+* See :py:mod:`ptbench.data.nih_cxr14_re` for split and dataset details
+* This configuration resolution: 512 x 512 (default)
+"""
+
+from clapper.logging import setup
+
+from .. import return_subsets
+from ..base_datamodule import BaseDataModule
+from . import _maker
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+# NOTE(review): "Fold0Module" looks copied from a fold-based datamodule; this
+# class serves the "cardiomegaly" protocol -- consider renaming it.
+class Fold0Module(BaseDataModule):
+    """Datamodule for the ``cardiomegaly`` protocol of NIH CXR14 (relabeled)."""
+
+    def __init__(
+        self,
+        train_batch_size: int = 1,
+        predict_batch_size: int = 1,
+        drop_incomplete_batch: bool = False,
+        multiproc_kwargs=None,
+    ):
+        """Forward every option, unchanged, to :py:class:`BaseDataModule`."""
+        super().__init__(
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str) -> None:
+        """Build the protocol dataset and split it into its subsets.
+
+        ``stage`` is accepted but not consulted: the train, validation,
+        extra-validation and predict datasets are always (re)assigned.
+        """
+        self.dataset = _maker("cardiomegaly")
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)
+
+
+datamodule = Fold0Module
diff --git a/src/ptbench/data/nih_cxr14_re/default.py b/src/ptbench/data/nih_cxr14_re/default.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ea6ef5acc55560ae8db115f3585b40da3cf58b8
--- /dev/null
+++ b/src/ptbench/data/nih_cxr14_re/default.py
@@ -0,0 +1,54 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+"""NIH CXR14 (relabeled) dataset for computer-aided diagnosis (default
+protocol)
+
+* See :py:mod:`ptbench.data.nih_cxr14_re` for split and dataset details
+* This configuration resolution: 512 x 512 (default)
+"""
+
+from clapper.logging import setup
+
+from .. import return_subsets
+from ..base_datamodule import BaseDataModule
+from . import _maker
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+class DefaultModule(BaseDataModule):
+    """Datamodule for the ``default`` protocol of NIH CXR14 (relabeled)."""
+
+    def __init__(
+        self,
+        train_batch_size: int = 1,
+        predict_batch_size: int = 1,
+        drop_incomplete_batch: bool = False,
+        multiproc_kwargs=None,
+    ):
+        """Forward every option, unchanged, to :py:class:`BaseDataModule`."""
+        super().__init__(
+            train_batch_size=train_batch_size,
+            predict_batch_size=predict_batch_size,
+            drop_incomplete_batch=drop_incomplete_batch,
+            multiproc_kwargs=multiproc_kwargs,
+        )
+
+    def setup(self, stage: str) -> None:
+        """Build the protocol dataset and split it into its subsets.
+
+        ``stage`` is accepted but not consulted: the train, validation,
+        extra-validation and predict datasets are always (re)assigned.
+        """
+        self.dataset = _maker("default")
+        (
+            self.train_dataset,
+            self.validation_dataset,
+            self.extra_validation_datasets,
+            self.predict_dataset,
+        ) = return_subsets(self.dataset)
+
+
+datamodule = DefaultModule