From d6cbf9b1095a5f73dac6f99a4f93259ee3828add Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Wed, 2 Aug 2023 19:49:22 +0200 Subject: [PATCH] [data.hivtb] Minor adjustments --- src/ptbench/data/hivtb/datamodule.py | 18 ++++++++++-------- src/ptbench/data/hivtb/fold_0.py | 16 +++------------- src/ptbench/data/hivtb/fold_1.py | 16 +++------------- src/ptbench/data/hivtb/fold_2.py | 16 +++------------- src/ptbench/data/hivtb/fold_3.py | 16 +++------------- src/ptbench/data/hivtb/fold_4.py | 16 +++------------- src/ptbench/data/hivtb/fold_5.py | 16 +++------------- src/ptbench/data/hivtb/fold_6.py | 16 +++------------- src/ptbench/data/hivtb/fold_7.py | 16 +++------------- src/ptbench/data/hivtb/fold_8.py | 16 +++------------- src/ptbench/data/hivtb/fold_9.py | 16 +++------------- 11 files changed, 40 insertions(+), 138 deletions(-) diff --git a/src/ptbench/data/hivtb/datamodule.py b/src/ptbench/data/hivtb/datamodule.py index 63075c61..b5b84ec4 100644 --- a/src/ptbench/data/hivtb/datamodule.py +++ b/src/ptbench/data/hivtb/datamodule.py @@ -11,7 +11,7 @@ from torchvision.transforms.functional import center_crop, to_tensor from ...utils.rc import load_rc from ..datamodule import CachingDataModule -from ..image_utils import load_pil_grayscale, remove_black_borders +from ..image_utils import remove_black_borders from ..split import JSONDatabaseSplit from ..typing import DatabaseSplit from ..typing import RawDataLoader as _BaseRawDataLoader @@ -54,7 +54,9 @@ class RawDataLoader(_BaseRawDataLoader): sample The sample representation """ - image = load_pil_grayscale(os.path.join(self.datadir, sample[0])) + image = PIL.Image.open(os.path.join(self.datadir, sample[0])).convert( + "L" + ) image = remove_black_borders(image) tensor = to_tensor(image) tensor = center_crop(tensor, min(*tensor.shape[1:])) @@ -99,21 +101,21 @@ class DataModule(CachingDataModule): """HIV-TB dataset for computer-aided diagnosis (only BMP files) * Database reference: [HIV-TB-2019]_ - * Original resolution (height x width or width x height): 2048 x 2500 pixels - or 2500 x 2048 pixels - + * Original resolution, varying with most images being 2048 x 2500 pixels + or 2500 x 2048 pixels, but not all. + Data specifications: * Raw data input (on disk): - * BMP images 8 bit grayscale - * resolution fixed to one of the cases above + * BMP (BMP3) and JPEG grayscale images encoded as 8-bit RGB, with + varying resolution * Output image: * Transforms: - * Load raw BMP with :py:mod:`PIL` + * Load raw BMP or JPEG with :py:mod:`PIL` * Remove black borders * Convert to torch tensor * Torch center cropping to get square image diff --git a/src/ptbench/data/hivtb/fold_0.py b/src/ptbench/data/hivtb/fold_0.py index ba9e9150..57d77952 100644 --- a/src/ptbench/data/hivtb/fold_0.py +++ b/src/ptbench/data/hivtb/fold_0.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 0) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""HIV-TB dataset for TB detection (cross validation fold 0). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_1.py b/src/ptbench/data/hivtb/fold_1.py index 84fb7581..c91a968f 100644 --- a/src/ptbench/data/hivtb/fold_1.py +++ b/src/ptbench/data/hivtb/fold_1.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 1) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""HIV-TB dataset for TB detection (cross validation fold 1). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_2.py b/src/ptbench/data/hivtb/fold_2.py index a5f5e97a..323e80a0 100644 --- a/src/ptbench/data/hivtb/fold_2.py +++ b/src/ptbench/data/hivtb/fold_2.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 2) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""HIV-TB dataset for TB detection (cross validation fold 2). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_3.py b/src/ptbench/data/hivtb/fold_3.py index 1b643ae4..1eed4c05 100644 --- a/src/ptbench/data/hivtb/fold_3.py +++ b/src/ptbench/data/hivtb/fold_3.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 3) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""HIV-TB dataset for TB detection (cross validation fold 3). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_4.py b/src/ptbench/data/hivtb/fold_4.py index 581eb85c..9cfa6186 100644 --- a/src/ptbench/data/hivtb/fold_4.py +++ b/src/ptbench/data/hivtb/fold_4.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 4) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""HIV-TB dataset for TB detection (cross validation fold 4). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_5.py b/src/ptbench/data/hivtb/fold_5.py index 47ae66d1..591fef37 100644 --- a/src/ptbench/data/hivtb/fold_5.py +++ b/src/ptbench/data/hivtb/fold_5.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 5) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""HIV-TB dataset for TB detection (cross validation fold 5). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_6.py b/src/ptbench/data/hivtb/fold_6.py index c93232f4..fb5e1614 100644 --- a/src/ptbench/data/hivtb/fold_6.py +++ b/src/ptbench/data/hivtb/fold_6.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 6) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""HIV-TB dataset for TB detection (cross validation fold 6). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_7.py b/src/ptbench/data/hivtb/fold_7.py index 33d5cc83..d64db483 100644 --- a/src/ptbench/data/hivtb/fold_7.py +++ b/src/ptbench/data/hivtb/fold_7.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 7) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""HIV-TB dataset for TB detection (cross validation fold 7). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_8.py b/src/ptbench/data/hivtb/fold_8.py index 91d89557..8a0f87d1 100644 --- a/src/ptbench/data/hivtb/fold_8.py +++ b/src/ptbench/data/hivtb/fold_8.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 8) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""HIV-TB dataset for TB detection (cross validation fold 8). +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/hivtb/fold_9.py b/src/ptbench/data/hivtb/fold_9.py index 0e0063e8..d92de50e 100644 --- a/src/ptbench/data/hivtb/fold_9.py +++ b/src/ptbench/data/hivtb/fold_9.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for TB detection (cross validation fold 9) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: 2048 x 2048 (default) -* See :py:mod:`ptbench.data.hivtb` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""HIV-TB dataset for TB detection (cross validation fold 9). +See :py:class:`DataModule` for technical details. +""" -- GitLab