diff --git a/src/ptbench/data/tbpoc/datamodule.py b/src/ptbench/data/tbpoc/datamodule.py index 35465bac49ae78db05b769a8d63aee40156f1482..31e2aac4f4607835ff340238cbe3108e79a2a059 100644 --- a/src/ptbench/data/tbpoc/datamodule.py +++ b/src/ptbench/data/tbpoc/datamodule.py @@ -11,7 +11,7 @@ from torchvision.transforms.functional import center_crop, to_tensor from ...utils.rc import load_rc from ..datamodule import CachingDataModule -from ..image_utils import load_pil_grayscale, remove_black_borders +from ..image_utils import remove_black_borders from ..split import JSONDatabaseSplit from ..typing import DatabaseSplit from ..typing import RawDataLoader as _BaseRawDataLoader @@ -57,7 +57,9 @@ class RawDataLoader(_BaseRawDataLoader): sample The sample representation """ - image = load_pil_grayscale(os.path.join(self.datadir, sample[0])) + # images from TBPOC are encoded as grayscale JPEGs, no need to + # call convert("L") here. + image = PIL.Image.open(os.path.join(self.datadir, sample[0])) image = remove_black_borders(image) tensor = to_tensor(image) tensor = center_crop(tensor, min(*tensor.shape[1:])) @@ -102,21 +104,21 @@ class DataModule(CachingDataModule): """TB-POC dataset for computer-aided diagnosis. * Database reference: [TB-POC-2018]_ - * Original resolution (height x width or width x height): 2048 x 2500 pixels - or 2500 x 2048 pixels + * Original resolution (height x width or width x height): 2048 x 2500 pixels + or 2500 x 2048 pixels Data specifications: * Raw data input (on disk): - * jpeg 8-bit grayscale images + * JPEG 8-bit Grayscale images * resolution: fixed to one of the cases above * Output image: * Transforms: - * Load raw jpeg with :py:mod:`PIL` + * Load raw grayscale jpeg with :py:mod:`PIL` * Remove black borders * Convert to torch tensor * Torch center cropping to get square image @@ -124,8 +126,8 @@ class DataModule(CachingDataModule): * Final specifications: * Grayscale, encoded as a single plane tensor, 32-bit floats, - square with varying resolutions, depending on black borders' sizes - on the input image + square with varying resolutions (2048 x 2048 being the maximum), + but also depending on black borders' sizes on the input image. * Labels: 0 (healthy), 1 (active tuberculosis) """ @@ -134,5 +136,3 @@ class DataModule(CachingDataModule): database_split=make_split(split_filename), raw_data_loader=RawDataLoader(), ) - - diff --git a/src/ptbench/data/tbpoc/fold_0.py b/src/ptbench/data/tbpoc/fold_0.py index 972e7188f13a0b7e67b3581eb87c0d20acd38794..775f64cfbf32624d4383b240c2b9377305abaa74 100644 --- a/src/ptbench/data/tbpoc/fold_0.py +++ b/src/ptbench/data/tbpoc/fold_0.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 0) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""TB-POC dataset for TB detection (cross validation fold 0). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_1.py b/src/ptbench/data/tbpoc/fold_1.py index 79b9bfcaec144157770c3be12705f73fcb0f5c79..6f0f137facfd1c88b42d43c3334427f56e32b7b5 100644 --- a/src/ptbench/data/tbpoc/fold_1.py +++ b/src/ptbench/data/tbpoc/fold_1.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 1) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""TB-POC dataset for TB detection (cross validation fold 1). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_2.py b/src/ptbench/data/tbpoc/fold_2.py index 9d41fb595637dce8f31944b4aa2eeee2bd60e58d..662fd32cb76caca49ccb04fcb2aadc987642850c 100644 --- a/src/ptbench/data/tbpoc/fold_2.py +++ b/src/ptbench/data/tbpoc/fold_2.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 2) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""TB-POC dataset for TB detection (cross validation fold 2). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_3.py b/src/ptbench/data/tbpoc/fold_3.py index 08672b3f325a8e60e19b8254ea89bc59fc3ad78b..c52b8c2e4b6353631ac812f2d0dd7c7fb31dcf45 100644 --- a/src/ptbench/data/tbpoc/fold_3.py +++ b/src/ptbench/data/tbpoc/fold_3.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 3) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""TB-POC dataset for TB detection (cross validation fold 3). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_4.py b/src/ptbench/data/tbpoc/fold_4.py index 8354a4c2d7038c35620a7220afa7e9a8731d44fd..6de0dc13e8381707e31c727b465271799d4c7f22 100644 --- a/src/ptbench/data/tbpoc/fold_4.py +++ b/src/ptbench/data/tbpoc/fold_4.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 4) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""TB-POC dataset for TB detection (cross validation fold 4). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_5.py b/src/ptbench/data/tbpoc/fold_5.py index cb7f95612e23dca6af8d3d06dfaf6ae76319ed6f..bdca5a36a3f4c65268256ed6d82df124ddd4fe09 100644 --- a/src/ptbench/data/tbpoc/fold_5.py +++ b/src/ptbench/data/tbpoc/fold_5.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 5) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""TB-POC dataset for TB detection (cross validation fold 5). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_6.py b/src/ptbench/data/tbpoc/fold_6.py index 379211aad631cf9beac280d598f52beb6746eac0..c17ba0ba00ffc7f14ade642cae4d3433b13e031e 100644 --- a/src/ptbench/data/tbpoc/fold_6.py +++ b/src/ptbench/data/tbpoc/fold_6.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 6) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""TB-POC dataset for TB detection (cross validation fold 6). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_7.py b/src/ptbench/data/tbpoc/fold_7.py index b846b88af5cf7375f578ee2ffbc24055a4a3ff85..4310f2f43b054c7c0e7d7d9d73521590d6424c27 100644 --- a/src/ptbench/data/tbpoc/fold_7.py +++ b/src/ptbench/data/tbpoc/fold_7.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 7) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""TB-POC dataset for TB detection (cross validation fold 7). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_8.py b/src/ptbench/data/tbpoc/fold_8.py index acfd42964fe21cf15c1d47a5bc5df794fbcba961..d7fa5d100803aa91ba956977372cd218bbc2b428 100644 --- a/src/ptbench/data/tbpoc/fold_8.py +++ b/src/ptbench/data/tbpoc/fold_8.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 8) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""TB-POC dataset for TB detection (cross validation fold 8). + +See :py:class:`DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/fold_9.py b/src/ptbench/data/tbpoc/fold_9.py index 4634068e5942bf9d7062876ee2007702083de1ed..f37e1f36a094580fe3b404839851bb23ebd63b43 100644 --- a/src/ptbench/data/tbpoc/fold_9.py +++ b/src/ptbench/data/tbpoc/fold_9.py @@ -2,20 +2,10 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""TB-POC dataset for TB detection (cross validation fold 9) - -* Split reference: none (stratified kfolding) - -* Stratified kfold protocol: - * Training samples: 72% of TB and healthy CXR (including labels) - * Validation samples: 18% of TB and healthy CXR (including labels) - * Test samples: 10% of TB and healthy CXR (including labels) - -* This configuration resolution: varying depending of black borders on original - image -* See :py:mod:`ptbench.data.tbpoc` for dataset details -""" - from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""TB-POC dataset for TB detection (cross validation fold 9). + +See :py:class:`DataModule` for technical details. +"""