Skip to content
Snippets Groups Projects
Commit 22caca5f authored by André Anjos
Browse files

[data.tbpoc] Minor adjustments

parent bcc1e440
No related branches found
No related tags found
2 merge requests: !11 "updated TB-POC dataset and corresponding tests", !6 "Making use of LightningDataModule and simplification of data loading"
Pipeline #76733 failed
...@@ -11,7 +11,7 @@ from torchvision.transforms.functional import center_crop, to_tensor ...@@ -11,7 +11,7 @@ from torchvision.transforms.functional import center_crop, to_tensor
from ...utils.rc import load_rc from ...utils.rc import load_rc
from ..datamodule import CachingDataModule from ..datamodule import CachingDataModule
from ..image_utils import load_pil_grayscale, remove_black_borders from ..image_utils import remove_black_borders
from ..split import JSONDatabaseSplit from ..split import JSONDatabaseSplit
from ..typing import DatabaseSplit from ..typing import DatabaseSplit
from ..typing import RawDataLoader as _BaseRawDataLoader from ..typing import RawDataLoader as _BaseRawDataLoader
...@@ -57,7 +57,9 @@ class RawDataLoader(_BaseRawDataLoader): ...@@ -57,7 +57,9 @@ class RawDataLoader(_BaseRawDataLoader):
sample sample
The sample representation The sample representation
""" """
image = load_pil_grayscale(os.path.join(self.datadir, sample[0])) # images from TBPOC are encoded as grayscale JPEGs, no need to
# call convert("L") here.
image = PIL.Image.open(os.path.join(self.datadir, sample[0]))
image = remove_black_borders(image) image = remove_black_borders(image)
tensor = to_tensor(image) tensor = to_tensor(image)
tensor = center_crop(tensor, min(*tensor.shape[1:])) tensor = center_crop(tensor, min(*tensor.shape[1:]))
...@@ -102,21 +104,21 @@ class DataModule(CachingDataModule): ...@@ -102,21 +104,21 @@ class DataModule(CachingDataModule):
"""TB-POC dataset for computer-aided diagnosis. """TB-POC dataset for computer-aided diagnosis.
* Database reference: [TB-POC-2018]_ * Database reference: [TB-POC-2018]_
* Original resolution (height x width or width x height): 2048 x 2500 pixels * Original resolution (height x width or width x height): 2048 x 2500 pixels
or 2500 x 2048 pixels or 2500 x 2048 pixels
Data specifications: Data specifications:
* Raw data input (on disk): * Raw data input (on disk):
* jpeg 8-bit grayscale images * JPEG 8-bit Grayscale images
* resolution: fixed to one of the cases above * resolution: fixed to one of the cases above
* Output image: * Output image:
* Transforms: * Transforms:
* Load raw jpeg with :py:mod:`PIL` * Load raw grayscale jpeg with :py:mod:`PIL`
* Remove black borders * Remove black borders
* Convert to torch tensor * Convert to torch tensor
* Torch center cropping to get square image * Torch center cropping to get square image
...@@ -124,8 +126,8 @@ class DataModule(CachingDataModule): ...@@ -124,8 +126,8 @@ class DataModule(CachingDataModule):
* Final specifications: * Final specifications:
* Grayscale, encoded as a single plane tensor, 32-bit floats, * Grayscale, encoded as a single plane tensor, 32-bit floats,
square with varying resolutions, depending on black borders' sizes square with varying resolutions (2048 x 2048 being the maximum),
on the input image but also depending on black borders' sizes on the input image.
* Labels: 0 (healthy), 1 (active tuberculosis) * Labels: 0 (healthy), 1 (active tuberculosis)
""" """
...@@ -134,5 +136,3 @@ class DataModule(CachingDataModule): ...@@ -134,5 +136,3 @@ class DataModule(CachingDataModule):
database_split=make_split(split_filename), database_split=make_split(split_filename),
raw_data_loader=RawDataLoader(), raw_data_loader=RawDataLoader(),
) )
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 0)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-0.json") datamodule = DataModule("fold-0.json")
"""TB-POC dataset for TB detection (cross validation fold 0).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 1)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-1.json") datamodule = DataModule("fold-1.json")
"""TB-POC dataset for TB detection (cross validation fold 1).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 2)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-2.json") datamodule = DataModule("fold-2.json")
"""TB-POC dataset for TB detection (cross validation fold 2).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 3)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-3.json") datamodule = DataModule("fold-3.json")
"""TB-POC dataset for TB detection (cross validation fold 3).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 4)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-4.json") datamodule = DataModule("fold-4.json")
"""TB-POC dataset for TB detection (cross validation fold 4).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 5)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-5.json") datamodule = DataModule("fold-5.json")
"""TB-POC dataset for TB detection (cross validation fold 5).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 6)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-6.json") datamodule = DataModule("fold-6.json")
"""TB-POC dataset for TB detection (cross validation fold 6).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 7)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-7.json") datamodule = DataModule("fold-7.json")
"""TB-POC dataset for TB detection (cross validation fold 7).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 8)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-8.json") datamodule = DataModule("fold-8.json")
"""TB-POC dataset for TB detection (cross validation fold 8).
See :py:class:`DataModule` for technical details.
"""
...@@ -2,20 +2,10 @@ ...@@ -2,20 +2,10 @@
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""TB-POC dataset for TB detection (cross validation fold 9)
* Split reference: none (stratified kfolding)
* Stratified kfold protocol:
* Training samples: 72% of TB and healthy CXR (including labels)
* Validation samples: 18% of TB and healthy CXR (including labels)
* Test samples: 10% of TB and healthy CXR (including labels)
* This configuration resolution: varies depending on the black borders of the original
image
* See :py:mod:`ptbench.data.tbpoc` for dataset details
"""
from .datamodule import DataModule from .datamodule import DataModule
datamodule = DataModule("fold-9.json") datamodule = DataModule("fold-9.json")
"""TB-POC dataset for TB detection (cross validation fold 9).
See :py:class:`DataModule` for technical details.
"""
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment