Skip to content
Snippets Groups Projects
Commit 42726879 authored by André Anjos's avatar André Anjos :speech_balloon:
Browse files

[tbx11k] Implements database, improves documentation

parent 9b6d7386
No related branches found
No related tags found
1 merge request: !6 "Making use of LightningDataModule and simplification of data loading"
Pipeline #76661 failed
Showing
with 56 additions and 356 deletions
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from .datamodule import DataModule
# Module-level datamodule instance built from "v2-fold-3.json"
# (presumably fold 3 of the "v2" cross-validation splits - confirm against DataModule).
datamodule = DataModule("v2-fold-3.json")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from .datamodule import DataModule
# Module-level datamodule instance built from "v2-fold-4.json"
# (presumably fold 4 of the "v2" cross-validation splits - confirm against DataModule).
datamodule = DataModule("v2-fold-4.json")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from .datamodule import DataModule
# Module-level datamodule instance built from "v2-fold-5.json"
# (presumably fold 5 of the "v2" cross-validation splits - confirm against DataModule).
datamodule = DataModule("v2-fold-5.json")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from .datamodule import DataModule
# Module-level datamodule instance built from "v2-fold-6.json"
# (presumably fold 6 of the "v2" cross-validation splits - confirm against DataModule).
datamodule = DataModule("v2-fold-6.json")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from .datamodule import DataModule
# Module-level datamodule instance built from "v2-fold-7.json"
# (presumably fold 7 of the "v2" cross-validation splits - confirm against DataModule).
datamodule = DataModule("v2-fold-7.json")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from .datamodule import DataModule
# Module-level datamodule instance built from "v2-fold-8.json"
# (presumably fold 8 of the "v2" cross-validation splits - confirm against DataModule).
datamodule = DataModule("v2-fold-8.json")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from .datamodule import DataModule
# Module-level datamodule instance built from "v2-fold-9.json"
# (presumably fold 9 of the "v2" cross-validation splits - confirm against DataModule).
datamodule = DataModule("v2-fold-9.json")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from .datamodule import DataModule
# Module-level datamodule instance built from "v2-others-vs-atb.json"
# (presumably an "others versus active TB" split - confirm against DataModule).
datamodule = DataModule("v2-others-vs-atb.json")
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""TBX11K simplified dataset for computer-aided diagnosis.
The TBX11K database has been established to foster research
in computer-aided diagnosis of pulmonary diseases with a special
focus on tuberculosis (aTB). The dataset was specifically
designed to be used with CNNs. It contains 11,000 chest X-ray
images, each of a unique patient. They were labeled by expert
radiologists with 5 - 10+ years of experience. Possible labels
are: "healthy", "active TB", "latent TB", and "sick & non-tb".
The version of the dataset used in this benchmark is a simplified one.
* Reference: [TBX11K-SIMPLIFIED-2020]_
* Original (released) resolution (height x width or width x height): 512 x 512
* Split reference: none
* Protocol ``default``:
* Training samples: 62.5% of TB and healthy CXR (including labels)
* Validation samples: 15.9% of TB and healthy CXR (including labels)
* Test samples: 21.6% of TB and healthy CXR (including labels)
"""
import importlib.resources
import os
from ...utils.rc import load_rc
from ..dataset import JSONDataset
from ..loader import load_pil_grayscale, make_delayed, make_delayed_bbox
# Resource handles, relative to this package, for every protocol definition
# file: the "default" split first, then the ten folds "fold_0" to "fold_9".
_protocols = [
    importlib.resources.files(__name__).joinpath(filename)
    for filename in (
        "default.json.bz2",
        "fold_0.json.bz2",
        "fold_1.json.bz2",
        "fold_2.json.bz2",
        "fold_3.json.bz2",
        "fold_4.json.bz2",
        "fold_5.json.bz2",
        "fold_6.json.bz2",
        "fold_7.json.bz2",
        "fold_8.json.bz2",
        "fold_9.json.bz2",
    )
]

# Directory against which sample paths are resolved: the value of the
# "datadir.tbx11k_simplified" rc entry, or the resolved current directory
# when that entry is absent.
_datadir = load_rc().get("datadir.tbx11k_simplified", os.path.realpath(os.curdir))
def _raw_data_loader(sample):
    """Load the image referenced by ``sample`` together with its label.

    Parameters
    ----------
    sample
        Mapping providing at least the keys ``"data"`` (a path joined onto
        ``_datadir``) and ``"label"``.

    Returns
    -------
    dict
        ``{"data": <result of load_pil_grayscale>, "label": sample["label"]}``.
    """
    image_path = os.path.join(_datadir, sample["data"])  # type: ignore
    return {
        "data": load_pil_grayscale(image_path),
        "label": sample["label"],
    }
def _raw_data_loader_bbox(sample):
    """Load the image referenced by ``sample`` with its label and bounding boxes.

    Parameters
    ----------
    sample
        Mapping providing at least the keys ``"data"`` (a path joined onto
        ``_datadir``), ``"label"`` and ``"bboxes"``.

    Returns
    -------
    dict
        Dictionary with keys ``"data"`` (result of ``load_pil_grayscale``),
        ``"label"`` and ``"bboxes"`` (both copied from ``sample`` as-is).
    """
    image_path = os.path.join(_datadir, sample["data"])  # type: ignore
    return {
        "data": load_pil_grayscale(image_path),
        "label": sample["label"],
        "bboxes": sample["bboxes"],
    }
def _loader(context, sample):
    """Return a delayed version of ``sample`` backed by ``_raw_data_loader``.

    ``context`` is accepted for signature compatibility but never read: the
    database is homogeneous, so no per-subset information is needed.
    """
    # Delayed samples avoid loading all images at once.
    delayed_sample = make_delayed(sample, _raw_data_loader)
    return delayed_sample
def _loader_bbox(context, sample):
    """Return a delayed version of ``sample`` backed by ``_raw_data_loader_bbox``.

    ``context`` is accepted for signature compatibility but never read: the
    database is homogeneous, so no per-subset information is needed.
    """
    # Delayed samples avoid loading all images at once.
    delayed_sample = make_delayed_bbox(sample, _raw_data_loader_bbox)
    return delayed_sample
# Dataset over all protocol files whose samples expose the fields
# "data" and "label"; each sample is produced by ``_loader``.
json_dataset = JSONDataset(
    protocols=_protocols,
    fieldnames=("data", "label"),
    loader=_loader,
)
# Same protocol files, but samples additionally expose "bboxes" and are
# produced by ``_loader_bbox``.
json_dataset_with_bboxes = JSONDataset(
    protocols=_protocols,
    fieldnames=("data", "label", "bboxes"),
    loader=_loader_bbox,
)
# NOTE(review): the string below follows the bbox variant, so documentation
# tools would attach it to ``json_dataset_with_bboxes`` only - confirm intent.
"""TBX11K simplified dataset object."""
def _maker(protocol, RGB=False):
    """Assemble the dataset for ``protocol`` through ``make_dataset``.

    Parameters
    ----------
    protocol
        Name handed to ``json_dataset.subsets`` to select the subsets.
    RGB
        When true, the post-transform list converts each input with
        ``x.convert("RGB")`` and then applies ``ToTensor``; otherwise the
        post-transform list is empty.

    Returns
    -------
    Whatever ``make_dataset`` returns for the selected subsets.
    """
    from torchvision import transforms

    from .. import make_dataset
    from ..augmentations import ElasticDeformation

    post_transforms = (
        [
            transforms.Lambda(lambda x: x.convert("RGB")),
            transforms.ToTensor(),
        ]
        if RGB
        else []
    )

    selected_subsets = json_dataset.subsets(protocol)
    # The empty second list and the ElasticDeformation list are presumably the
    # base transforms and augmentations - confirm against make_dataset.
    augmentations = [ElasticDeformation(p=0.8)]
    return make_dataset([selected_subsets], [], augmentations, post_transforms)
File deleted
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""TBX11k simplified dataset for TB detection (default protocol)
* Split reference: first 62.5% of TB and healthy CXR for "train", 15.9% for
  "validation", 21.6% for "test"
* This split only consists of healthy and active TB samples
* "Latent TB" or "sick & non-TB" samples are not included in this configuration
* This configuration resolution: 512 x 512 (default)
* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details
"""
from clapper.logging import setup
from .. import return_subsets
from ..base_datamodule import BaseDataModule
from . import _maker
# Logger named after the first dotted component of this module's name.
logger = setup(__name__.partition(".")[0], format="%(levelname)s: %(message)s")


class DefaultModule(BaseDataModule):
    """Datamodule serving the ``default`` protocol of TBX11K simplified."""

    def __init__(
        self,
        train_batch_size=1,
        predict_batch_size=1,
        drop_incomplete_batch=False,
        multiproc_kwargs=None,
    ):
        """Forward every option, unchanged, to ``BaseDataModule``."""
        base_options = dict(
            train_batch_size=train_batch_size,
            predict_batch_size=predict_batch_size,
            drop_incomplete_batch=drop_incomplete_batch,
            multiproc_kwargs=multiproc_kwargs,
        )
        super().__init__(**base_options)

    def setup(self, stage: str):
        """Build the ``default`` dataset and store its four subsets.

        ``stage`` is accepted but not used. The values returned by
        ``return_subsets`` are stored, in order, as the train, validation,
        extra-validation and predict datasets.
        """
        self.dataset = _maker("default")
        subsets = return_subsets(self.dataset)
        (
            self.train_dataset,
            self.validation_dataset,
            self.extra_validation_datasets,
            self.predict_dataset,
        ) = subsets


# The class itself (not an instance) is what this module exposes.
datamodule = DefaultModule
File deleted
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""TBX11k simplified dataset for TB detection (cross-validation fold 0)
* Split reference: first 62.5% of TB and healthy CXR for "train", 15.9% for
  "validation", 21.6% for "test"
* This split only consists of healthy and active TB samples
* "Latent TB" or "sick & non-TB" samples are not included in this configuration
* This configuration resolution: 512 x 512 (default)
* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details
"""
from clapper.logging import setup
from .. import return_subsets
from ..base_datamodule import BaseDataModule
from . import _maker
# Logger named after the first dotted component of this module's name.
logger = setup(__name__.partition(".")[0], format="%(levelname)s: %(message)s")


class Fold0Module(BaseDataModule):
    """Datamodule serving the ``fold_0`` protocol of TBX11K simplified."""

    def __init__(
        self,
        train_batch_size=1,
        predict_batch_size=1,
        drop_incomplete_batch=False,
        multiproc_kwargs=None,
    ):
        """Forward every option, unchanged, to ``BaseDataModule``."""
        base_options = dict(
            train_batch_size=train_batch_size,
            predict_batch_size=predict_batch_size,
            drop_incomplete_batch=drop_incomplete_batch,
            multiproc_kwargs=multiproc_kwargs,
        )
        super().__init__(**base_options)

    def setup(self, stage: str):
        """Build the ``fold_0`` dataset and store its four subsets.

        ``stage`` is accepted but not used. The values returned by
        ``return_subsets`` are stored, in order, as the train, validation,
        extra-validation and predict datasets.
        """
        self.dataset = _maker("fold_0")
        subsets = return_subsets(self.dataset)
        (
            self.train_dataset,
            self.validation_dataset,
            self.extra_validation_datasets,
            self.predict_dataset,
        ) = subsets


# The class itself (not an instance) is what this module exposes.
datamodule = Fold0Module
File deleted
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""TBX11k simplified dataset for TB detection (cross-validation fold 1)
* Split reference: first 62.5% of TB and healthy CXR for "train", 15.9% for
  "validation", 21.6% for "test"
* This split only consists of healthy and active TB samples
* "Latent TB" or "sick & non-TB" samples are not included in this configuration
* This configuration resolution: 512 x 512 (default)
* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details
"""
from clapper.logging import setup
from .. import return_subsets
from ..base_datamodule import BaseDataModule
from . import _maker
# Logger named after the first dotted component of this module's name.
logger = setup(__name__.partition(".")[0], format="%(levelname)s: %(message)s")


class Fold0Module(BaseDataModule):
    """Datamodule serving the ``fold_1`` protocol of TBX11K simplified.

    NOTE(review): the class is called ``Fold0Module`` although it loads
    ``fold_1`` - likely a copy-paste leftover; the name is kept so existing
    references keep working.
    """

    def __init__(
        self,
        train_batch_size=1,
        predict_batch_size=1,
        drop_incomplete_batch=False,
        multiproc_kwargs=None,
    ):
        """Forward every option, unchanged, to ``BaseDataModule``."""
        base_options = dict(
            train_batch_size=train_batch_size,
            predict_batch_size=predict_batch_size,
            drop_incomplete_batch=drop_incomplete_batch,
            multiproc_kwargs=multiproc_kwargs,
        )
        super().__init__(**base_options)

    def setup(self, stage: str):
        """Build the ``fold_1`` dataset and store its four subsets.

        ``stage`` is accepted but not used. The values returned by
        ``return_subsets`` are stored, in order, as the train, validation,
        extra-validation and predict datasets.
        """
        self.dataset = _maker("fold_1")
        subsets = return_subsets(self.dataset)
        (
            self.train_dataset,
            self.validation_dataset,
            self.extra_validation_datasets,
            self.predict_dataset,
        ) = subsets


# The class itself (not an instance) is what this module exposes.
datamodule = Fold0Module
File deleted
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""TBX11k simplified dataset for TB detection (cross-validation fold 2)
* Split reference: first 62.5% of TB and healthy CXR for "train", 15.9% for
  "validation", 21.6% for "test"
* This split only consists of healthy and active TB samples
* "Latent TB" or "sick & non-TB" samples are not included in this configuration
* This configuration resolution: 512 x 512 (default)
* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details
"""
from clapper.logging import setup
from .. import return_subsets
from ..base_datamodule import BaseDataModule
from . import _maker
# Logger named after the first dotted component of this module's name.
logger = setup(__name__.partition(".")[0], format="%(levelname)s: %(message)s")


class Fold0Module(BaseDataModule):
    """Datamodule serving the ``fold_2`` protocol of TBX11K simplified.

    NOTE(review): the class is called ``Fold0Module`` although it loads
    ``fold_2`` - likely a copy-paste leftover; the name is kept so existing
    references keep working.
    """

    def __init__(
        self,
        train_batch_size=1,
        predict_batch_size=1,
        drop_incomplete_batch=False,
        multiproc_kwargs=None,
    ):
        """Forward every option, unchanged, to ``BaseDataModule``."""
        base_options = dict(
            train_batch_size=train_batch_size,
            predict_batch_size=predict_batch_size,
            drop_incomplete_batch=drop_incomplete_batch,
            multiproc_kwargs=multiproc_kwargs,
        )
        super().__init__(**base_options)

    def setup(self, stage: str):
        """Build the ``fold_2`` dataset and store its four subsets.

        ``stage`` is accepted but not used. The values returned by
        ``return_subsets`` are stored, in order, as the train, validation,
        extra-validation and predict datasets.
        """
        self.dataset = _maker("fold_2")
        subsets = return_subsets(self.dataset)
        (
            self.train_dataset,
            self.validation_dataset,
            self.extra_validation_datasets,
            self.predict_dataset,
        ) = subsets


# The class itself (not an instance) is what this module exposes.
datamodule = Fold0Module
File deleted
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""TBX11k simplified dataset for TB detection (cross-validation fold 3)
* Split reference: first 62.5% of TB and healthy CXR for "train", 15.9% for
  "validation", 21.6% for "test"
* This split only consists of healthy and active TB samples
* "Latent TB" or "sick & non-TB" samples are not included in this configuration
* This configuration resolution: 512 x 512 (default)
* See :py:mod:`ptbench.data.tbx11k_simplified` for dataset details
"""
from clapper.logging import setup
from .. import return_subsets
from ..base_datamodule import BaseDataModule
from . import _maker
# Logger named after the first dotted component of this module's name.
logger = setup(__name__.partition(".")[0], format="%(levelname)s: %(message)s")


class Fold0Module(BaseDataModule):
    """Datamodule serving the ``fold_3`` protocol of TBX11K simplified.

    NOTE(review): the class is called ``Fold0Module`` although it loads
    ``fold_3`` - likely a copy-paste leftover; the name is kept so existing
    references keep working.
    """

    def __init__(
        self,
        train_batch_size=1,
        predict_batch_size=1,
        drop_incomplete_batch=False,
        multiproc_kwargs=None,
    ):
        """Forward every option, unchanged, to ``BaseDataModule``."""
        base_options = dict(
            train_batch_size=train_batch_size,
            predict_batch_size=predict_batch_size,
            drop_incomplete_batch=drop_incomplete_batch,
            multiproc_kwargs=multiproc_kwargs,
        )
        super().__init__(**base_options)

    def setup(self, stage: str):
        """Build the ``fold_3`` dataset and store its four subsets.

        ``stage`` is accepted but not used. The values returned by
        ``return_subsets`` are stored, in order, as the train, validation,
        extra-validation and predict datasets.
        """
        self.dataset = _maker("fold_3")
        subsets = return_subsets(self.dataset)
        (
            self.train_dataset,
            self.validation_dataset,
            self.extra_validation_datasets,
            self.predict_dataset,
        ) = subsets


# The class itself (not an instance) is what this module exposes.
datamodule = Fold0Module
File deleted
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment