Skip to content
Snippets Groups Projects
Commit 37f89a95 authored by André Anjos
Browse files

[data.montgomery_shenzhen_indian_padchest] Port to new lightning infrastructure

parent c197041c
No related branches found
No related tags found
1 merge request: !6 "Making use of LightningDataModule and simplification of data loading"
Pipeline #76709 canceled
...@@ -231,7 +231,7 @@ padchest-cardiomegaly-idiap = "ptbench.data.padchest.cardiomegaly_idiap" ...@@ -231,7 +231,7 @@ padchest-cardiomegaly-idiap = "ptbench.data.padchest.cardiomegaly_idiap"
nih-cxr14-padchest = "ptbench.data.nih_cxr14_padchest.idiap" nih-cxr14-padchest = "ptbench.data.nih_cxr14_padchest.idiap"
# montgomery-shenzhen-indian-padchest aggregated dataset # montgomery-shenzhen-indian-padchest aggregated dataset
mc_ch_in_pc = "ptbench.data.mc_ch_in_pc.default" montgomery-shenzhen-indian-padchest = "ptbench.data.montgomery_shenzhen_indian_padchest.default"
[tool.setuptools] [tool.setuptools]
zip-safe = true zip-safe = true
......
# Copyright © 2022 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and Padchest
datasets."""
from clapper.logging import setup
from torch.utils.data.dataset import ConcatDataset
from .. import return_subsets
from ..base_datamodule import BaseDataModule, get_dataset_from_module
from ..indian.default import datamodule as indian_datamodule
from ..montgomery.default import datamodule as mc_datamodule
from ..padchest.tb_idiap import datamodule as pc_datamodule
from ..shenzhen.default import datamodule as ch_datamodule
# Logger registered under the first component of this module's dotted name
# (i.e. the top-level package), emitting records as "LEVEL: message".
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
class DefaultModule(BaseDataModule):
    """Datamodule aggregating Montgomery, Shenzhen, Indian and Padchest.

    The datasets of the four underlying datamodules are retrieved through
    ``get_dataset_from_module`` and concatenated subset by subset.

    Parameters
    ----------
    train_batch_size
        Forwarded to ``BaseDataModule`` and to every aggregated datamodule.
    predict_batch_size
        Forwarded to ``BaseDataModule`` and to every aggregated datamodule.
    drop_incomplete_batch
        Forwarded to ``BaseDataModule`` and to every aggregated datamodule.
    multiproc_kwargs
        Forwarded to ``BaseDataModule`` and to every aggregated datamodule.
    """

    def __init__(
        self,
        train_batch_size=1,
        predict_batch_size=1,
        drop_incomplete_batch=False,
        multiproc_kwargs=None,
    ):
        # Stored on the instance (before delegating to the base class) so
        # that ``setup`` can hand the same settings to each sub-datamodule.
        self.train_batch_size = train_batch_size
        self.predict_batch_size = predict_batch_size
        self.drop_incomplete_batch = drop_incomplete_batch
        self.multiproc_kwargs = multiproc_kwargs

        super().__init__(
            train_batch_size=train_batch_size,
            predict_batch_size=predict_batch_size,
            drop_incomplete_batch=drop_incomplete_batch,
            multiproc_kwargs=multiproc_kwargs,
        )

    def setup(self, stage: str):
        """Build the concatenated datasets for the given ``stage``.

        Populates ``self.dataset`` with the ``__train__``, ``train``,
        ``__valid__`` and ``test`` subsets, then unpacks the result of
        ``return_subsets`` into ``train_dataset``, ``validation_dataset``,
        ``extra_validation_datasets`` and ``predict_dataset``.
        """
        forwarded = dict(
            train_batch_size=self.train_batch_size,
            predict_batch_size=self.predict_batch_size,
            drop_incomplete_batch=self.drop_incomplete_batch,
            multiproc_kwargs=self.multiproc_kwargs,
        )

        # Order matters: it fixes the order of samples in each concatenation
        # (Montgomery, Shenzhen, Indian, Padchest).
        members = [
            get_dataset_from_module(module, stage, **forwarded)
            for module in (
                mc_datamodule,
                ch_datamodule,
                indian_datamodule,
                pc_datamodule,
            )
        ]

        # One concatenated dataset per subset name.
        self.dataset = {}
        for subset in ("__train__", "train", "__valid__", "test"):
            self.dataset[subset] = ConcatDataset(
                [member[subset] for member in members]
            )

        subsets = return_subsets(self.dataset)
        (
            self.train_dataset,
            self.validation_dataset,
            self.extra_validation_datasets,
            self.predict_dataset,
        ) = subsets
# Module-level alias to the class itself (not an instance).
# NOTE(review): presumably the name resolved by the configuration entry
# point for this module -- confirm against the caller.
datamodule = DefaultModule
# Copyright © 2022 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
from ..datamodule import ConcatDataModule
from ..indian.datamodule import RawDataLoader as IndianLoader
from ..indian.datamodule import make_split as make_indian_split
from ..montgomery.datamodule import RawDataLoader as MontgomeryLoader
from ..montgomery.datamodule import make_split as make_montgomery_split
from ..padchest.datamodule import RawDataLoader as PadchestLoader
from ..padchest.datamodule import make_split as make_padchest_split
from ..shenzhen.datamodule import RawDataLoader as ShenzhenLoader
from ..shenzhen.datamodule import make_split as make_shenzhen_split
class DataModule(ConcatDataModule):
    """Aggregated datamodule composed of Montgomery, Shenzhen, Indian and
    Padchest datasets.

    Parameters
    ----------
    split_filename
        Name of the split file handed to the Montgomery, Shenzhen and Indian
        ``make_split`` functions.
    padchest_split_filename
        Name of the split file handed to the Padchest ``make_split``
        function.
    """

    def __init__(self, split_filename: str, padchest_split_filename: str):
        # (loader, split) pairs, in the order they are concatenated.  Each
        # loader is created before its split, source by source.
        sources = [
            (MontgomeryLoader(), make_montgomery_split(split_filename)),
            (ShenzhenLoader(), make_shenzhen_split(split_filename)),
            (IndianLoader(), make_indian_split(split_filename)),
            (PadchestLoader(), make_padchest_split(padchest_split_filename)),
        ]

        # Every subset is built from the same sources in the same order, so
        # the three lists cannot drift apart.
        super().__init__(
            splits={
                subset: [(split[subset], loader) for loader, split in sources]
                for subset in ("train", "validation", "test")
            }
        )
# Copyright © 2022 Idiap Research Institute <contact@idiap.ch> # Copyright © 2022 Idiap Research Institute <contact@idiap.ch>
# #
# SPDX-License-Identifier: GPL-3.0-or-later # SPDX-License-Identifier: GPL-3.0-or-later
"""Aggregated dataset composed of Montgomery, Shenzhen, Indian and Padchest
datasets."""
from .datamodule import DataModule

# "default.json" is passed as ``split_filename`` and "tb-idiap.json" as
# ``padchest_split_filename`` (positional order of ``DataModule.__init__``).
datamodule = DataModule("default.json", "tb-idiap.json")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment