diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b7818c75c3a3c31738fb290240ac73bd43d4c280..0fc301b3a02156e97eca69a793215e1527ec8d72 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,11 +49,8 @@ repos: - id: check-added-large-files exclude: | (?x)( - ^src/ptbench/data/nih_cxr14_re/idiap.json| - ^src/ptbench/data/nih_cxr14_re/default.json| - ^src/ptbench/data/padchest/idiap.json| - ^src/ptbench/data/padchest/no_tb_idiap.json| - ^src/ptbench/data/padchest/no_tb_idiap.json| + ^src/ptbench/data/padchest/idiap.json.bz2| + ^src/ptbench/data/padchest/no-tb-idiap.json.bz2| ^tests/data/16bits.png| ^doc/results/img/rad_sign_drop.png ) diff --git a/pyproject.toml b/pyproject.toml index ff03e3665b67a566616c10387be8965cf3356ee6..23a5908cf088cb6bf5add2d1d0600c9e7a66fc15 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -220,19 +220,19 @@ hivtb_f9 = "ptbench.data.hivtb.fold_9" # montgomery-shenzhen-indian-padchest aggregated dataset mc_ch_in_pc = "ptbench.data.mc_ch_in_pc.default" -# NIH CXR14 (relabeled) +# NIH CXR14 (relabeled), multi-class (14 labels) nih-cxr14 = "ptbench.data.nih_cxr14.default" nih-cxr14-cardiomegaly = "ptbench.data.nih_cxr14.cardiomegaly" +# PadChest, multi-class (varied number of labels) +padchest-idiap = "ptbench.data.padchest.idiap" +padchest-tb-idiap = "ptbench.data.padchest.tb_idiap" +padchest-no-tb-idiap = "ptbench.data.padchest.no_tb_idiap" +padchest-cardiomegaly-idiap = "ptbench.data.padchest.cardiomegaly_idiap" + # NIH CXR14 / PadChest aggregated dataset nih_cxr14_pc_idiap = "ptbench.data.nih_cxr14_re_pc.idiap" -# PadChest -padchest_idiap = "ptbench.data.padchest.idiap" -padchest_tb_idiap = "ptbench.data.padchest.tb_idiap" -padchest_no_tb_idiap = "ptbench.data.padchest.no_tb_idiap" -padchest_cm_idiap = "ptbench.data.padchest.cardiomegaly_idiap" - [tool.setuptools] zip-safe = true package-dir = { "" = "src" } diff --git a/src/ptbench/data/datamodule.py b/src/ptbench/data/datamodule.py index 
9b5eab61b988e65d5b7d199d1cfcfc854cd784be..0785e2332d61978373dea1a9b536559848a5c117 100644 --- a/src/ptbench/data/datamodule.py +++ b/src/ptbench/data/datamodule.py @@ -541,6 +541,11 @@ class ConcatDataModule(lightning.LightningDataModule): self.set_chunk_size(batch_size, batch_chunk_count) self.splits = splits + + for dataset_name, split_loaders in splits.items(): + count = sum([len(k) for k, _ in split_loaders]) + logger.info(f"Dataset `{dataset_name}` contains {count} samples") + self.cache_samples = cache_samples self._train_sampler = None self.balance_sampler_by_class = balance_sampler_by_class diff --git a/src/ptbench/data/image_utils.py b/src/ptbench/data/image_utils.py index b9f8c8108dd0db18a0cd796b968d26f82da49906..99df51c8ac6b6ac0425c0dbb6013ebf36bc3ab7c 100644 --- a/src/ptbench/data/image_utils.py +++ b/src/ptbench/data/image_utils.py @@ -9,26 +9,6 @@ import numpy import PIL.Image -class SingleAutoLevel16to8: - """Converts a 16-bit image to 8-bit representation using "auto-level". - - This transform assumes that the input image is gray-scaled. - - To auto-level, we calculate the maximum and the minimum of the - image, and consider such a range should be mapped to the [0,255] - range of the destination image. 
- """ - - def __call__(self, img): - imin, imax = img.getextrema() - irange = imax - imin - return PIL.Image.fromarray( - numpy.round( - 255.0 * (numpy.array(img).astype(float) - imin) / irange - ).astype("uint8"), - ).convert("L") - - def remove_black_borders( img: PIL.Image.Image, threshold: int = 0 ) -> PIL.Image.Image: @@ -47,19 +27,23 @@ def remove_black_borders( A PIL image with black borders removed """ - img = numpy.asarray(img) + img_array = numpy.asarray(img) - if len(img.shape) == 2: # single channel - mask = numpy.asarray(img) > threshold - return PIL.Image.fromarray(img[numpy.ix_(mask.any(1), mask.any(0))]) + if len(img_array.shape) == 2: # single channel + mask = numpy.asarray(img_array) > threshold + return PIL.Image.fromarray( + img_array[numpy.ix_(mask.any(1), mask.any(0))] + ) - elif len(img.shape) == 3 and img.shape[2] == 3: - r_mask = img[:, :, 0] > threshold - g_mask = img[:, :, 1] > threshold - b_mask = img[:, :, 2] > threshold + elif len(img_array.shape) == 3 and img_array.shape[2] == 3: + r_mask = img_array[:, :, 0] > threshold + g_mask = img_array[:, :, 1] > threshold + b_mask = img_array[:, :, 2] > threshold mask = r_mask | g_mask | b_mask - return PIL.Image.fromarray(img[numpy.ix_(mask.any(1), mask.any(0))]) + return PIL.Image.fromarray( + img_array[numpy.ix_(mask.any(1), mask.any(0))] + ) else: raise NotImplementedError diff --git a/src/ptbench/data/nih_cxr14/__init__.py b/src/ptbench/data/nih_cxr14/__init__.py index b9954cf126eae1670c87296ad86f0ca6f4f9e758..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/src/ptbench/data/nih_cxr14/__init__.py +++ b/src/ptbench/data/nih_cxr14/__init__.py @@ -1,97 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later -"""NIH CXR14 (relabeled) dataset for computer-aided diagnosis. 
- -This dataset was extracted from the clinical PACS database at the National -Institutes of Health Clinical Center (USA) and represents 60% of all -their radiographs. It contains labels for fourteen common radiological -signs in this order: cardiomegaly, emphysema, effusion, hernia, infiltration, -mass, nodule, atelectasis, pneumothorax, pleural thickening, pneumonia, -fibrosis, edema and consolidation. -This is the relabeled version created in the CheXNeXt study. - -* Reference: [NIH-CXR14-2017]_ -* Original resolution (height x width or width x height): 1024 x 1024 -* Labels: [CHEXNEXT-2018]_ -* Split reference: [CHEXNEXT-2018]_ -* Protocol ``default``: - - * Training samples: 98'637 (including labels) - * Validation samples: 6'350 (including labels) - * Test samples: 0 - -* Protocol `ìdiap``: - * Images path adapted to Idiap infrastructure -""" - -import importlib.resources -import os - -from ...utils.rc import load_rc -from ..dataset import JSONDataset -from ..loader import load_pil_rgb, make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("cardiomegaly.json.bz2"), -] - -_datadir = load_rc().get("datadir.nih_cxr14_re", os.path.realpath(os.curdir)) -_idiap_folders = load_rc().get("nih_cxr14_re.idiap_folder_structure", False) - - -def _raw_data_loader(sample): - if _idiap_folders: - # for folder lookup efficiency, data is split into subfolders - # each original file is on the subfolder `f[:5]/f`, where f - # is the original file basename - basename = os.path.basename(sample["data"]) - return dict( - data=load_pil_rgb( - os.path.join( - _datadir, - os.path.dirname(sample["data"]), - basename[:5], - basename, - ) - ), - label=sample["label"], - ) - else: - return dict( - data=load_pil_rgb(os.path.join(_datadir, sample["data"])), - label=sample["label"], - ) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned 
delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader) - - -json_dataset = JSONDataset( - protocols=_protocols, - fieldnames=("data", "label"), - loader=_loader, -) -"""NIH CXR14 (relabeled) dataset object.""" - - -def _maker(protocol, size=512): - import torchvision.transforms as transforms - - from .. import make_dataset - - # ImageNet normalization - normalize = transforms.Normalize( - mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] - ) - - return make_dataset( - [json_dataset.subsets(protocol)], - [transforms.Resize((size, size))], - [transforms.RandomHorizontalFlip()], - [transforms.ToTensor(), normalize], - ) diff --git a/src/ptbench/data/padchest/__init__.py b/src/ptbench/data/padchest/__init__.py index 1151b55f28eb1a6e90310e0abc5c544991b314bb..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644 --- a/src/ptbench/data/padchest/__init__.py +++ b/src/ptbench/data/padchest/__init__.py @@ -1,281 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later -"""Padchest dataset for computer-aided diagnosis. - -A large chest x-ray image dataset with multi-label annotated reports. -This dataset includes more than 160,000 images from 67,000 patients that were -interpreted and reported by radiologists at Hospital San Juan (Spain) from 2009 -to 2017, covering six different position views and additional information on -image acquisition and patient demography. - -We keep only "PA" images here. 
- -* Reference: [PADCHEST-2019]_ -* Original resolution: variable, original size -* Labels: [PADCHEST-2019]_ -* Split reference: no split -* Protocol ``default``: - - * Training samples: 96'269 - * Validation samples: 0 - * Test samples: 0 - -* Protocol `ìdiap``: - * Images path adapted to Idiap infrastructure - -* Labels: - -['COPD signs', -'Chilaiditi sign', -'NSG tube', -'abnormal foreign body', -'abscess', -'adenopathy', -'air bronchogram', -'air fluid level', -'air trapping', -'alveolar pattern', -'aortic aneurysm', -'aortic atheromatosis', -'aortic button enlargement', -'aortic elongation', -'aortic endoprosthesis', -'apical pleural thickening', -'artificial aortic heart valve', -'artificial heart valve', -'artificial mitral heart valve', -'asbestosis signs', -'ascendent aortic elongation', -'atelectasis', -'atelectasis basal', -'atypical pneumonia', -'axial hyperostosis', -'azygoesophageal recess shift', -'azygos lobe', -'blastic bone lesion', -'bone cement', -'bone metastasis', -'breast mass', -'bronchiectasis', -'bronchovascular markings', -'bullas', -'calcified adenopathy', -'calcified densities', -'calcified fibroadenoma', -'calcified granuloma', -'calcified mediastinal adenopathy', -'calcified pleural plaques', -'calcified pleural thickening', -'callus rib fracture', -'cardiomegaly', -'catheter', -'cavitation', -'central vascular redistribution', -'central venous catheter', -'central venous catheter via jugular vein', -'central venous catheter via subclavian vein', -'central venous catheter via umbilical vein', -'cervical rib', -'chest drain tube', -'chronic changes', -'clavicle fracture', -'consolidation', -'costochondral junction hypertrophy', -'costophrenic angle blunting', -'cyst', -'dai', -'descendent aortic elongation', -'dextrocardia', -'diaphragmatic eventration', -'double J stent', -'dual chamber device', -'electrical device', -'emphysema', -'empyema', -'end on vessel', -'endoprosthesis', -'endotracheal tube', -'esophagic dilatation', 
-'exclude', -'external foreign body', -'fibrotic band', -'fissure thickening', -'flattened diaphragm', -'fracture', -'gastrostomy tube', -'goiter', -'granuloma', -'ground glass pattern', -'gynecomastia', -'heart insufficiency', -'heart valve calcified', -'hemidiaphragm elevation', -'hiatal hernia', -'hilar congestion', -'hilar enlargement', -'humeral fracture', -'humeral prosthesis', -'hydropneumothorax', -'hyperinflated lung', -'hypoexpansion', -'hypoexpansion basal', -'increased density', -'infiltrates', -'interstitial pattern', -'kerley lines', -'kyphosis', -'laminar atelectasis', -'lepidic adenocarcinoma', -'lipomatosis', -'lobar atelectasis', -'loculated fissural effusion', -'loculated pleural effusion', -'lung metastasis', -'lung vascular paucity', -'lymphangitis carcinomatosa', -'lytic bone lesion', -'major fissure thickening', -'mammary prosthesis', -'mass', -'mastectomy', -'mediastinal enlargement', -'mediastinal mass', -'mediastinal shift', -'mediastinic lipomatosis', -'metal', -'miliary opacities', -'minor fissure thickening', -'multiple nodules', -'nephrostomy tube', -'nipple shadow', -'nodule', -'non axial articular degenerative changes', -'normal', -'obesity', -'osteopenia', -'osteoporosis', -'osteosynthesis material', -'pacemaker', -'pectum carinatum', -'pectum excavatum', -'pericardial effusion', -'pleural effusion', -'pleural mass', -'pleural plaques', -'pleural thickening', -'pneumomediastinum', -'pneumonia', -'pneumoperitoneo', -'pneumothorax', -'post radiotherapy changes', -'prosthesis', -'pseudonodule', -'pulmonary artery enlargement', -'pulmonary artery hypertension', -'pulmonary edema', -'pulmonary fibrosis', -'pulmonary hypertension', -'pulmonary mass', -'pulmonary venous hypertension', -'reservoir central venous catheter', -'respiratory distress', -'reticular interstitial pattern', -'reticulonodular interstitial pattern', -'rib fracture', -'right sided aortic arch', -'round atelectasis', -'sclerotic bone lesion', -'scoliosis', -'segmental 
atelectasis', -'single chamber device', -'soft tissue mass', -'sternoclavicular junction hypertrophy', -'sternotomy', -'subacromial space narrowing', -'subcutaneous emphysema', -'suboptimal study', -'superior mediastinal enlargement', -'supra aortic elongation', -'surgery', -'surgery breast', -'surgery heart', -'surgery humeral', -'surgery lung', -'surgery neck', -'suture material', -'thoracic cage deformation', -'total atelectasis', -'tracheal shift', -'tracheostomy tube', -'tuberculosis', -'tuberculosis sequelae', -'unchanged', -'vascular hilar enlargement', -'vascular redistribution', -'ventriculoperitoneal drain tube', -'vertebral anterior compression', -'vertebral compression', -'vertebral degenerative changes', -'vertebral fracture', -'volume loss'] -""" - -import importlib.resources -import os - -from ...utils.rc import load_rc -from .. import make_dataset -from ..dataset import JSONDataset -from ..loader import load_pil, make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("idiap.json.bz2"), - importlib.resources.files(__name__).joinpath("tb_idiap.json.bz2"), - importlib.resources.files(__name__).joinpath("no_tb_idiap.json.bz2"), - importlib.resources.files(__name__).joinpath("cardiomegaly_idiap.json.bz2"), -] - -_datadir = load_rc().get("datadir.padchest", os.path.realpath(os.curdir)) - - -def _raw_data_loader(sample): - return dict( - data=load_pil(os.path.join(_datadir, sample["data"])), - label=sample["label"], - ) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return make_delayed(sample, _raw_data_loader) - - -json_dataset = JSONDataset( - protocols=_protocols, - fieldnames=("data", "label"), - loader=_loader, -) -"""Padchest dataset object.""" - - -def _maker(protocol, resize_size=512, cc_size=512, RGB=True): - import torchvision.transforms as transforms - - from ..loader import SingleAutoLevel16to8 - - 
post_transforms = [] - if not RGB: - post_transforms = [transforms.Lambda(lambda x: x.convert("L"))] - - return make_dataset( - [json_dataset.subsets(protocol)], - [ - SingleAutoLevel16to8(), - transforms.Resize(resize_size), - transforms.CenterCrop(cc_size), - ], - [transforms.RandomHorizontalFlip()], - post_transforms, - ) diff --git a/src/ptbench/data/padchest/cardiomegaly_idiap.json b/src/ptbench/data/padchest/cardiomegaly_idiap.json new file mode 100644 index 0000000000000000000000000000000000000000..9487ba838f48a22c4f5c1b3f9fd280acbababdf0 --- /dev/null +++ b/src/ptbench/data/padchest/cardiomegaly_idiap.json @@ -0,0 +1,164 @@ +{ + "train": [ + [ + "0/128060267991587422580475423001969971705_pjiogk.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/62251368659414281768550499840080640229_8jw2hb.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/30661727075761817007267292459310975718_86nsuj.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/84545123491560549832532130982226930123_vvayv1.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/284353054075043225622260270287627142906_kduagg.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/172366760559200399420370251768770096562_31ewgy.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/87180314458623865269977662447356279507_tcw56c.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/229647061532869405127486256298724729451_o2d0lv.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/110057948387370363009458390264986734403-3_pbni4p.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/110057948387370363009458390264986734403_myst7q.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/13028631954019256091081365260375513084_9b0d5l.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/304673909503207424801802419702123736315_vyckk1.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + 
[ + "0/163750344543947776171909966951769207897_dgf4vk.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/159216970546428043543933950339353602394_t0rgza.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/220991425902150564296792633654689602163_hrvmt4.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/227808586259025315397189404192472871023_2_ba4iu6.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/289108620230468119174968226927683707287_9wupg2.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/159364525876157332485573893353941089455_aepmh7.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/262822530217543635254713021579502738735_g2v51e.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/172167226340244841253091101374217956877_q2xlp3.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/1256842362861431725328351539259305635_u1qifz.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/126455653911678893263056441159987922516_uz7fwy.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/10996416492353037588312781035930080694_8rstz0.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/76706953811930577985930635244528105325_43rk2l.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/232572031807344965043258496995347066691_g4zxoq.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/184768585622747149847312236491996945269_hpn97q.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/333078556932425505903975417870495109406_19z1qe.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/278147989796380509519073707235611243961_fplpb6.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/278147989796380509519073707235611243961_lgo9hx.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/119688533980562249677219589534851011987_60uvre.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0] + ], + [ + "0/125061228802875769555301109909833678388_t5xvaz.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/106111337169988693038323274011777746837_0yin7u.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/159127361586483217046684065044208677781_lwjgop.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/267897293741510635554583282472837799427_2_bbiu7z.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/63510470621460583865307590457045621750_wfd1y4.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/199932188422938481238592789979586445399_5tsy8k.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/62774794894109549387630855543283311955_2_tdirok.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/62774794894109549387630855543283311955_9jbiyo.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/245895219126534788779109786698958326807_1a9ait.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ], + [ + "0/250229194886382183353648559275366656935_x3otkq.png", + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + ] + ] +} diff --git a/src/ptbench/data/padchest/cardiomegaly_idiap.json.bz2 b/src/ptbench/data/padchest/cardiomegaly_idiap.json.bz2 deleted file mode 100644 index 52c89dae856d7759b1955d8ece2621aacf5f72c9..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/padchest/cardiomegaly_idiap.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/padchest/cardiomegaly_idiap.py b/src/ptbench/data/padchest/cardiomegaly_idiap.py index d25387388d1540e706876eef2e15543b293113e0..05286fd598569c53d625f0e2513384dac78cdfd4 100644 --- a/src/ptbench/data/padchest/cardiomegaly_idiap.py +++ b/src/ptbench/data/padchest/cardiomegaly_idiap.py @@ -1,48 +1,14 @@ # SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later -"""Padchest cardiomegaly (idiap protocol) dataset for 
computer-aided diagnosis. - -The first 40 images with cardiomegaly. -parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" - -* Split reference: first 100% of cardiomegaly for "train" -* See :py:mod:`ptbench.data.padchest` for dataset details -* This configuration resolution: 512 x 512 (default) -""" - -from clapper.logging import setup -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker +from .datamodule import DataModule -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("cardiomegaly_idiap", RGB=False) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) +datamodule = DataModule("cardiomegaly_idiap.json") +"""Padchest cardiomegaly (idiap protocol) dataset for computer-aided diagnosis. +This split contains the first 40 images with cardiomegaly, with parameters: +Label = "Normal", MethodLabel = "Physician", Projection = "PA" -datamodule = DefaultModule +Read documentation of :py:class:`DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/padchest/datamodule.py b/src/ptbench/data/padchest/datamodule.py new file mode 100644 index 0000000000000000000000000000000000000000..5db738887196b60ea953c107f7de2603e7a24f02 --- /dev/null +++ b/src/ptbench/data/padchest/datamodule.py @@ -0,0 +1,333 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import importlib.resources +import os + +import numpy +import PIL.Image + +from torchvision.transforms.functional import center_crop, to_tensor + +from ...utils.rc import load_rc +from ..datamodule import CachingDataModule +from ..image_utils import remove_black_borders +from ..split import JSONDatabaseSplit +from ..typing import DatabaseSplit +from ..typing import RawDataLoader as _BaseRawDataLoader +from ..typing import Sample + + +class RawDataLoader(_BaseRawDataLoader): + """A specialized raw-data-loader for the PadChest dataset. + + Attributes + ---------- + + datadir + This variable contains the base directory where the database raw data + is stored. + """ + + datadir: str + + def __init__(self): + rc = load_rc() + self.datadir = rc.get("datadir.padchest", os.path.realpath(os.curdir)) + + def sample(self, sample: tuple[str, list[int]]) -> Sample: + """Loads a single image sample from the disk. + + Parameters + ---------- + + sample: + A tuple containing the path suffix, within the dataset root folder, + where to find the image to be loaded, and an integer, representing the + sample label. 
+ + + Returns + ------- + + sample + The sample representation + """ + # N.B.: PadChest images are encoded as 16-bit grayscale images + image = PIL.Image.open(os.path.join(self.datadir, sample[0])) + image = remove_black_borders(image) + array = numpy.array(image).astype(numpy.float32) / 65536 + tensor = to_tensor(array) + tensor = center_crop(tensor, min(*tensor.shape[1:])) + + # use the code below to view generated images + # from torchvision.transforms.functional import to_pil_image + # to_pil_image(tensor).show() + # __import__("pdb").set_trace() + + return tensor, dict(label=sample[1], name=sample[0]) # type: ignore[arg-type] + + def label(self, sample: tuple[str, list[int]]) -> list[int]: + """Loads a single image sample label from the disk. + + Parameters + ---------- + + sample: + A tuple containing the path suffix, within the dataset root folder, + where to find the image to be loaded, and a list of integers, + representing the sample labels. + + + Returns + ------- + + labels + The integer labels associated with the sample + """ + return sample[1] + + +def make_split(basename: str) -> DatabaseSplit: + """Returns a database split for the PadChest database.""" + + return JSONDatabaseSplit( + importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename) + ) + + +class DataModule(CachingDataModule): + """Padchest dataset for computer-aided diagnosis. + + A large chest x-ray image dataset with multi-label annotated reports. This + dataset includes more than 160,000 images from 67,000 patients that were + interpreted and reported by radiologists at Hospital San Juan (Spain) from + 2009 to 2017, covering six different position views and additional + information on image acquisition and patient demography. + + We keep only postero-anterior "PA" images in our setup. 
+ + * Reference: [PADCHEST-2019]_ + * Raw data input (on disk): + + * PNG grayscale 16-bit depth images + * Resolution: varying resolution + + * Labels: [PADCHEST-2019]_ + * Output image: + + * Transforms: + + * Load raw 16-bit PNG with :py:mod:`PIL` + * Remove excess black borders + * Convert image to 32-bit floats between 0. and 1. + * Convert to tensor + * Center crop, preserving the minimum dimension (height or width) + + * Final specifications + + * Grayscale, encoded as a 1-plane 32-bit float image, square with + varying resolutions depending on the raw input image + * Labels, in order (some of which may not be present in all splits): + + * COPD signs + * Chilaiditi sign + * NSG tube + * abnormal foreign body + * abscess + * adenopathy + * air bronchogram + * air fluid level + * air trapping + * alveolar pattern + * aortic aneurysm + * aortic atheromatosis + * aortic button enlargement + * aortic elongation + * aortic endoprosthesis + * apical pleural thickening + * artificial aortic heart valve + * artificial heart valve + * artificial mitral heart valve + * asbestosis signs + * ascendent aortic elongation + * atelectasis + * atelectasis basal + * atypical pneumonia + * axial hyperostosis + * azygoesophageal recess shift + * azygos lobe + * blastic bone lesion + * bone cement + * bone metastasis + * breast mass + * bronchiectasis + * bronchovascular markings + * bullas + * calcified adenopathy + * calcified densities + * calcified fibroadenoma + * calcified granuloma + * calcified mediastinal adenopathy + * calcified pleural plaques + * calcified pleural thickening + * callus rib fracture + * cardiomegaly + * catheter + * cavitation + * central vascular redistribution + * central venous catheter + * central venous catheter via jugular vein + * central venous catheter via subclavian vein + * central venous catheter via umbilical vein + * cervical rib + * chest drain tube + * chronic changes + * clavicle fracture + * consolidation + * costochondral junction 
hypertrophy + * costophrenic angle blunting + * cyst + * dai + * descendent aortic elongation + * dextrocardia + * diaphragmatic eventration + * double J stent + * dual chamber device + * electrical device + * emphysema + * empyema + * end on vessel + * endoprosthesis + * endotracheal tube + * esophagic dilatation + * exclude + * external foreign body + * fibrotic band + * fissure thickening + * flattened diaphragm + * fracture + * gastrostomy tube + * goiter + * granuloma + * ground glass pattern + * gynecomastia + * heart insufficiency + * heart valve calcified + * hemidiaphragm elevation + * hiatal hernia + * hilar congestion + * hilar enlargement + * humeral fracture + * humeral prosthesis + * hydropneumothorax + * hyperinflated lung + * hypoexpansion + * hypoexpansion basal + * increased density + * infiltrates + * interstitial pattern + * kerley lines + * kyphosis + * laminar atelectasis + * lepidic adenocarcinoma + * lipomatosis + * lobar atelectasis + * loculated fissural effusion + * loculated pleural effusion + * lung metastasis + * lung vascular paucity + * lymphangitis carcinomatosa + * lytic bone lesion + * major fissure thickening + * mammary prosthesis + * mass + * mastectomy + * mediastinal enlargement + * mediastinal mass + * mediastinal shift + * mediastinic lipomatosis + * metal + * miliary opacities + * minor fissure thickening + * multiple nodules + * nephrostomy tube + * nipple shadow + * nodule + * non axial articular degenerative changes + * normal + * obesity + * osteopenia + * osteoporosis + * osteosynthesis material + * pacemaker + * pectum carinatum + * pectum excavatum + * pericardial effusion + * pleural effusion + * pleural mass + * pleural plaques + * pleural thickening + * pneumomediastinum + * pneumonia + * pneumoperitoneo + * pneumothorax + * post radiotherapy changes + * prosthesis + * pseudonodule + * pulmonary artery enlargement + * pulmonary artery hypertension + * pulmonary edema + * pulmonary fibrosis + * pulmonary 
hypertension + * pulmonary mass + * pulmonary venous hypertension + * reservoir central venous catheter + * respiratory distress + * reticular interstitial pattern + * reticulonodular interstitial pattern + * rib fracture + * right sided aortic arch + * round atelectasis + * sclerotic bone lesion + * scoliosis + * segmental atelectasis + * single chamber device + * soft tissue mass + * sternoclavicular junction hypertrophy + * sternotomy + * subacromial space narrowing + * subcutaneous emphysema + * suboptimal study + * superior mediastinal enlargement + * supra aortic elongation + * surgery + * surgery breast + * surgery heart + * surgery humeral + * surgery lung + * surgery neck + * suture material + * thoracic cage deformation + * total atelectasis + * tracheal shift + * tracheostomy tube + * tuberculosis + * tuberculosis sequelae + * unchanged + * vascular hilar enlargement + * vascular redistribution + * ventriculoperitoneal drain tube + * vertebral anterior compression + * vertebral compression + * vertebral degenerative changes + * vertebral fracture + * volume loss + """ + + def __init__(self, split_filename: str): + super().__init__( + database_split=make_split(split_filename), + raw_data_loader=RawDataLoader(), + ) diff --git a/src/ptbench/data/padchest/idiap.json.bz2 b/src/ptbench/data/padchest/idiap.json.bz2 index 8dfa755141a0a2a641e2e1ae2b6cdc972151e3ca..d1594934dcc33507729c3a372e2def7977395975 100644 Binary files a/src/ptbench/data/padchest/idiap.json.bz2 and b/src/ptbench/data/padchest/idiap.json.bz2 differ diff --git a/src/ptbench/data/padchest/idiap.py b/src/ptbench/data/padchest/idiap.py index 284a944d0d83bd933123d63732f4f045549084d3..ea8b3dbd24d6fdf770f38c3cde7848687090935c 100644 --- a/src/ptbench/data/padchest/idiap.py +++ b/src/ptbench/data/padchest/idiap.py @@ -1,44 +1,17 @@ # SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later -"""Padchest (idiap protocol) dataset 
for computer-aided diagnosis. -* See :py:mod:`ptbench.data.padchest` for dataset details -* This configuration resolution: 512 x 512 (default) -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") +from .datamodule import DataModule +datamodule = DataModule("idiap.json.bz2") +"""Padchest dataset for computer-aided diagnosis (``idiap`` split). -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) +This split contains all images in the database. Read documentation of +:py:class:`DataModule` for technical details. - def setup(self, stage: str): - self.dataset = _maker("idiap") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule +* Split reference: ours + * Training samples: 96'269 + * Validation samples: ? + * Test samples: ? 
+""" diff --git a/src/ptbench/data/padchest/no-tb-idiap.json.bz2 b/src/ptbench/data/padchest/no-tb-idiap.json.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..f7b7eefec64a896dc6208dbf887429e1c77e8d99 Binary files /dev/null and b/src/ptbench/data/padchest/no-tb-idiap.json.bz2 differ diff --git a/src/ptbench/data/padchest/no_tb_idiap.json.bz2 b/src/ptbench/data/padchest/no_tb_idiap.json.bz2 deleted file mode 100644 index 08222cbede330a78a8c6248647b84d4e804d8f97..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/padchest/no_tb_idiap.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/padchest/no_tb_idiap.py b/src/ptbench/data/padchest/no_tb_idiap.py index a424e0fe46890b899804aeac954cb3bd710429ae..62e4b56bee3046c63d13ea7f8cf7bbf848616a96 100644 --- a/src/ptbench/data/padchest/no_tb_idiap.py +++ b/src/ptbench/data/padchest/no_tb_idiap.py @@ -1,10 +1,11 @@ # SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later -"""Padchest tuberculosis (no TB idiap protocol) dataset for computer-aided -diagnosis. -* Protocol ``no TB idiap``: +from .datamodule import DataModule + +datamodule = DataModule("no-tb-idiap.json.bz2") +"""Padchest dataset for computer-aided diagnosis (``no-tb-idiap`` split). * Training samples: 20'126 * Validation samples: 1'500 @@ -12,44 +13,22 @@ diagnosis. * Images path adapted to Idiap infrastructure -* Labels: - cardiomegaly, emphysema, effusion, hernia, infiltration, - mass, nodule, atelectasis, pneumothorax, pleural thickening, pneumonia, - fibrosis, edema and consolidation +* Labels (in order): + + * cardiomegaly + * emphysema + * effusion + * hernia + * infiltration + * mass + * nodule + * atelectasis + * pneumothorax + * pleural thickening + * pneumonia + * fibrosis + * edema + * consolidation + +Read documentation of :py:class:`DataModule` for technical details. """ - -from clapper.logging import setup - -from .. 
import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("no_tb_idiap") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/padchest/tb-idiap.json b/src/ptbench/data/padchest/tb-idiap.json new file mode 100644 index 0000000000000000000000000000000000000000..cfa02579b34a25d548eef6c184a1e5861ecd93d3 --- /dev/null +++ b/src/ptbench/data/padchest/tb-idiap.json @@ -0,0 +1,256 @@ +{ + "train": [ + ["0/287829261582195482088446848654649898554_hiv2dj.png", 1], + ["0/71781238823267640402215377532202403315_pxkotz.png", 1], + ["0/232932494651352886935187910213725327397_542k5d.png", 1], + ["0/267611385326465042665315112692206586005_-4xiny.png", 1], + ["0/230116782779421561454466256768554783517_cg49ag.png", 1], + ["1/189304348002497677549254597676333895442_nr95nw.png", 1], + ["1/284083233950421891710386954928396720724_iisntp.png", 1], + ["2/62385505932300594213076222597583809293_o4nauv.png", 1], + ["3/327929900101827184624358086536821590319_p7mhmy.png", 1], + ["3/219525789228894168524584993087468671210_qsb22z.png", 1], + ["3/181608309586604103724035528082311350123_hrywbo.png", 1], + ["4/198010695012203250912000310773676281863_e8fo5o.png", 1], + ["5/94154745525773362638891358070604381275_xu2omi.png", 1], + ["5/94154745525773362638891358070604381275_2_mobm9i.png", 1], + 
["6/149448599615132658944902642295667120353_yhnikj.png", 1], + ["7/206757321508676009173922966730015242816_e9ivki.png", 1], + ["8/142686622787446619811118950710490862534_t13f05.png", 1], + ["8/338359343930739545025011516763914060805_cyusbr.png", 1], + ["9/32638629892553023971410131828929042060_0j8zfo.png", 1], + ["9/76287465797375088242663014918307149350_7y75c7.png", 1], + ["9/30340071673860518284580071098941265573_28fovf.png", 1], + ["9/133834049169242825047461122757328574656_hxq15r.png", 1], + ["10/130547094371746834245750660340603513752_2czlu0.png", 1], + ["10/71032888991710211087565881905995666443_dywshe.png", 1], + ["11/196949910603117298426935955787507289138_dnn2xk.png", 1], + ["11/333486394263212957144509987040971957354_5ik24x.png", 1], + ["11/223149869738399986911371339321970392001_k9oayh.png", 1], + ["54/28753966717384878552172419373198893194_z3a3sb.png", 1], + ["54/214014914699503892891500996873152128828_wbgkx9.png", 1], + ["54/195602194021807720669201170455273870892_2_nx9dfb.png", 1], + ["12/216840111366964013829543166512013301133058725_02-087-015.png", 1], + ["12/216840111366964013686042548532013221094406052_02-000-159.png", 1], + ["13/216840111366964013686042548532013189095559443_02-026-060.png", 1], + ["18/216840111366964012809176623042010216102115882_02-122-089.png", 1], + ["18/216840111366964013590140476722013038115344442_02-068-006.png", 1], + ["18/216840111366964013829543166512013349121121083_02-090-198.png", 1], + ["19/216840111366964012809176623042010216124251453_02-121-140.png", 1], + ["19/216840111366964013829543166512013337132636807_02-092-110.png", 1], + ["19/216840111366964013575787468332013022125651679_02-013-087.png", 1], + ["19/216840111366964013686042548532013203113624116_02-031-186.png", 1], + ["20/216840111366964013076187734852011167042130729_01-012-195.png", 1], + ["21/216840111366964013375835044302012149093442693_01-142-098.png", 1], + ["21/216840111366964014008416513202014182103505798_01-162-103.png", 1], + 
["21/216840111366964013307756408102012074125628561_01-091-158.png", 1], + ["22/216840111366964013534861372972012334122542714_01-153-110.png", 1], + ["22/216840111366964013534861372972012334174219300_01-153-139.png", 1], + ["22/216840111366964013076187734852011179190106015_01-020-037.png", 1], + ["23/216840111366964013375835044302012145133553460_01-066-089.png", 1], + ["23/216840111366964013336287914842012096133241846_01-107-077.png", 1], + ["24/216840111366964013217898866992011333091555050_01-027-188.png", 1], + ["24/216840111366964013451228379692012281091656994_01-117-103.png", 1], + ["25/216840111366964013307756408102012065122438760_01-086-131.png", 1], + ["26/216840111366964013274515230432012025083713232_01-043-028.png", 1], + ["27/216840111366964013274515230432012027125506724_01-097-014.png", 1], + ["28/216840111366964013962490064942014118114434097_01-188-109.png", 1], + ["28/216840111366964013076187734852011172122834037_01-021-101.png", 1], + ["29/216840111366964013402131755672012172160839897_01-136-125.png", 1], + ["29/216840111366964013534861372972012345121350712_01-136-073.png", 1], + ["29/216840111366964013217898866992012016124428082_01-102-080.png", 1], + ["31/216840111366964013962490064942014125151556277_01-172-070.png", 1], + ["32/216840111366964012558082906712010126095539029_03-150-186.png", 1], + ["32/216840111366964012734950068292010132105523907_03-150-117.png", 1], + ["32/216840111366964012819207061112010251103013727_03-130-173.png", 1], + ["32/216840111366964012768025509942010193092643957_03-140-129.png", 1], + ["33/216840111366964012734950068292010144122120771_03-148-189.png", 1], + ["33/216840111366964012922382741642010364091957451_03-196-136.png", 1], + ["34/216840111366964012558082906712010123085205684_03-146-167.png", 1], + ["35/216840111366964012558082906712010096135413767_03-182-119.png", 1], + ["36/216840111366964012922382741642010361094534387_03-195-044.png", 1], + ["36/216840111366964012283393834152009029134933991_00-010-044.png", 1], + 
["37/216840111366964012989926673512011131130808585_00-155-122.png", 1], + ["37/216840111366964012283393834152009026094739380_00-012-198.png", 1], + ["37/216840111366964012373310883942009201083307239_00-029-101.png", 1], + ["37/216840111366964012373310883942009201094902325_00-029-010.png", 1], + ["37/216840111366964012373310883942009201110112337_00-029-103.png", 1], + ["37/216840111366964012373310883942009205080440759_00-026-186.png", 1], + ["37/216840111366964012373310883942009187120837458_00-033-022.png", 1], + ["37/216840111366964013076187734852011200083643751_00-142-079.png", 1], + ["38/216840111366964013076187734852011272200326669_00-134-107.png", 1], + ["38/216840111366964013076187734852011196103810560_00-144-029.png", 1], + ["38/216840111366964013076187734852011193131842486_00-144-131.png", 1], + ["38/216840111366964012989926673512011145084722078_00-150-081.png", 1], + ["38/216840111366964012959786098432011042111403479_00-180-113.png", 1], + ["38/216840111366964012283393834152009034124021993_00-056-037.png", 1], + ["38/216840111366964012558082906712009299150550234_00-080-099.png", 1], + ["39/216840111366964012373310883942009146100514519_00-040-128.png", 1], + ["39/216840111366964012373310883942009145120705372_00-036-050.png", 1], + ["39/216840111366964012487858717522009272091546479_00-020-094.png", 1], + ["39/216840111366964012487858717522009219105038840_00-001-025.png", 1], + ["39/216840111366964012487858717522009219113307532_00-001-048.png", 1], + ["39/216840111366964012373310883942009092152630829_00-069-050.png", 1], + ["40/216840111366964012339356563862009047140513629_00-053-012.png", 1], + ["40/216840111366964012373310883942009145124513316_00-035-198.png", 1], + ["40/1275224347932024878238_00-099-157.png", 1], + ["42/216840111366964013076187734852011221130705130_00-114-119.png", 1], + ["43/216840111366964012959786098432011040111031806_00-173-078.png", 1], + ["43/216840111366964012959786098432011040111031806_00-173-030.png", 1], + 
["43/216840111366964012959786098432011040111031806_00-173-025.png", 1], + ["43/216840111366964012558082906712009323164641947_00-106-182.png", 1], + ["43/216840111366964013076187734852011185152425386_00-146-061.png", 1], + ["0/20536686640136348236148679891455886468_k6ga29.png", 0], + ["0/238285621348398466668514178112618553012_a7k6dv.png", 0], + ["0/152191969602076825998375638267191596461_ck9qkz.png", 0], + ["0/84289138814897824414670894017743137660_xjsw7k.png", 0], + ["0/258055173326723517262722952856353517225_gxjwwm.png", 0], + ["0/243837679625088918208093479515843617254_znbkkx.png", 0], + ["0/337234085099221143119428528401779812468_1hc03o.png", 0], + ["0/12582988208641810053552022080964044412_bdbfde.png", 0], + ["0/120005774206062253919592068222866365316_5rzk13.png", 0], + ["0/51753302240077846214410306432856713330_w0bmgz.png", 0], + ["0/151619064599028082995709172863126236342_adr7uc.png", 0], + ["0/97965053291113766610970450200683915158_ta7i4n.png", 0], + ["0/182408105675910680785179281207852452925_ybx7hr.png", 0], + ["0/319976350840661489788378306853554168892_is6ipx.png", 0], + ["0/173348027872651650413607534885352239722_ksfbkb.png", 0], + ["0/267593312931260619142226905522973356507_dfimnx.png", 0], + ["0/105591880985588572332898967525311229749_yqjtuk.png", 0], + ["0/330324745759424821132494701991767497957_9fc9au.png", 0], + ["0/175828040998318011818448557383860207677_m5a8ih.png", 0], + ["0/101408931302974623103956952273007774313_9stb1t.png", 0], + ["0/316808028365561931793158511235924256690_01sv6w.png", 0], + ["0/121722089111950575741877258740783632351_55kku6.png", 0], + ["0/257363608869672643452639640460584331841_2m9azj.png", 0], + ["0/278948104568698034857207837457891721065_z574go.png", 0], + ["0/153144722300895942937262007594583779219_ohw52d.png", 0], + ["0/125998750830791689348030012609205089696_r0z2uf.png", 0], + ["0/49951034854725910980631707796081134817_38f62l.png", 0], + ["0/316487933899836980515723069532430938491_jsr618.png", 0], + 
["0/160591118781379370017611727315537422584_8sb33g.png", 0], + ["0/126021751903391108689458989626008942485_26b1rr.png", 0], + ["0/49031120128502913776108011441053379453_3qyvw8.png", 0], + ["0/319750193216879761537585377422638859654_tq1iik.png", 0], + ["0/42728540250863331876520898382409588161_o4q8sm.png", 0], + ["0/20450229093471036935089654431118253290_fsak6z.png", 0], + ["0/237085130674557205383750650934770251154_kr2cfo.png", 0], + ["0/4378551502088567463362725870936911970_tcb27t.png", 0], + ["0/235156922651079386633702235134872520572_1kcee2.png", 0], + ["0/244098485281969800050011620079615032187_kmiw7o.png", 0], + ["0/15257471714388675949620391557999378309_vyin8e.png", 0], + ["0/330657416961603932280429915924322632486_umk9rt.png", 0], + ["0/223182426517840299226527275743732068184_spng8r.png", 0], + ["0/223805166479327036969696654106527616531_2_t9llp8.png", 0], + ["0/187748529663266333212518173684958471015_ittz4x.png", 0], + ["0/52863917336388719685790911566203013345_8ctt1b.png", 0], + ["0/298250519825265668757842559853876298941_d6js3t.png", 0], + ["0/53271040340917494534931731766099840496_bzweu7.png", 0], + ["0/12130328430935630448520149687013276863_99k39n.png", 0], + ["0/62099871704902880343187697364243424587_5pzx3y.png", 0], + ["0/65266590064606711020514044534829206992_emiz6y.png", 0], + ["0/203867956156632398461581965823374154403_hcfaj6.png", 0], + ["0/239027903366999274333251661962927457884_0f0trw.png", 0], + ["0/306704549447961580693205362675398392203_lug40p.png", 0], + ["0/306704549447961580693205362675398392203_3_qxsdo7.png", 0], + ["0/43974812568761155667942095366465446895_3ninvk.png", 0], + ["0/74576479405710985759203367628923301316_i9epl9.png", 0], + ["0/130110969611250982875259631791477091714_jxnrnp.png", 0], + ["0/150762056036635610657089202556643673250_rczqej.png", 0], + ["0/255293910021372270231348852078918972406_gczlpx.png", 0], + ["0/48039780599266839142703323004089311640_jui3wg.png", 0], + ["0/176702947772428028299305495478494543149_wkumlt.png", 
0], + ["0/339378129561197015481434844854643771030_uq3k6q.png", 0], + ["0/287026865546374889085157036246558460511_l0lxwq.png", 0], + ["0/172555436098471526561614627462201474466_6rvch6.png", 0], + ["0/79355438231207002902084777212886277284_k1cyrv.png", 0], + ["0/314277032586409115721922718327642931728_vi0hn8.png", 0], + ["0/291629452559210611519089695617689344215_c9p3gg.png", 0], + ["0/25528567975108302073238456043150484827_d4odva.png", 0], + ["0/61569799595446412488213405581720832033_b84v2f.png", 0], + ["0/200862359831761685992552819450233793891_z09t4i.png", 0], + ["0/75078999225025048286726158739709244898_clh5ow.png", 0], + ["0/159690112140413823244967667566418197322_nrua7y.png", 0], + ["0/334589562203518771379665149239316362968_3nh7ym.png", 0], + ["0/291366571583131755848449851925170824781_ce0bx6.png", 0], + ["0/305453048517584511909927361247180991655_2x71z9.png", 0], + ["0/44536207964452859015038957006687811448_noeqf7.png", 0], + ["0/214260817085057432303623958907855926368_jrl28m.png", 0], + ["0/170868574132883919470570081008800440313_0zq8qv.png", 0], + ["0/28041611833871250677444368303361730215_ff13fg.png", 0], + ["0/90464809371777154323219683309097631033_g7ks8z.png", 0], + ["0/339511729523635430557303404954216047272_rwrvt6.png", 0], + ["0/26244571749755521880111325776337650586_zofmec.png", 0], + ["0/26244571749755521880111325776337650586_0mezs6.png", 0], + ["0/169811598307650468944273653237225758377_ri69fd.png", 0], + ["0/72598842051928771977753725274938862160_2pamb0.png", 0], + ["0/229495322771160678933819106193415737007_6298bo.png", 0], + ["0/265767708505416563059027799932479206802_purml1.png", 0], + ["0/255305021945238682284369757239439639240_1gk2z6.png", 0], + ["0/208887182360953191530026578700363773829_qgx13z.png", 0], + ["0/188512239145790616137268274285581986225_efhnpk.png", 0], + ["0/323279587618131957350287102063368542386_269oky.png", 0], + ["0/28387517755754605410664016700168547553_ui2lcv.png", 0], + 
["0/77914334473380412364913757909337796966_x0exbf.png", 0], + ["0/35040582196710953341366337559603278309_mks08j.png", 0], + ["0/32078055345180912106051534833637957932_fkcmpo.png", 0], + ["0/215216312896434609819729304269555722027_9i3jyg.png", 0], + ["0/14747842373519304109072114951815645766_kjxsxw.png", 0], + ["0/300765837088870057523971311706854661569_55ii9c.png", 0], + ["0/102102747557522113902850993567166035648_-5a78l.png", 0], + ["0/47481551763215566757392610054682744939_3azhzs.png", 0], + ["0/125766228454316130624782540198484575895_9ha77d.png", 0] + ], + "test": [ + ["44/216840111366964012373310883942009170111625797_00-097-039.png", 1], + ["44/216840111366964012373310883942009202102102178_00-034-039.png", 1], + ["44/216840111366964012373310883942009187123652081_00-032-157.png", 1], + ["44/216840111366964012373310883942009187171059045_00-032-135.png", 1], + ["44/216840111366964012373310883942009187120903052_00-032-141.png", 1], + ["44/216840111366964012373310883942009187120813381_00-032-175.png", 1], + ["44/216840111366964012373310883942009082135558614_00-049-056.png", 1], + ["45/216840111366964012487858717522009218100750234_00-005-054.png", 1], + ["45/216840111366964012373310883942009117084022290_00-063-162.png", 1], + ["45/216840111366964012487858717522009231091732422_00-003-050.png", 1], + ["45/216840111366964012373310883942009159112500535_00-093-077.png", 1], + ["45/216840111366964012373310883942009132135802286_00-041-118.png", 1], + ["45/216840111366964012558082906712009302095926650_00-078-157.png", 1], + ["45/216840111366964012959786098432011038093625941_00-174-028.png", 1], + ["45/216840111366964012989926673512011097174442376_00-183-163.png", 1], + ["46/216840111366964012283393834152009020133704931_00-014-063.png", 1], + ["47/1275224347932024875489_00-095-072.png", 1], + ["47/216840111366964012487858717522009281094308471_00-073-123.png", 1], + ["47/216840111366964012373310883942009159103757279_00-092-099.png", 1], + 
["47/216840111366964013076187734852011187170414896_00-145-078.png", 1], + ["47/216840111366964013076187734852011187170414896_00-145-192.png", 1], + ["47/12752243479320241414701_00-171-104.png", 1], + ["47/216840111366964012989926673512011097174442376_00-184-039.png", 1], + ["48/216840111366964012922382741642010355113626877_04-000-161.png", 1], + ["49/216840111366964012819207061112010321104541236_04-018-166.png", 1], + ["0/233506623954258136177168751019496286188_muxzw8.png", 0], + ["0/257273696639795055440516119201724869708_szlui2.png", 0], + ["0/126887219835336055655888692181690517955_1uyqv0.png", 0], + ["0/234216022935307048216299336991419380952_sgrlgf.png", 0], + ["0/335952795723413338158135725278292632828_iepbz5.png", 0], + ["0/82090431388093551321398783174196866128_tr2g8h.png", 0], + ["0/198687427221787561855370297023668306242_260owe.png", 0], + ["0/68579354598156854599040192419227509037_koc7xf.png", 0], + ["0/144478523611315461003350014835868422251_2_s8dluj.png", 0], + ["0/302929243006484152942115282128804026168_n68554.png", 0], + ["0/164834710188971445829128771258574408035_121u4x.png", 0], + ["0/89135767289800763526891535832874263112_j11bkl.png", 0], + ["0/289813269991598332532654906677599208958_u5qqk4.png", 0], + ["0/164997689015514390292649262625407524157_5p3hbr.png", 0], + ["0/284969877609993327035605642598902430366_71qh8x.png", 0], + ["0/183325708478983298834864550015860941024_yo7ozp.png", 0], + ["0/168265719116971404186158893567526651554_bu6ryj.png", 0], + ["0/143131945457640249494238411094875089491_nb0guj.png", 0], + ["0/242348865558077717792572639139276481233_tb3asf.png", 0], + ["0/200494933217168186622620771086149301425_xwagt4.png", 0], + ["0/217342025624397376056025865389719461061_0s1x0d.png", 0], + ["0/339445012265588569253208175476185397432_y7ahfu.png", 0], + ["0/91815483074912963411621985384575850871_-ja50t.png", 0], + ["0/215041224245905944888939370272745392061_993y3a.png", 0], + ["0/175965847113500546623993767251681877654_9dyncn.png", 0] + ] +} 
diff --git a/src/ptbench/data/padchest/tb_idiap.json.bz2 b/src/ptbench/data/padchest/tb_idiap.json.bz2 deleted file mode 100644 index 99d0bb971acef3ee860be789209985da6d96fed3..0000000000000000000000000000000000000000 Binary files a/src/ptbench/data/padchest/tb_idiap.json.bz2 and /dev/null differ diff --git a/src/ptbench/data/padchest/tb_idiap.py b/src/ptbench/data/padchest/tb_idiap.py index 1a2516202e676386d149fbfc684e3b93e805978e..69c2708b6872c6a2dbc3a71ab88d2d41b9e4c5e1 100644 --- a/src/ptbench/data/padchest/tb_idiap.py +++ b/src/ptbench/data/padchest/tb_idiap.py @@ -1,48 +1,15 @@ # SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later -"""Padchest tuberculosis (idiap protocol) dataset for computer-aided diagnosis. -The 125 healthy images are the first 125 padchest images with the following -parameters: Label = "Normal", MethodLabel = "Physician", Projection = "PA" +from .datamodule import DataModule -* Split reference: first 80% of TB and healthy CXR for "train", rest for "test" -* See :py:mod:`ptbench.data.padchest` for dataset details -* This configuration resolution: 512 x 512 (default) -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") +datamodule = DataModule("tb-idiap.json") +"""Padchest dataset for computer-aided diagnosis (``tb-idiap`` split). 
+The 125 healthy images in this split are the first 125 padchest images with +the following parameters: Label = "Normal", MethodLabel = "Physician", +Projection = "PA". -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("tb_idiap", RGB=False) - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule +Read documentation of :py:class:`DataModule` for technical details. +"""