From 819a43801e9eef7f948bcefa35e67f01bf38e750 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Thu, 27 Jul 2023 20:18:01 +0200 Subject: [PATCH] [data.indian] Re-structure database to new format; Uncompress json files for easier maintenance --- src/ptbench/data/indian/__init__.py | 84 ------------ src/ptbench/data/indian/datamodule.py | 51 +++++++ src/ptbench/data/indian/datamodules.py | 46 ------- src/ptbench/data/indian/default.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/default.json.bz2 | Bin 428 -> 0 bytes src/ptbench/data/indian/default.py | 7 + src/ptbench/data/indian/fold_0.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_0.json.bz2 | Bin 439 -> 0 bytes src/ptbench/data/indian/fold_0.py | 43 +----- src/ptbench/data/indian/fold_1.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_1.json.bz2 | Bin 431 -> 0 bytes src/ptbench/data/indian/fold_1.py | 43 +----- src/ptbench/data/indian/fold_2.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_2.json.bz2 | Bin 438 -> 0 bytes src/ptbench/data/indian/fold_2.py | 43 +----- src/ptbench/data/indian/fold_3.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_3.json.bz2 | Bin 442 -> 0 bytes src/ptbench/data/indian/fold_3.py | 43 +----- src/ptbench/data/indian/fold_4.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_4.json.bz2 | Bin 437 -> 0 bytes src/ptbench/data/indian/fold_4.py | 43 +----- src/ptbench/data/indian/fold_5.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_5.json.bz2 | Bin 435 -> 0 bytes src/ptbench/data/indian/fold_5.py | 43 +----- src/ptbench/data/indian/fold_6.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_6.json.bz2 | Bin 432 -> 0 bytes src/ptbench/data/indian/fold_6.py | 43 +----- src/ptbench/data/indian/fold_7.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_7.json.bz2 | Bin 437 -> 0 bytes src/ptbench/data/indian/fold_7.py | 43 +----- 
src/ptbench/data/indian/fold_8.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_8.json.bz2 | Bin 437 -> 0 bytes src/ptbench/data/indian/fold_8.py | 43 +----- src/ptbench/data/indian/fold_9.json | 163 +++++++++++++++++++++++ src/ptbench/data/indian/fold_9.json.bz2 | Bin 439 -> 0 bytes src/ptbench/data/indian/fold_9.py | 43 +----- 36 files changed, 1871 insertions(+), 540 deletions(-) create mode 100644 src/ptbench/data/indian/datamodule.py delete mode 100644 src/ptbench/data/indian/datamodules.py create mode 100644 src/ptbench/data/indian/default.json delete mode 100644 src/ptbench/data/indian/default.json.bz2 create mode 100644 src/ptbench/data/indian/default.py create mode 100644 src/ptbench/data/indian/fold_0.json delete mode 100644 src/ptbench/data/indian/fold_0.json.bz2 create mode 100644 src/ptbench/data/indian/fold_1.json delete mode 100644 src/ptbench/data/indian/fold_1.json.bz2 create mode 100644 src/ptbench/data/indian/fold_2.json delete mode 100644 src/ptbench/data/indian/fold_2.json.bz2 create mode 100644 src/ptbench/data/indian/fold_3.json delete mode 100644 src/ptbench/data/indian/fold_3.json.bz2 create mode 100644 src/ptbench/data/indian/fold_4.json delete mode 100644 src/ptbench/data/indian/fold_4.json.bz2 create mode 100644 src/ptbench/data/indian/fold_5.json delete mode 100644 src/ptbench/data/indian/fold_5.json.bz2 create mode 100644 src/ptbench/data/indian/fold_6.json delete mode 100644 src/ptbench/data/indian/fold_6.json.bz2 create mode 100644 src/ptbench/data/indian/fold_7.json delete mode 100644 src/ptbench/data/indian/fold_7.json.bz2 create mode 100644 src/ptbench/data/indian/fold_8.json delete mode 100644 src/ptbench/data/indian/fold_8.json.bz2 create mode 100644 src/ptbench/data/indian/fold_9.json delete mode 100644 src/ptbench/data/indian/fold_9.json.bz2 diff --git a/src/ptbench/data/indian/__init__.py b/src/ptbench/data/indian/__init__.py index 889cf513..e69de29b 100644 --- a/src/ptbench/data/indian/__init__.py +++ 
b/src/ptbench/data/indian/__init__.py @@ -1,84 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian collection dataset for computer-aided diagnosis. - -The Indian collection database has been established to foster research -in computer-aided diagnosis of pulmonary diseases with a special -focus on pulmonary tuberculosis (TB). - -* Reference: [INDIAN-2013]_ -* Original resolution (height x width or width x height): more than 1024 x 1024 -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -""" - -import importlib.resources -import os - -from ...utils.rc import load_rc -from .. import make_dataset -from ..dataset import JSONDataset -from ..loader import load_pil_grayscale, make_delayed - -_protocols = [ - importlib.resources.files(__name__).joinpath("default.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_0.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_1.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_2.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_3.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_4.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_5.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_6.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_7.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_8.json.bz2"), - importlib.resources.files(__name__).joinpath("fold_9.json.bz2"), -] - -_datadir = load_rc().get("datadir.indian", os.path.realpath(os.curdir)) - - -def _raw_data_loader(sample): - return dict( - data=load_pil_grayscale(os.path.join(_datadir, sample["data"])), - label=sample["label"], - ) - - -def _loader(context, sample): - # "context" is ignored in this case - database is homogeneous - # we returned delayed samples to avoid loading all images at once - return 
make_delayed(sample, _raw_data_loader) - - -json_dataset = JSONDataset( - protocols=_protocols, - fieldnames=("data", "label"), - loader=_loader, -) -"""Indian dataset object.""" - - -def _maker(protocol, resize_size=512, cc_size=512, RGB=False): - from torchvision import transforms - - from ..augmentations import ElasticDeformation - from ..image_utils import RemoveBlackBorders - - post_transforms = [] - if RGB: - post_transforms = [ - transforms.Lambda(lambda x: x.convert("RGB")), - transforms.ToTensor(), - ] - - return make_dataset( - [json_dataset.subsets(protocol)], - [ - RemoveBlackBorders(), - transforms.Resize(resize_size), - transforms.CenterCrop(cc_size), - ], - [ElasticDeformation(p=0.8)], - post_transforms, - ) diff --git a/src/ptbench/data/indian/datamodule.py b/src/ptbench/data/indian/datamodule.py new file mode 100644 index 00000000..7042a0c4 --- /dev/null +++ b/src/ptbench/data/indian/datamodule.py @@ -0,0 +1,51 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import importlib.resources + +from ..datamodule import CachingDataModule +from ..shenzhen.datamodule import RawDataLoader +from ..split import JSONDatabaseSplit + + +class DataModule(CachingDataModule): + """Indian collection dataset for computer-aided diagnosis. + + The Indian collection database has been established to foster research + in computer-aided diagnosis of pulmonary diseases with a special + focus on pulmonary tuberculosis (TB). 
+ + * Original resolution (height x width or width x height): more than 1024 x 1024 + * Split reference: [INDIAN-2013]_ with 20% of train set for the validation set + + Data specifications: + + * Raw data input (on disk): + + * JPEG images (grayscale, encoded as RGB images with "inverted" grayscale scale) + * Variable width and height + + * Output image: + + * Transforms: + + * Load raw JPEG with :py:mod:`PIL` + * Remove black borders + * Torch center cropping to get square image + + * Final specifications: + + * Grayscale, encoded as a single plane image, 8 bits + * Square, with varying resolutions, depending on the input image + """ + + def __init__(self, split_filename: str): + super().__init__( + database_split=JSONDatabaseSplit( + importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath( + split_filename + ) + ), + raw_data_loader=RawDataLoader(), + ) diff --git a/src/ptbench/data/indian/datamodules.py b/src/ptbench/data/indian/datamodules.py deleted file mode 100644 index fc8d52ff..00000000 --- a/src/ptbench/data/indian/datamodules.py +++ /dev/null @@ -1,46 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Indian dataset for TB detection (default protocol) - -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from .
import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class DefaultModule(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("default") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = DefaultModule diff --git a/src/ptbench/data/indian/default.json b/src/ptbench/data/indian/default.json new file mode 100644 index 00000000..3bc3742a --- /dev/null +++ b/src/ptbench/data/indian/default.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/nx46.jpg", 0], + 
["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Training/px12.jpg", 1] + ], + 
"validation": [ + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Training/px13.jpg", 1] + ], + "test": [ + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Testing/px44.jpg", 1], + 
["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Testing/px46.jpg", 1], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Testing/px38.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/default.json.bz2 b/src/ptbench/data/indian/default.json.bz2 deleted file mode 100644 index d3c3ba1f0393f1006262115f942ccbd4b087b7a5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 428 zcmV;d0aN}$T4*^jL0KkKS>aqzsQ?i_Uw}kVPzC?+Bme{oCbiv4KmwdaiNu2`{u9Zn zf?x)K00m~sRDb|v000E1>Z(luXa<@FfHVP0P=ZfW(KKi@&>9UtRM{0Af7#<=JaR)a zGX-_J#hHS{Sfentr5e(W_;ON8=!%bx_N5-5r*$apxuqV;QS;Z9{d;FbHTQz%Y9MY% z#KjQEv|5Wh%b^gJRZyoU)TSDNAv{PHaH=M*MXE#_gDNSDt69VeXHW?VI<aJ_B<*I2 zp#)9NjfvsUPR}Y)-%2~B9C|r(*PgoRrrPbd1xCm6Y+Z&-GaQ<#qae88IfOCQRbFch zEG-JEq;3mr25PFA1W{tQTGV0!+#sw0S#Vo|h`Ng0hMMfxZ8g_zyKikM%2D*C9}CG; zcvvX(65o|5`_hj;-o9kFr5o91R@}=nzjttN000021sRc<6aiTQ000C401z2y%*xFh zvnn*-vViZDX7r;)M;?_I8d1ELuPH{9qr*~KXz1&V#$z%fA|fIp2#7>TL_|bHM1({{ WL_|nLNQj?z_`8xR!i0wk;)O_PV!0at diff --git a/src/ptbench/data/indian/default.py b/src/ptbench/data/indian/default.py new file mode 100644 index 00000000..7fe993a9 --- /dev/null +++ b/src/ptbench/data/indian/default.py @@ -0,0 +1,7 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from .datamodule import DataModule + +datamodule = DataModule("default.json")  # plain JSON split shipped in this package diff --git a/src/ptbench/data/indian/fold_0.json b/src/ptbench/data/indian/fold_0.json new
file mode 100644 index 00000000..2bceb427 --- /dev/null +++ b/src/ptbench/data/indian/fold_0.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Testing/px38.jpg", 1], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/px34.jpg", 1], + 
["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/nx26.jpg", 
0], + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Testing/nx3.jpg", 0] + ], + "validation": [ + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Training/px24.jpg", 1] + ], + "test": [ + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Testing/px46.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_0.json.bz2 b/src/ptbench/data/indian/fold_0.json.bz2 deleted file mode 100644 index 
855c3050b6d37dd55866301aadc01b35d950caee..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 439 zcmV;o0Z9HrT4*^jL0KkKS*eiog#ZdR-+%xRPzC?+Bme{oCbiv4KmxrOXDJX>Pw>g5 zK^U7&L6ZS6$(p2)00uw+2~X8k27qV*k%VLd6(T{k0gyCk&>p6`>`_B=?4pM++k%Q4 zI&G>bdWfN(&6{{-mQ9Jyq9PbQX30enNM0EGRvNB}iiIliO+l5SO1gJ-WlUBRMTx0^ zLlW|xh?zi(04EUQrwJCrnS*FMO^9kYQHF%#@p0Ylrqb+fHj%qGZOUZA2<d6Gu-Em? z8z>h>XidG7Z7V9xwVP(Hi>OAMG-xqUgcEF*Qp|#|iMC2gF$M!lg+Yie6jp?!lv$7r zik3lHb285?v&EKoYrRDYMGgumZWVXHlp{c<j`KrxL{kw{5=2axgA7Xo45DPjAdEsl zm{Cj-iI{SJRhDbDMGh(`a{9}VBdUrTsG-G058gUCWB>w5Bts&|tT2)*B!Uu22#_kO zBC4pM!l2{vMGSOn2mp8@g{q1e6%K6`2bo0;Q68y93@D-JkRyy59dShzQAHFH5fKDL hA|xUrA|fI}A|fIpBqAh4PwzkRcO+AV2@07HI8ao>t9AeY diff --git a/src/ptbench/data/indian/fold_0.py b/src/ptbench/data/indian/fold_0.py index ffca6347..c810e85c 100644 --- a/src/ptbench/data/indian/fold_0.py +++ b/src/ptbench/data/indian/fold_0.py @@ -2,45 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 0) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . 
import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_0") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_0.json")  # plain JSON split shipped in this package diff --git a/src/ptbench/data/indian/fold_1.json b/src/ptbench/data/indian/fold_1.json new file mode 100644 index 00000000..8dd669d9 --- /dev/null +++ b/src/ptbench/data/indian/fold_1.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Testing/nx8.jpg", 0], +
["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Testing/px46.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Training/px40.jpg", 1], + 
["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Training/nx24.jpg", 0] + ], + "validation": [ + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Testing/px36.jpg", 1], + 
["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Training/nx48.jpg", 0] + ], + "test": [ + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Testing/px38.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_1.json.bz2 b/src/ptbench/data/indian/fold_1.json.bz2 deleted file mode 100644 index 536e8855ecf28eddd09b1ab15acdc706c6dfe0a2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 431 zcmV;g0Z{%zT4*^jL0KkKSrk&#Isghb-+%xRPzC?+Bme{oCbiv4KmxrME-0WyPt6(? 
z01W^D1RE+!13(5q00})+C?T|s4H*pvO)?3pkV!VCjGlxtV?@X5na<=_C-{oz@s9#4 zew=Y4yku65yLN2C0^qTP?xZAz-pvAPDI!_!$cGau=(iPg(!t^^212nbY(zsXS*N1R zCV-lUd1r!#jgpowT!@-9UXiZIbh81bsY5cYzYX>2zob_pD}S|)jy)RVY;A3IqO{)E zeA%?nv`uQR?`pfcsG7~T)M<3)hVJZLU@>sJyMzXyxGi1WkU|g|TdHX=a3JEyG=o4S z3L3@<h|M<JU9C0SO*-AARtT>GE0U$#ilW*zS1sFe_>2*l5T?x}VWeh+nK2=hQxsz; zlmQ5c6OZ*to+MY170=sVbeSTuMR^lfr=L$TB#?vv$T75S6alRX01yd~0ziP*8*1En z?QL9peO9Wj<W@38X$s-@6yQZ=CsrETxZ2vg`Kw!wE?2z~5fKp)5fDT|B0?e}A|fOr ZA|fIpLLx*&_3i(Qxgwk>ND3)x9RRG&wO9ZE diff --git a/src/ptbench/data/indian/fold_1.py b/src/ptbench/data/indian/fold_1.py index 6075fb0e..736a778d 100644 --- a/src/ptbench/data/indian/fold_1.py +++ b/src/ptbench/data/indian/fold_1.py @@ -2,45 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 1) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . 
import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_1") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_1.json") diff --git a/src/ptbench/data/indian/fold_2.json b/src/ptbench/data/indian/fold_2.json new file mode 100644 index 00000000..412d64fc --- /dev/null +++ b/src/ptbench/data/indian/fold_2.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Testing/px38.jpg", 1], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Testing/px37.jpg", 1], + 
["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Testing/px46.jpg", 1], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Testing/nx23.jpg", 
0], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/nx1.jpg", 0] + ], + "validation": [ + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/nx28.jpg", 
0], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/px43.jpg", 1] + ], + "test": [ + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Testing/px42.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_2.json.bz2 b/src/ptbench/data/indian/fold_2.json.bz2 deleted file mode 100644 index 2d2f71f0dae9e86ba6d5990b9afd5803b643a440..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 438 zcmV;n0ZINsT4*^jL0KkKS>>ofMgR&n-+%xRPzC?+Bme{oCbiv4Kmxh~#S{jNMAD!D z$N&II>ZwVgh-s#SAi$UtRYDPv41uFZs5JEq8feo@CMHY-4KqzOkz3y6R($Z{MQhuJ z;w#!kcJ=pgU8V4+#4A;8-wj^@m&M6VP71f0%8^sFp2N&DtTGhoR&OL>xjNac)>2kA zy|p7xM)vPcwyO29R2?kIrf_AcC{_ovPg}aNw|)@<AP&fZ5CO|GyJQes>46XhB{YAo zQK^=lNQjFO5Cv7SLfTZAG$=$vs!6g_1c-=8mKqY=w8TUi3^c-$*q8u<(<Mnkw!=x8 zO_s!^EFjBh$;T~OW0pB(hFY%Y0xJS5i50w83?hwawzo#>x%X01n3PgrSpp>l(Gq}= zWsqb*r3i@&F$Wy#O*au-h^+qd&XYnbLMxWm)#>N&<beqxAtXaa(P*Mb+KC{Lkb(gT zB*0f2YTSA4ZCrZoDyzj6hJ;oyt(`)f2(1>k$+fj{wY7e^D_f2(i{^-kh=_=Yh$0~o gArTP~5fTv*5fKp~5h5ab_4^CCBAh5la?~KB0AXahwg3PC diff --git a/src/ptbench/data/indian/fold_2.py b/src/ptbench/data/indian/fold_2.py index a8514142..48df1bfe 100644 --- a/src/ptbench/data/indian/fold_2.py +++ b/src/ptbench/data/indian/fold_2.py @@ -2,45 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 2) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 
512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_2") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_2.json") diff --git a/src/ptbench/data/indian/fold_3.json b/src/ptbench/data/indian/fold_3.json new file mode 100644 index 00000000..5a587001 --- /dev/null +++ b/src/ptbench/data/indian/fold_3.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/px31.jpg", 1], + 
["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Testing/px38.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Testing/px46.jpg", 1], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Training/px5.jpg", 1], + 
["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Training/px26.jpg", 1] + ], + "validation": [ + ["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Training/nx13.jpg", 0], + 
["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Testing/nx3.jpg", 0] + ], + "test": [ + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Testing/px37.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_3.json.bz2 b/src/ptbench/data/indian/fold_3.json.bz2 deleted file mode 100644 index 93d5428108b9ed474c3ca44f2c660dd1b9712560..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 442 zcmV;r0Y&~oT4*^jL0KkKS({;<bpQ%B-+%xRPzC?+Bme{oCbiv4Kmxh~#S{jNM9QSd zV?a$A01|qtQ8Y71(WW3`G66MIO%ad{0ib9!{ZPTCjWohyWWYevF^yza_puem<-m&9 zw=Kk1tcvN<dUtpDbhqJi%JLP&uPSEzwdM+3!Y;<s&S7@ZK;CZd7q`w$6(*LhX`I8R zc#@gUQ(1>@b;y)z`DT)HT+H5bxw(vQEzdBgb*}R(&LBhq-4G%G>gwED3sT!ZND%;k zu8E~qXsV=(5fNe{0K2DCZku$8bx<NESyb9(F(M)pGLv#81ki|sETt?dZIb{plO@Y@ zDT!)JVU*I85N$M!mb2BW($iLXX{MR>H<4KpTnMf3@5)qb8(O-zZSuK9l0=x91jLYp zi9`~RX-Y_iN^DJ&BuEIe<x*pD74AiG`_;yi1Xc*I&eqla_2Z%v06>s{QE1vIB-Vt0 zAtW+DBqR}BZL4wXwY71LrB!$$vcVOcD`!Ba!Ydlq<l5S}+S<H1t6PpPtE~|c5fKp) k5JW;ELLwp}A|xUrA|fI}B1A<0cK^lPkxmpOCfH|PK<pdEm;e9( diff --git a/src/ptbench/data/indian/fold_3.py b/src/ptbench/data/indian/fold_3.py index f0e4b15a..9967e4ea 100644 --- a/src/ptbench/data/indian/fold_3.py +++ b/src/ptbench/data/indian/fold_3.py @@ -2,45 +2,6 @@ # # 
SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 3) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_3") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_3.json") diff --git a/src/ptbench/data/indian/fold_4.json b/src/ptbench/data/indian/fold_4.json new file mode 100644 index 00000000..0342a015 --- /dev/null +++ b/src/ptbench/data/indian/fold_4.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Training/nx49.jpg", 0], + 
["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Testing/px38.jpg", 1], 
+ ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Testing/px46.jpg", 1], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Testing/nx10.jpg", 0] + ], + "validation": [ + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Training/nx43.jpg", 0], + 
["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Testing/px49.jpg", 1] + ], + "test": [ + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Testing/px33.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_4.json.bz2 b/src/ptbench/data/indian/fold_4.json.bz2 deleted file mode 100644 index aa45648f410cd3318c8efa892a0031bfe0575e55..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 437 zcmV;m0ZRTtT4*^jL0KkKS*9qjYXAy1-+%xRPzC?+Bme{oCbiv4Kmxh~#S{jNM9Qcb zo|DQ06G4QYs+3JMXc!|SAjtIu)k-u&Kr#l69-uu#2AVX}35k;d15DFRgjV;l6`$@n zky|=3xQg|WT|2sU>DHKCk5k*W7Or`mdAVZfndI*j;97PzTbp)>S+-_UM7%h8xqRHN zDYtKChHKh8e87g_!1qy7>9Psq8K`r2Ij?Up+UGTWVgx`RIDrraZ@Rtdw%Td;K!^kW zU07=BOwyXDh>H;r0W@yex{!+%OhiP?w#g;7Vj>zPCoMJ#5fL&NP789ufDuIo9LsE^ zQI@4P%Rz#fB{bTS!ph55R$B7ya3ZipaU!>s!xS-#QMR{M?YjN)MM%uTMr2Gek&=wU zkWxhhGRj3FN=YJzAKJCKZX&r6S^aCylR_&(E0Gob>(R{s2?7!zQCPH41!$8AAOr*? 
z2>@%2wQfCjwyrsQwyLh=Rx~2DLbmjZZ6dTu)q-l{Yij#*R<|5oSH%$#5fKp)5JW;E fLLwp}A|xUrA|fI}B1A;x+y57GML1B9rYNs#n>o3u diff --git a/src/ptbench/data/indian/fold_4.py b/src/ptbench/data/indian/fold_4.py index 4a1ca920..8630ee09 100644 --- a/src/ptbench/data/indian/fold_4.py +++ b/src/ptbench/data/indian/fold_4.py @@ -2,45 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 4) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_4") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_4.json") diff --git a/src/ptbench/data/indian/fold_5.json b/src/ptbench/data/indian/fold_5.json new file mode 100644 index 00000000..f8ffb386 --- /dev/null +++ b/src/ptbench/data/indian/fold_5.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Testing/nx18.jpg", 
0], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Testing/px38.jpg", 1], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Training/nx25.jpg", 0], + 
["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/nx40.jpg", 0] + ], + "validation": [ + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/px3.jpg", 1], + 
["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Testing/px46.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Training/px7.jpg", 1] + ], + "test": [ + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_5.json.bz2 b/src/ptbench/data/indian/fold_5.json.bz2 deleted file mode 100644 index 9364b3a72199720389fd6f9de7785a870865362b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 435 zcmV;k0ZjfvT4*^jL0KkKS-1iENB{~p-+%xRPzC?+Bme{oCbiv4Kmxh~#S{jNMK-Ab zGJ1dj1fHstO&VwnBO?e8K{ZmRkpK*XBPM|L3>s+DOeQ8w1PwDyH4$4L<X7*_yo%M+ zj-o5tMQ`3Gem%TjAn%svTk1tdA*wl~Y)uTg7-;33*^JW!=7Jfu>@CZoOGjaaTe-^H 
zGVa2ijSUL6@z{G+oDDrZ$``DbC|MP)3fj%t%h!ZJhy%7DL;=FM4GVJ8P4z&C0j2$6 zS~E&$(PAPjL_imJbS%)C2vDeqiZe~5(*%f!mZb(5Nx32-3X+>aB+_sLl!{>n<+%m0 z%Pb<?i7mF!W>`x+@r<#RmU;KKkys+Pky|LOxS}f+uC3X0-FRj)z!0*Gf|DVMGaE7n zK*%zJVnq!Mjz86Fa`%y32(DjQa-`6T(2C$ie|~gxNDu%50Apy{D3GlL5<*D;K#+t< zt~S-U{_SmCa_QQtyHQxsisprD>lD^SXp^%9)yCG<_vWo`IJ&QjA|fIpA|fD&ghYfy dL_|bLL_|bHM1(|$iOaLpUC9*TLPFpN=^!9`ym$Zr diff --git a/src/ptbench/data/indian/fold_5.py b/src/ptbench/data/indian/fold_5.py index 7d411c26..0c7504c5 100644 --- a/src/ptbench/data/indian/fold_5.py +++ b/src/ptbench/data/indian/fold_5.py @@ -2,45 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 5) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . 
import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_5") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_5.json.bz2") diff --git a/src/ptbench/data/indian/fold_6.json b/src/ptbench/data/indian/fold_6.json new file mode 100644 index 00000000..721aae0f --- /dev/null +++ b/src/ptbench/data/indian/fold_6.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Testing/px46.jpg", 1], + 
["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Training/px12.jpg", 1], + 
["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Testing/px38.jpg", 1], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Training/px37.jpg", 1] + ], + "validation": [ + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/nx19.jpg", 0], 
+ ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/nx46.jpg", 0] + ], + "test": [ + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Testing/px47.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_6.json.bz2 b/src/ptbench/data/indian/fold_6.json.bz2 deleted file mode 100644 index f41ea98ea4e6c79d6f2abc5e26fd8ffca27e7310..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 432 zcmV;h0Z;xyT4*^jL0KkKS&oXKQveD!-+%xRPzC?+Bme{oCbiv4KmxrME-0W>@MP6V zrkNUklnpdwVHj$XfY1Su00K`{N=8OZnq<%nMu(t^Q)r-!O`$O{Jw_g-?^P5rdMKfn znC7B|bVijFKLk+o*Du2_IoBeFaQf6S(U7oJpi|7kd%<8yB-J}YG-E`PcnAz$6F{tB z12h3MX`Muvj3sz63Tc#Gvs|+%dz+edR^$hys6l+kB@{7HLe<lvl+02hNu7wHd@DAa zQluR&vSLeYNoF%<ZLy#psjW;&Fa|gnTuBnqlH-8dNk~v|fnc)+GE$JGxeCU>c3BF> zO3J`xoFI~EI~~d>LW&&JP{d8pio`C_YPz?5+tOe_%_P*pCQ}J9kuqeNHW{V}l7y8A znHL}GwYhZdD0NXoFZyH%-9-ziq18nXUM)yT0uY3V6m1(t5Sr3J5RyrN5Rw4b8*1Et zcDAlP+?J}Zp0)_OiWmxoz7+<_C|wcxg%mK_+P=Kit;ZKt)`*CRh=_=YA|VkW5fKp) a5)ly*5fLE~A|i6{^%rtQI8cy|ilI}k__1*S diff --git a/src/ptbench/data/indian/fold_6.py b/src/ptbench/data/indian/fold_6.py index cf1ba7ee..2f8e8e32 100644 --- a/src/ptbench/data/indian/fold_6.py +++ b/src/ptbench/data/indian/fold_6.py @@ -2,45 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 6) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This 
configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_6") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_6.json.bz2") diff --git a/src/ptbench/data/indian/fold_7.json b/src/ptbench/data/indian/fold_7.json new file mode 100644 index 00000000..6cecc46a --- /dev/null +++ b/src/ptbench/data/indian/fold_7.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Training/px9.jpg", 1], + 
["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Testing/px41.jpg", 1], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Testing/nx6.jpg", 0], + 
["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Testing/px29.jpg", 1] + ], + "validation": [ + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Testing/px46.jpg", 1], + 
["DatasetA/Testing/px38.jpg", 1], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Testing/px31.jpg", 1] + ], + "test": [ + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Testing/px30.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_7.json.bz2 b/src/ptbench/data/indian/fold_7.json.bz2 deleted file mode 100644 index ea48efb36b52774907d720317cec34ebf1479a5a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 437 zcmV;m0ZRTtT4*^jL0KkKS=GWCz5og~-+%xRPzC?+Bme{oCbiv4Kmxh~#S{jZNus3F zBTObHMwo#os--4On1%uXGB8b4lSDKFAZXE`JwpbXG}8%*lK}%v(@kVn=dl&b_Z$eV zeHhwBddRQ8XLo;hY(1;O?HCO2D{nR7rpTN+BbglJ8E0+p4xr8RG;U^Xg`7MPoowu7 z?$VlbFmYN(Pf0wPSq5>P=QuTN?{iM=-qeRjmN|HD^HeH;?5Y(&O&sSUia{4$3ZNCs zSN>!)>{wPRt3s-PD^Tl3)0SnsbSjc;SS%<c6;wn_z~OL2Dyj(FNEFGEfHD>&jiV$A zMG-N`S}sIlkT_1P@t#&Q&n)uQ+TKNAir__VQ+_E%i)&pwy7^r|l-SaeLo*>MGHH@A z0U$^eSpX@c1kAS|>b11mMR6jzeQM=Np%tMO#ESp=am@)K2ogksjiS+^3eZFd01!e+ z34pFP)wuN9+PL-T+N!(}SkQ{f6|<yMVHKiItQOVA*45?7THJAT-Drr2h=_=Yf+7(T f5fKp)5g`!~5fKs*5+WzBcd)yXDZ+$?t`ODm_xHTf diff --git a/src/ptbench/data/indian/fold_7.py b/src/ptbench/data/indian/fold_7.py index 49a4d8f1..389e7f4e 100644 --- a/src/ptbench/data/indian/fold_7.py +++ b/src/ptbench/data/indian/fold_7.py @@ -2,45 +2,6 @@ # # 
SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 7) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_7") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_7.json.bz2") diff --git a/src/ptbench/data/indian/fold_8.json b/src/ptbench/data/indian/fold_8.json new file mode 100644 index 00000000..5a4d1865 --- /dev/null +++ b/src/ptbench/data/indian/fold_8.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Testing/px47.jpg", 
1], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Training/px16.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Testing/px46.jpg", 1], + ["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Testing/nx7.jpg", 0], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Testing/px31.jpg", 1], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Training/px36.jpg", 1], + 
["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/nx36.jpg", 0], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Training/nx11.jpg", 0] + ], + "validation": [ + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/px42.jpg", 1], + 
["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Testing/px38.jpg", 1], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Training/nx2.jpg", 0] + ], + "test": [ + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Testing/px40.jpg", 1], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Testing/px41.jpg", 1] + ] +} diff --git a/src/ptbench/data/indian/fold_8.json.bz2 b/src/ptbench/data/indian/fold_8.json.bz2 deleted file mode 100644 index 49a6c2341e86184469f963359c4d2102ed5249ed..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 437 zcmV;m0ZRTtT4*^jL0KkKSu^tZVgL#@-+%xRPzC?+Bme{oCbiv4Kmxt88;U3qQ~dzd zLqGrk0La>unLrH>Pyhgv)l!oX(V){tBNIbv3969@4F*8bqd<C^X{?Ie_9D4{<AD{g zqa7qytcv5ue%<MAvitX~)@V&~Zw?m~*)kgGw$C-xyuGOw3_Hdpb$4m2rnpK#mS$z5 zW>i&DEU_&NsA+103Kq9u5P~Kkl9m{VRHB42kdsrvwAx*brqbo%wrfEZkiM3iO3&=l zXl7Mmm6DThMB7Uyi$$7Il?*VZo3hNSs*tLQwhFBTvJ&k!=nzU*C5UNC3%D=|c2I~} zBre4Y1O*aQ%T}ysjAxyzwY-YK6~Kz!TkumDt&M8Bw`J4yp_zpYCJHkP2xd$(6vHHA zDM-x;n9^h!$NH^qUj3}DBv&u2`BG>_Xhm@%zrQwTVgVoofF_GZ(WHZFB!UEzOae#{ z2DsZ+<NLL>aqZ-_ReiO!V?rw^R?eYKgjR_<@I_^{wSGCPTaGTP;)sZdh=_=YA|VkW f5fKp)5)ly*5fLE~A|i6^|BJaIoG3^c`Ft?|C?36J diff --git 
a/src/ptbench/data/indian/fold_8.py b/src/ptbench/data/indian/fold_8.py index 9f634074..a9480359 100644 --- a/src/ptbench/data/indian/fold_8.py +++ b/src/ptbench/data/indian/fold_8.py @@ -2,45 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 8) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_8") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_8.json.bz2") diff --git a/src/ptbench/data/indian/fold_9.json b/src/ptbench/data/indian/fold_9.json new file mode 100644 index 00000000..bb906259 --- /dev/null +++ b/src/ptbench/data/indian/fold_9.json @@ -0,0 +1,163 @@ +{ + "train": [ + ["DatasetA/Training/nx35.jpg", 0], + ["DatasetA/Testing/px30.jpg", 1], + ["DatasetA/Training/px35.jpg", 1], + ["DatasetA/Testing/px39.jpg", 1], + ["DatasetA/Training/nx44.jpg", 0], + ["DatasetA/Training/nx47.jpg", 0], + ["DatasetA/Training/nx2.jpg", 0], + ["DatasetA/Testing/nx13.jpg", 0], + ["DatasetA/Training/nx28.jpg", 0], + ["DatasetA/Testing/px41.jpg", 1], + 
["DatasetA/Training/nx10.jpg", 0], + ["DatasetA/Training/nx17.jpg", 0], + ["DatasetA/Testing/nx24.jpg", 0], + ["DatasetA/Testing/px48.jpg", 1], + ["DatasetA/Training/px49.jpg", 1], + ["DatasetA/Training/nx37.jpg", 0], + ["DatasetA/Training/nx21.jpg", 0], + ["DatasetA/Training/px51.jpg", 1], + ["DatasetA/Testing/px32.jpg", 1], + ["DatasetA/Training/px27.jpg", 1], + ["DatasetA/Training/nx12.jpg", 0], + ["DatasetA/Training/px12.jpg", 1], + ["DatasetA/Training/px46.jpg", 1], + ["DatasetA/Training/nx33.jpg", 0], + ["DatasetA/Training/px10.jpg", 1], + ["DatasetA/Training/px19.jpg", 1], + ["DatasetA/Training/px29.jpg", 1], + ["DatasetA/Training/nx43.jpg", 0], + ["DatasetA/Testing/nx19.jpg", 0], + ["DatasetA/Training/nx34.jpg", 0], + ["DatasetA/Training/nx45.jpg", 0], + ["DatasetA/Training/px30.jpg", 1], + ["DatasetA/Testing/nx2.jpg", 0], + ["DatasetA/Training/px24.jpg", 1], + ["DatasetA/Training/nx49.jpg", 0], + ["DatasetA/Training/px21.jpg", 1], + ["DatasetA/Training/px43.jpg", 1], + ["DatasetA/Training/nx7.jpg", 0], + ["DatasetA/Training/px44.jpg", 1], + ["DatasetA/Training/px41.jpg", 1], + ["DatasetA/Training/px9.jpg", 1], + ["DatasetA/Training/px47.jpg", 1], + ["DatasetA/Testing/nx21.jpg", 0], + ["DatasetA/Training/px32.jpg", 1], + ["DatasetA/Training/nx38.jpg", 0], + ["DatasetA/Training/px23.jpg", 1], + ["DatasetA/Training/nx50.jpg", 0], + ["DatasetA/Training/nx51.jpg", 0], + ["DatasetA/Training/px34.jpg", 1], + ["DatasetA/Testing/px51.jpg", 1], + ["DatasetA/Training/nx40.jpg", 0], + ["DatasetA/Training/nx11.jpg", 0], + ["DatasetA/Training/nx31.jpg", 0], + ["DatasetA/Testing/nx5.jpg", 0], + ["DatasetA/Testing/px49.jpg", 1], + ["DatasetA/Training/px52.jpg", 1], + ["DatasetA/Testing/nx10.jpg", 0], + ["DatasetA/Testing/px44.jpg", 1], + ["DatasetA/Training/px2.jpg", 1], + ["DatasetA/Training/nx48.jpg", 0], + ["DatasetA/Training/nx18.jpg", 0], + ["DatasetA/Training/px4.jpg", 1], + ["DatasetA/Training/nx8.jpg", 0], + ["DatasetA/Training/px16.jpg", 1], + 
["DatasetA/Testing/px45.jpg", 1], + ["DatasetA/Training/nx1.jpg", 0], + ["DatasetA/Testing/px33.jpg", 1], + ["DatasetA/Training/nx52.jpg", 0], + ["DatasetA/Training/nx19.jpg", 0], + ["DatasetA/Training/nx13.jpg", 0], + ["DatasetA/Training/nx9.jpg", 0], + ["DatasetA/Testing/nx26.jpg", 0], + ["DatasetA/Testing/px46.jpg", 1], + ["DatasetA/Training/nx22.jpg", 0], + ["DatasetA/Testing/nx14.jpg", 0], + ["DatasetA/Training/nx25.jpg", 0], + ["DatasetA/Testing/px52.jpg", 1], + ["DatasetA/Training/px3.jpg", 1], + ["DatasetA/Testing/nx22.jpg", 0], + ["DatasetA/Testing/nx4.jpg", 0], + ["DatasetA/Training/px7.jpg", 1], + ["DatasetA/Testing/px34.jpg", 1], + ["DatasetA/Training/px11.jpg", 1], + ["DatasetA/Testing/nx11.jpg", 0], + ["DatasetA/Testing/px43.jpg", 1], + ["DatasetA/Testing/nx16.jpg", 0], + ["DatasetA/Training/px37.jpg", 1], + ["DatasetA/Training/nx5.jpg", 0], + ["DatasetA/Training/px1.jpg", 1], + ["DatasetA/Training/px40.jpg", 1], + ["DatasetA/Training/px26.jpg", 1], + ["DatasetA/Training/px38.jpg", 1], + ["DatasetA/Testing/px27.jpg", 1], + ["DatasetA/Testing/nx6.jpg", 0], + ["DatasetA/Training/nx24.jpg", 0], + ["DatasetA/Training/nx29.jpg", 0], + ["DatasetA/Testing/px50.jpg", 1], + ["DatasetA/Training/px14.jpg", 1], + ["DatasetA/Testing/nx20.jpg", 0], + ["DatasetA/Training/nx30.jpg", 0], + ["DatasetA/Training/nx3.jpg", 0], + ["DatasetA/Testing/nx9.jpg", 0], + ["DatasetA/Testing/nx18.jpg", 0], + ["DatasetA/Testing/px36.jpg", 1], + ["DatasetA/Training/px13.jpg", 1], + ["DatasetA/Training/px5.jpg", 1], + ["DatasetA/Testing/px28.jpg", 1], + ["DatasetA/Testing/nx3.jpg", 0], + ["DatasetA/Testing/nx12.jpg", 0], + ["DatasetA/Training/px48.jpg", 1], + ["DatasetA/Training/px31.jpg", 1], + ["DatasetA/Testing/px31.jpg", 1] + ], + "validation": [ + ["DatasetA/Training/nx15.jpg", 0], + ["DatasetA/Training/px6.jpg", 1], + ["DatasetA/Training/nx4.jpg", 0], + ["DatasetA/Training/nx6.jpg", 0], + ["DatasetA/Training/px33.jpg", 1], + ["DatasetA/Training/nx36.jpg", 0], + 
["DatasetA/Testing/px42.jpg", 1], + ["DatasetA/Testing/px38.jpg", 1], + ["DatasetA/Training/nx26.jpg", 0], + ["DatasetA/Training/nx14.jpg", 0], + ["DatasetA/Training/nx39.jpg", 0], + ["DatasetA/Testing/nx17.jpg", 0], + ["DatasetA/Testing/nx1.jpg", 0], + ["DatasetA/Training/nx42.jpg", 0], + ["DatasetA/Training/nx32.jpg", 0], + ["DatasetA/Training/px25.jpg", 1], + ["DatasetA/Testing/px37.jpg", 1], + ["DatasetA/Testing/nx8.jpg", 0], + ["DatasetA/Training/px50.jpg", 1], + ["DatasetA/Testing/px35.jpg", 1], + ["DatasetA/Testing/px47.jpg", 1], + ["DatasetA/Training/px45.jpg", 1], + ["DatasetA/Training/nx41.jpg", 0], + ["DatasetA/Training/nx46.jpg", 0], + ["DatasetA/Training/px42.jpg", 1], + ["DatasetA/Training/px20.jpg", 1], + ["DatasetA/Testing/px29.jpg", 1], + ["DatasetA/Testing/px40.jpg", 1] + ], + "test": [ + ["DatasetA/Training/nx20.jpg", 0], + ["DatasetA/Training/nx16.jpg", 0], + ["DatasetA/Training/nx27.jpg", 0], + ["DatasetA/Training/px15.jpg", 1], + ["DatasetA/Training/px8.jpg", 1], + ["DatasetA/Training/px28.jpg", 1], + ["DatasetA/Training/px17.jpg", 1], + ["DatasetA/Training/px39.jpg", 1], + ["DatasetA/Training/px22.jpg", 1], + ["DatasetA/Training/px36.jpg", 1], + ["DatasetA/Training/px18.jpg", 1], + ["DatasetA/Testing/nx25.jpg", 0], + ["DatasetA/Testing/nx15.jpg", 0], + ["DatasetA/Testing/nx23.jpg", 0], + ["DatasetA/Testing/nx7.jpg", 0] + ] +} diff --git a/src/ptbench/data/indian/fold_9.json.bz2 b/src/ptbench/data/indian/fold_9.json.bz2 deleted file mode 100644 index 27a3fb922ea3f408d707e2314a4ed8bcb58b7079..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 439 zcmV;o0Z9HrT4*^jL0KkKS$%{a5C94`-+%xRPzC?+Bme{oCbiv4KmxrME-0WyPx2EL z0BNJt001;*r6>&=Xc-1fjDk;9N=%tC3<LmVV2UZ~o~ZH@O{CG0lhiTlm%gPGGk%m% z)4X$0Lsl`8iXS2<Zo>q|=%tmhbaQmM-fr?3u}e)OK%-=<ODF+@(6rQrN16eJomu7G zrk&Ohpog+CSQSQuLrAEVv@$fzWGNLyt)WY)Dqbo9Xo)!6804acYA8-v&z$*8$%RyI zMG3tTR<)T6%!E}{qN=2<S(+L!gL5+#Rb+8t8E_R<4(tUX7@Df65?DAP1tkRK*>;$L 
z5(+sR3ycS3R|OivikONps@(8VLKINuqK0BG2uO=#YgbP0zF!GOQV1A`r3MhFjfOEY zf?+0xXk<hfktxUet!w0>heZ@L{lh>;>L^`B4vHv${W#`?fgu7+3M&?hAuB*&0SPhy z5D*o{+P5B?TUQ>v8&y|&C`Hszkx;g^L9&V$M0P@st~R!>E=tzpi>B*DL_|bHL_`q~ hh>(bgh=_>@h=_=Ykcg2HJ$t={+>uTcBp+c1gaA?kuU!BD diff --git a/src/ptbench/data/indian/fold_9.py b/src/ptbench/data/indian/fold_9.py index a406bc17..daa85e03 100644 --- a/src/ptbench/data/indian/fold_9.py +++ b/src/ptbench/data/indian/fold_9.py @@ -2,45 +2,6 @@ # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian dataset for TB detection (cross validation fold 9) +from .datamodule import DataModule -* Split reference: [INDIAN-2013]_ with 20% of train set for the validation set -* This configuration resolution: 512 x 512 (default) -* See :py:mod:`ptbench.data.indian` for dataset details -""" - -from clapper.logging import setup - -from .. import return_subsets -from ..base_datamodule import BaseDataModule -from . import _maker - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -class Fold0Module(BaseDataModule): - def __init__( - self, - train_batch_size=1, - predict_batch_size=1, - drop_incomplete_batch=False, - multiproc_kwargs=None, - ): - super().__init__( - train_batch_size=train_batch_size, - predict_batch_size=predict_batch_size, - drop_incomplete_batch=drop_incomplete_batch, - multiproc_kwargs=multiproc_kwargs, - ) - - def setup(self, stage: str): - self.dataset = _maker("fold_9") - ( - self.train_dataset, - self.validation_dataset, - self.extra_validation_datasets, - self.predict_dataset, - ) = return_subsets(self.dataset) - - -datamodule = Fold0Module +datamodule = DataModule("fold_9.json.bz2") -- GitLab