From 179427210442441d766d3005ae09381fc5288747 Mon Sep 17 00:00:00 2001 From: Andre Anjos <andre.dos.anjos@gmail.com> Date: Thu, 3 Aug 2023 17:54:27 +0200 Subject: [PATCH] [doc] Make it pass --- doc/api.rst | 72 +-- doc/conf.py | 1 + doc/config.rst | 32 +- doc/links.rst | 1 + doc/results/index.rst | 30 +- doc/usage/evaluation.rst | 4 +- pyproject.toml | 14 +- src/ptbench/data/datamodule.py | 72 ++- src/ptbench/data/hivtb/datamodule.py | 42 +- src/ptbench/data/hivtb/fold_0.py | 2 +- src/ptbench/data/hivtb/fold_1.py | 2 +- src/ptbench/data/hivtb/fold_2.py | 2 +- src/ptbench/data/hivtb/fold_3.py | 2 +- src/ptbench/data/hivtb/fold_4.py | 2 +- src/ptbench/data/hivtb/fold_5.py | 2 +- src/ptbench/data/hivtb/fold_6.py | 2 +- src/ptbench/data/hivtb/fold_7.py | 2 +- src/ptbench/data/hivtb/fold_8.py | 2 +- src/ptbench/data/hivtb/fold_9.py | 2 +- src/ptbench/data/indian/datamodule.py | 22 +- src/ptbench/data/indian/fold_0.py | 5 + src/ptbench/data/indian/fold_1.py | 5 + src/ptbench/data/indian/fold_2.py | 5 + src/ptbench/data/indian/fold_3.py | 5 + src/ptbench/data/indian/fold_4.py | 5 + src/ptbench/data/indian/fold_5.py | 5 + src/ptbench/data/indian/fold_6.py | 5 + src/ptbench/data/indian/fold_7.py | 5 + src/ptbench/data/indian/fold_8.py | 5 + src/ptbench/data/indian/fold_9.py | 5 + src/ptbench/data/montgomery/datamodule.py | 44 +- src/ptbench/data/montgomery/fold_0.py | 4 + src/ptbench/data/montgomery/fold_1.py | 4 + src/ptbench/data/montgomery/fold_2.py | 4 + src/ptbench/data/montgomery/fold_3.py | 4 + src/ptbench/data/montgomery/fold_4.py | 4 + src/ptbench/data/montgomery/fold_5.py | 4 + src/ptbench/data/montgomery/fold_6.py | 4 + src/ptbench/data/montgomery/fold_7.py | 4 + src/ptbench/data/montgomery/fold_8.py | 4 + src/ptbench/data/montgomery/fold_9.py | 4 + .../data/montgomery_shenzhen/default.py | 5 + .../data/montgomery_shenzhen/fold_0.py | 5 + .../data/montgomery_shenzhen/fold_1.py | 5 + .../data/montgomery_shenzhen/fold_2.py | 5 + 
.../data/montgomery_shenzhen/fold_3.py | 5 + .../data/montgomery_shenzhen/fold_4.py | 5 + .../data/montgomery_shenzhen/fold_5.py | 5 + .../data/montgomery_shenzhen/fold_6.py | 5 + .../data/montgomery_shenzhen/fold_7.py | 5 + .../data/montgomery_shenzhen/fold_8.py | 5 + .../data/montgomery_shenzhen/fold_9.py | 5 + .../montgomery_shenzhen_indian/datamodule.py | 3 +- .../montgomery_shenzhen_indian/default.py | 5 + .../data/montgomery_shenzhen_indian/fold_0.py | 6 + .../data/montgomery_shenzhen_indian/fold_1.py | 5 + .../data/montgomery_shenzhen_indian/fold_2.py | 6 + .../data/montgomery_shenzhen_indian/fold_3.py | 6 + .../data/montgomery_shenzhen_indian/fold_4.py | 6 + .../data/montgomery_shenzhen_indian/fold_5.py | 6 + .../data/montgomery_shenzhen_indian/fold_6.py | 6 + .../data/montgomery_shenzhen_indian/fold_7.py | 5 + .../data/montgomery_shenzhen_indian/fold_8.py | 6 + .../data/montgomery_shenzhen_indian/fold_9.py | 5 + .../datamodule.py | 3 +- src/ptbench/data/nih_cxr14/datamodule.py | 36 +- src/ptbench/data/nih_cxr14/default.py | 2 +- .../data/nih_cxr14_padchest/datamodule.py | 3 +- src/ptbench/data/padchest/datamodule.py | 436 +++++++++--------- src/ptbench/data/padchest/idiap.py | 2 +- src/ptbench/data/shenzhen/datamodule.py | 47 +- src/ptbench/data/shenzhen/default.py | 4 +- src/ptbench/data/shenzhen/fold_0.py | 4 + src/ptbench/data/shenzhen/fold_1.py | 4 + src/ptbench/data/shenzhen/fold_2.py | 4 + src/ptbench/data/shenzhen/fold_3.py | 4 + src/ptbench/data/shenzhen/fold_4.py | 4 + src/ptbench/data/shenzhen/fold_5.py | 4 + src/ptbench/data/shenzhen/fold_6.py | 4 + src/ptbench/data/shenzhen/fold_7.py | 4 + src/ptbench/data/shenzhen/fold_8.py | 4 + src/ptbench/data/shenzhen/fold_9.py | 4 + src/ptbench/data/tbpoc/datamodule.py | 49 +- src/ptbench/data/tbpoc/fold_0.py | 2 +- src/ptbench/data/tbpoc/fold_1.py | 2 +- src/ptbench/data/tbpoc/fold_2.py | 2 +- src/ptbench/data/tbpoc/fold_3.py | 2 +- src/ptbench/data/tbpoc/fold_4.py | 2 +- src/ptbench/data/tbpoc/fold_5.py 
| 2 +- src/ptbench/data/tbpoc/fold_6.py | 2 +- src/ptbench/data/tbpoc/fold_7.py | 2 +- src/ptbench/data/tbpoc/fold_8.py | 2 +- src/ptbench/data/tbpoc/fold_9.py | 2 +- src/ptbench/data/tbx11k/datamodule.py | 129 ++---- src/ptbench/data/tbx11k/v1_healthy_vs_atb.py | 24 + src/ptbench/data/tbx11k/v2_others_vs_atb.py | 24 + src/ptbench/data/typing.py | 17 +- src/ptbench/models/alexnet.py | 10 +- src/ptbench/models/densenet.py | 10 +- src/ptbench/models/normalizer.py | 2 +- src/ptbench/models/pasa.py | 8 +- src/ptbench/scripts/train_analysis.py | 14 +- src/ptbench/utils/checkpointer.py | 12 +- 103 files changed, 804 insertions(+), 652 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 0114d9ff..41fc340f 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -22,41 +22,49 @@ Auxiliary classes and methods to define raw dataset iterators. .. autosummary:: :toctree: api/data - ptbench.data.sample - ptbench.data.dataset - ptbench.data.utils - ptbench.data.loader - ptbench.data.transforms - ptbench.configs.datasets + ptbench.data.augmentations + ptbench.data.datamodule + ptbench.data.image_utils + ptbench.data.split + ptbench.data.typing +.. _ptbench.api.rawdata: + +Basic Datamodules +----------------- + +Pytorch_ :py:class:`torch.utils.data.DataLoader` access through lightning_ +:py:class:`lightning.pytorch.core.LightningDataModule`. + +.. autosummary:: + :toctree: api/data/raw + + ptbench.data.hivtb.datamodule + ptbench.data.indian.datamodule + ptbench.data.montgomery.datamodule + ptbench.data.nih_cxr14.datamodule + ptbench.data.padchest.datamodule + ptbench.data.shenzhen.datamodule + ptbench.data.tbpoc.datamodule + ptbench.data.tbx11k.datamodule -.. _ptbench.api.data.raw: -Raw Dataset Access ------------------- +.. _ptbench.api.remixdata: -Direct data-access through iterators. +Remixed Datamodules +------------------- + +Pytorch_ :py:class:`torch.utils.data.DataLoader` access through lightning_ +:py:class:`lightning.pytorch.core.LightningDataModule`. .. 
autosummary:: :toctree: api/data/raw - ptbench.data.hivtb_RS - ptbench.data.tbpoc - ptbench.data.montgomery_RS - ptbench.data.padchest - ptbench.data.hivtb - ptbench.data.indian_RS - ptbench.data.shenzhen_RS - ptbench.data.tbpoc_RS - ptbench.data.shenzhen - ptbench.data.montgomery - ptbench.data.indian - ptbench.data.nih_cxr14_re - ptbench.data.padchest_RS - ptbench.data.tbx11k_simplified - ptbench.data.tbx11k_simplified_RS - ptbench.data.tbx11k_simplified_v2 - ptbench.data.tbx11k_simplified_v2_RS + ptbench.data.montgomery_shenzhen.datamodule + ptbench.data.montgomery_shenzhen_indian.datamodule + ptbench.data.montgomery_shenzhen_indian_tbx11k.datamodule + ptbench.data.montgomery_shenzhen_indian_padchest.datamodule + ptbench.data.nih_cxr14_padchest.datamodule .. _ptbench.api.models: @@ -69,12 +77,11 @@ CNN and other models implemented. .. autosummary:: :toctree: api/models + ptbench.models.pasa ptbench.models.alexnet ptbench.models.densenet - ptbench.models.densenet_rs - ptbench.models.logistic_regression ptbench.models.normalizer - ptbench.models.pasa + ptbench.models.logistic_regression ptbench.models.signs_to_tb @@ -88,11 +95,14 @@ Functions to actuate on the data. .. autosummary:: :toctree: api/engine + ptbench.engine.device + ptbench.engine.callbacks ptbench.engine.trainer ptbench.engine.predictor ptbench.engine.evaluator + .. _ptbench.api.utils: Various utilities @@ -106,6 +116,7 @@ Reusable auxiliary functions. ptbench.utils.checkpointer ptbench.utils.download ptbench.utils.grad_cams + ptbench.utils.image ptbench.utils.measure ptbench.utils.model_serialization ptbench.utils.model_zoo @@ -114,6 +125,7 @@ Reusable auxiliary functions. ptbench.utils.resources ptbench.utils.summary ptbench.utils.table + ptbench.utils.tensorboard .. 
include:: links.rst diff --git a/doc/conf.py b/doc/conf.py index 49b7ceac..b69ef1b9 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -120,6 +120,7 @@ auto_intersphinx_packages = [ "pandas", "pillow", "psutil", + "scipy", "torch", "torchvision", "lightning", diff --git a/doc/config.rst b/doc/config.rst index b0774e35..34025f7f 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -11,24 +11,26 @@ This module contains preset configurations for baseline CNN architectures and datamodules. +.. _ptbench.config.models: + Models ====== .. autosummary:: - :toctree: api/configs/models + :toctree: api/models/config :template: config.rst - ptbench.configs.models.alexnet - ptbench.configs.models.alexnet_pretrained - ptbench.configs.models.densenet - ptbench.configs.models.densenet_pretrained - ptbench.configs.models.densenet_rs - ptbench.configs.models.logistic_regression - ptbench.configs.models.pasa - ptbench.configs.models.signs_to_tb + ptbench.models.config.alexnet + ptbench.models.config.alexnet_pretrained + ptbench.models.config.densenet + ptbench.models.config.densenet_pretrained + ptbench.models.config.densenet_rs + ptbench.models.config.logistic_regression + ptbench.models.config.pasa + ptbench.models.config.signs_to_tb -.. _ptbench.configs.datamodules: +.. _ptbench.config.datamodules: Preset Datamodules ================== @@ -39,7 +41,7 @@ input standard, if applicable. Use these datamodules for training and evaluatin your models. .. autosummary:: - :toctree: api/configs/datamodules + :toctree: api/data/config :template: config.rst ptbench.data.indian.default @@ -51,7 +53,7 @@ your models. ptbench.data.tbx11k.v2_others_vs_atb -.. _ptbench.configs.datamodules.remix: +.. _ptbench.config.datamodules.remix: Remix Datamodules ================= @@ -59,7 +61,7 @@ Remix Datamodules We provide some aggregated datamodules to facilitate cross-database development. .. 
autosummary:: - :toctree: api/configs/datamodules + :toctree: api/data/config :template: config.rst ptbench.data.montgomery_shenzhen.default @@ -70,7 +72,7 @@ We provide some aggregated datamodules to facilitate cross-database development. ptbench.data.nih_cxr14_padchest.idiap -.. _ptbench.configs.datamodules.folds: +.. _ptbench.config.datamodules.folds: Cross-validation Datamodules ============================ @@ -82,7 +84,7 @@ datamodules. Nine other folds are available for every configuration (from 1 to .. autosummary:: - :toctree: api/configs/datamodules + :toctree: api/data/config :template: config.rst ptbench.data.hivtb.fold_0 diff --git a/doc/links.rst b/doc/links.rst index 92ecafd5..d773d1ae 100644 --- a/doc/links.rst +++ b/doc/links.rst @@ -11,6 +11,7 @@ .. _pip: https://pip.pypa.io/en/stable/ .. _mamba: https://mamba.readthedocs.io/en/latest/index.html .. _pytorch: https://pytorch.org +.. _lightning: https://lightning.ai .. Raw data websites .. _montgomery: https://lhncbc.nlm.nih.gov/publication/pub9931 diff --git a/doc/results/index.rst b/doc/results/index.rst index 29ad8a10..0217fe6c 100644 --- a/doc/results/index.rst +++ b/doc/results/index.rst @@ -113,37 +113,37 @@ Thresholds used: :scale: 50% :alt: Testing sets ROC curves for Pasa model trained on normalized-kfold MC - :py:mod:`Pasa <ptbench.configs.models.pasa>`: Pasa trained on normalized-kfold MC + :py:mod:`Pasa <ptbench.models.config.pasa>`: Pasa trained on normalized-kfold MC - .. figure:: img/compare_pasa_mc_ch_kfold_500.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for Pasa model trained on normalized-kfold MC-CH - :py:mod:`Pasa <ptbench.configs.models.pasa>`: Pasa trained on normalized-kfold MC-CH + :py:mod:`Pasa <ptbench.models.config.pasa>`: Pasa trained on normalized-kfold MC-CH - .. 
figure:: img/compare_pasa_mc_ch_in_kfold_500.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for Pasa model trained on normalized-kfold MC-CH-IN - :py:mod:`Pasa <ptbench.configs.models.pasa>`: Pasa trained on normalized-kfold MC-CH-IN + :py:mod:`Pasa <ptbench.models.config.pasa>`: Pasa trained on normalized-kfold MC-CH-IN * - .. figure:: img/compare_densenet_mc_kfold_2000.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC - .. figure:: img/compare_densenet_mc_ch_kfold_2000.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC-CH + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC-CH - .. figure:: img/compare_densenet_mc_ch_in_kfold_2000.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH-IN - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC-CH-IN + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC-CH-IN DenseNet-121 (pretrained on ImageNet) """"""""""""""""""""""""""""""""""""" @@ -180,19 +180,19 @@ Thresholds used: :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC - :py:mod:`DenseNet <ptbench.configs.models.densenet>` DenseNet trained on normalized-kfold MC + :py:mod:`DenseNet <ptbench.models.config.densenet>` DenseNet trained on normalized-kfold MC - .. 
figure:: img/compare_densenetpreIN_mc_ch_kfold_600.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH - :py:mod:`DenseNet <ptbench.configs.models.densenet>` DenseNet trained on normalized-kfold MC-CH + :py:mod:`DenseNet <ptbench.models.config.densenet>` DenseNet trained on normalized-kfold MC-CH - .. figure:: img/compare_densenetpreIN_mc_ch_ch_kfold_600.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH-IN - :py:mod:`DenseNet <ptbench.configs.models.densenet>` DenseNet trained on normalized-kfold MC-CH-IN + :py:mod:`DenseNet <ptbench.models.config.densenet>` DenseNet trained on normalized-kfold MC-CH-IN Logistic Regression Classifier """""""""""""""""""""""""""""" @@ -229,19 +229,19 @@ Thresholds used: :scale: 50% :alt: Testing sets ROC curves for LogReg model trained on normalized-kfold MC - :py:mod:`LogReg <ptbench.configs.models.logistic_regression>`: LogReg trained on normalized-kfold MC + :py:mod:`LogReg <ptbench.models.config.logistic_regression>`: LogReg trained on normalized-kfold MC - .. figure:: img/compare_logreg_mc_ch_kfold_100.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for LogReg model trained on normalized-kfold MC-CH - :py:mod:`LogReg <ptbench.configs.models.logistic_regression>`: LogReg trained on normalized-kfold MC-CH + :py:mod:`LogReg <ptbench.models.config.logistic_regression>`: LogReg trained on normalized-kfold MC-CH - .. 
figure:: img/compare_logreg_mc_ch_in_kfold_100.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for LogReg model trained on normalized-kfold MC-CH-IN - :py:mod:`LogReg <ptbench.configs.models.logistic_regression>`: LogReg trained on normalized-kfold MC-CH-IN + :py:mod:`LogReg <ptbench.models.config.logistic_regression>`: LogReg trained on normalized-kfold MC-CH-IN DenseNet-121 (pretrained on ImageNet and NIH CXR14) """"""""""""""""""""""""""""""""""""""""""""""""""" @@ -278,19 +278,19 @@ Thresholds used: :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC (pretrained on NIH) - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC (pretrained on NIH) + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC (pretrained on NIH) - .. figure:: img/compare_densenetpre_mc_ch_kfold_300.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH (pretrained on NIH) - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC-CH (pretrained on NIH) + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC-CH (pretrained on NIH) - .. figure:: img/compare_densenetpre_mc_ch_in_kfold_300.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH-IN (pretrained on NIH) - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC-CH-IN (pretrained on NIH) + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC-CH-IN (pretrained on NIH) Global sensitivity analysis (relevance) diff --git a/doc/usage/evaluation.rst b/doc/usage/evaluation.rst index d8b15040..8d98ca92 100644 --- a/doc/usage/evaluation.rst +++ b/doc/usage/evaluation.rst @@ -35,8 +35,8 @@ the pre-trained model. .. 
tip:: An option to generate grad-CAMs is available for the :py:mod:`DensenetRS - <ptbench.configs.models_datasets.densenet_rs>` model. To activate it, use - the ``--grad-cams`` argument. + <ptbench.models.config.densenet_rs>` model. To activate it, use the + ``--grad-cams`` argument. .. tip:: diff --git a/pyproject.toml b/pyproject.toml index 56544f73..67bd370c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,13 +70,13 @@ ptbench = "ptbench.scripts.cli:cli" [project.entry-points."ptbench.config"] # models -pasa = "ptbench.configs.models.pasa" -signs-to-tb = "ptbench.configs.models.signs_to_tb" -logistic-regression = "ptbench.configs.models.logistic_regression" -alexnet = "ptbench.configs.models.alexnet" -alexnet-pretrained = "ptbench.configs.models.alexnet_pretrained" -densenet = "ptbench.configs.models.densenet" -densenet-pretrained = "ptbench.configs.models.densenet_pretrained" +pasa = "ptbench.models.config.pasa" +signs-to-tb = "ptbench.models.config.signs_to_tb" +logistic-regression = "ptbench.models.config.logistic_regression" +alexnet = "ptbench.models.config.alexnet" +alexnet-pretrained = "ptbench.models.config.alexnet_pretrained" +densenet = "ptbench.models.config.densenet" +densenet-pretrained = "ptbench.models.config.densenet_pretrained" # montgomery dataset (and cross-validation folds) montgomery = "ptbench.data.montgomery.default" diff --git a/src/ptbench/data/datamodule.py b/src/ptbench/data/datamodule.py index 0785e233..0ab3c36c 100644 --- a/src/ptbench/data/datamodule.py +++ b/src/ptbench/data/datamodule.py @@ -36,15 +36,12 @@ def _sample_size_bytes(s: Sample) -> int: Parameters ---------- - s The sample to be analyzed Returns ------- - - size The size in bytes occupied by this sample """ @@ -74,7 +71,6 @@ class _DelayedLoadingDataset(Dataset): Parameters ---------- - raw_dataset An iterable containing the raw dataset samples representing one of the database split datasets. 
@@ -132,7 +128,6 @@ def _apply_loader_and_transforms( Parameters ---------- - info The sample information, as loaded from its raw dataset dictionary @@ -147,8 +142,6 @@ def _apply_loader_and_transforms( Returns ------- - - sample The loaded and transformed sample. """ sample = load(info) @@ -165,7 +158,6 @@ class _CachedDataset(Dataset): Parameters ---------- - raw_dataset An iterable containing the raw dataset samples representing one of the database split datasets. @@ -243,7 +235,6 @@ class _ConcatDataset(Dataset): Parameters ---------- - datasets An iterable over pre-instantiated datasets. """ @@ -323,8 +314,8 @@ def _make_balanced_random_sampler( Dataset 2 proportionally less likely. This function assumes targets are stored on a dictionary entry named - ``target`` inside the metadata information for the :py:type:``Sample``, and - that its value is integer. + ``target`` inside the metadata information for the + :py:data:`.typing.Sample`, and that its value is integer. We then instantiate a pytorch sampler using the inverse probabilities (the more samples of a class, the less likely it becomes to be sampled. @@ -332,7 +323,6 @@ def _make_balanced_random_sampler( Parameters ---------- - dataset An instance of torch Dataset. :py:class:`torch.utils.data.ConcatDataset` are supported. @@ -344,15 +334,12 @@ def _make_balanced_random_sampler( Returns ------- - - sampler A sampler, to be used in a dataloader equipped with the same dataset used to calculate the relative sample weights. Raises ------ - RuntimeError If requested to balance a dataset (single, not-concatenated) without an existing target. 
@@ -434,23 +421,22 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - splits A dictionary that contains string keys representing dataset names, and values that are iterables over a 2-tuple containing an iterable over arbitrary, user-configurable sample representations (potentially on - disk or permanent storage), and :py:class:`RawDataLoader` (or "sample") - loader objects, which concretely implement a mechanism to load such - samples in memory, from permanent storage. + disk or permanent storage), and :py:class:`.typing.RawDataLoader` (or + "sample") loader objects, which concretely implement a mechanism to + load such samples in memory, from permanent storage. Sample representations on permanent storage may be of any iterable format (e.g. list, dictionary, etc.), for as long as the assigned - :py:class:`RawDataLoader` can properly handle it. + :py:class:`.typing.RawDataLoader` can properly handle it. .. tip:: To check the split and the loader function works correctly, you may - use :py:func:`..dataset.check_database_split_loading`. + use :py:func:`.split.check_database_split_loading`. This class expects at least one entry called ``train`` to exist in the input dictionary. Optional entries are ``validation``, and ``test``. @@ -507,24 +493,24 @@ class ConcatDataModule(lightning.LightningDataModule): multiprocessing data loading. Set to 0 to enable as many data loading instances as processing cores as available in the system. Set to >= 1 to enable that many multiprocessing instances for data loading. - - - Attributes - ---------- - - model_transforms - A list of transforms (torch modules) that will be applied after - raw-data-loading, and just before data is fed into the model or - eventual data-augmentation transformations for all data loaders - produced by this data module. This part of the pipeline receives data - as output by the raw-data-loader, or model-related transforms (e.g. - resize adaptions), if any is specified. 
If data is cached, it is - cached **after** model-transforms are applied, as that is a potential - memory saver (e.g., if it contains a resizing operation to smaller - images). """ - DatasetDictionary = dict[str, Dataset] + DatasetDictionary: typing.TypeAlias = dict[str, Dataset] + """A dictionary of datasets mapping names to actual datasets.""" + + model_transforms: list[Transform] | None + """Transforms required to fit data into the model. + + A list of transforms (torch modules) that will be applied after raw- + data-loading. and just before data is fed into the model or eventual + data-augmentation transformations for all data loaders produced by + this data module. This part of the pipeline receives data as output + by the raw-data-loader, or model-related transforms (e.g. resize + adaptions), if any is specified. If data is cached, it is cached + **after** model-transforms are applied, as that is a potential + memory saver (e.g., if it contains a resizing operation to smaller + images). + """ def __init__( self, @@ -653,7 +639,6 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - batch_size Number of samples in every **training** batch (this parameter affects memory requirements for the network). If the number of samples in the @@ -696,7 +681,6 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - name Name of the dataset to setup. """ @@ -769,7 +753,6 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - stage Name of the stage to which the setup is applicable. Can be one of ``fit``, ``validate``, ``test`` or ``predict``. Each stage @@ -808,7 +791,6 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - stage Name of the stage to which the teardown is applicable. Can be one of ``fit``, ``validate``, ``test`` or ``predict``. 
Each stage @@ -903,19 +885,19 @@ class CachingDataModule(ConcatDataModule): Parameters ---------- - database_split A dictionary that contains string keys representing dataset names, and values that are iterables over sample representations (potentially on - disk). These objects are passed to an unique :py:class:`RawDataLoader` - for loading the :py:class:`Sample` data (and metadata) in memory. It + disk). These objects are passed to an unique + :py:class:`.typing.RawDataLoader` for loading the + :py:data:`.typing.Sample` data (and metadata) in memory. It therefore assumes the whole split is homogeneous and can be loaded in the same way. .. tip:: To check the split and the loader function works correctly, you may - use :py:func:`..dataset.check_database_split_loading`. + use :py:func:`.split.check_database_split_loading`. This class expects at least one entry called ``train`` to exist in the input dictionary. Optional entries are ``validation``, and ``test``. diff --git a/src/ptbench/data/hivtb/datamodule.py b/src/ptbench/data/hivtb/datamodule.py index b5b84ec4..2f1dcb85 100644 --- a/src/ptbench/data/hivtb/datamodule.py +++ b/src/ptbench/data/hivtb/datamodule.py @@ -19,17 +19,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the HIV-TB dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - """ + """A specialized raw-data-loader for the HIV-TB dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self): self.datadir = load_rc().get( @@ -41,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. 
@@ -50,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ image = PIL.Image.open(os.path.join(self.datadir, sample[0])).convert( @@ -73,8 +64,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -82,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -108,23 +96,23 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * BMP (BMP3) and JPEG grayscale images encoded as 8-bit RGB, with - varying resolution + * BMP (BMP3) and JPEG grayscale images encoded as 8-bit RGB, with + varying resolution * Output image: - * Transforms: + * Transforms: - * Load raw BMP or JPEG with :py:mod:`PIL` - * Remove black borders - * Convert to torch tensor - * Torch center cropping to get square image + * Load raw BMP or JPEG with :py:mod:`PIL` + * Remove black borders + * Convert to torch tensor + * Torch center cropping to get square image * Final specifications - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square at 2048 x 2048 pixels - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square at 2048 x 2048 pixels + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/hivtb/fold_0.py b/src/ptbench/data/hivtb/fold_0.py index 57d77952..6919907d 100644 --- a/src/ptbench/data/hivtb/fold_0.py +++ b/src/ptbench/data/hivtb/fold_0.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") """HIV-TB dataset for TB detection (cross validation fold 0). -See :py:class:`DataModule` for technical details. 
+See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_1.py b/src/ptbench/data/hivtb/fold_1.py index c91a968f..1f4b1dd0 100644 --- a/src/ptbench/data/hivtb/fold_1.py +++ b/src/ptbench/data/hivtb/fold_1.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") """HIV-TB dataset for TB detection (cross validation fold 1). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_2.py b/src/ptbench/data/hivtb/fold_2.py index 323e80a0..9c9fbe65 100644 --- a/src/ptbench/data/hivtb/fold_2.py +++ b/src/ptbench/data/hivtb/fold_2.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") """HIV-TB dataset for TB detection (cross validation fold 2). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_3.py b/src/ptbench/data/hivtb/fold_3.py index 1eed4c05..ef07c591 100644 --- a/src/ptbench/data/hivtb/fold_3.py +++ b/src/ptbench/data/hivtb/fold_3.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") """HIV-TB dataset for TB detection (cross validation fold 3). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_4.py b/src/ptbench/data/hivtb/fold_4.py index 9cfa6186..6683006d 100644 --- a/src/ptbench/data/hivtb/fold_4.py +++ b/src/ptbench/data/hivtb/fold_4.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") """HIV-TB dataset for TB detection (cross validation fold 4). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/hivtb/fold_5.py b/src/ptbench/data/hivtb/fold_5.py index 591fef37..cf67833a 100644 --- a/src/ptbench/data/hivtb/fold_5.py +++ b/src/ptbench/data/hivtb/fold_5.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") """HIV-TB dataset for TB detection (cross validation fold 5). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_6.py b/src/ptbench/data/hivtb/fold_6.py index fb5e1614..94614fc4 100644 --- a/src/ptbench/data/hivtb/fold_6.py +++ b/src/ptbench/data/hivtb/fold_6.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") """HIV-TB dataset for TB detection (cross validation fold 6). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_7.py b/src/ptbench/data/hivtb/fold_7.py index d64db483..259446c8 100644 --- a/src/ptbench/data/hivtb/fold_7.py +++ b/src/ptbench/data/hivtb/fold_7.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") """HIV-TB dataset for TB detection (cross validation fold 7). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_8.py b/src/ptbench/data/hivtb/fold_8.py index 8a0f87d1..d243db08 100644 --- a/src/ptbench/data/hivtb/fold_8.py +++ b/src/ptbench/data/hivtb/fold_8.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") """HIV-TB dataset for TB detection (cross validation fold 8). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/hivtb/fold_9.py b/src/ptbench/data/hivtb/fold_9.py index d92de50e..340bc661 100644 --- a/src/ptbench/data/hivtb/fold_9.py +++ b/src/ptbench/data/hivtb/fold_9.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") """HIV-TB dataset for TB detection (cross validation fold 9). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/indian/datamodule.py b/src/ptbench/data/indian/datamodule.py index f6017cad..ee53da80 100644 --- a/src/ptbench/data/indian/datamodule.py +++ b/src/ptbench/data/indian/datamodule.py @@ -33,23 +33,23 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * PNG RGB 8-bit depth images with "inverted" grayscale scale - * Variable width and height + * PNG RGB 8-bit depth images with "inverted" grayscale scale + * Variable width and height * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Convert to torch tensor - * Torch center cropping to get square image + * Load raw PNG with :py:mod:`PIL` + * Remove black borders + * Convert to torch tensor + * Torch center cropping to get square image - * Final specifications: + * Final specifications: - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square, with varying resolutions, depending on the input raw image - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square, with varying resolutions, depending on the input raw image + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/indian/fold_0.py b/src/ptbench/data/indian/fold_0.py index 91711d38..635b542f 100644 --- a/src/ptbench/data/indian/fold_0.py +++ b/src/ptbench/data/indian/fold_0.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = 
DataModule("fold-0.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 0). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_1.py b/src/ptbench/data/indian/fold_1.py index 17372695..5a3a0213 100644 --- a/src/ptbench/data/indian/fold_1.py +++ b/src/ptbench/data/indian/fold_1.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 1). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_2.py b/src/ptbench/data/indian/fold_2.py index 91346efa..cbf1aee4 100644 --- a/src/ptbench/data/indian/fold_2.py +++ b/src/ptbench/data/indian/fold_2.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 2). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_3.py b/src/ptbench/data/indian/fold_3.py index 4156c3ee..369c9528 100644 --- a/src/ptbench/data/indian/fold_3.py +++ b/src/ptbench/data/indian/fold_3.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 3). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_4.py b/src/ptbench/data/indian/fold_4.py index 6d5b73d1..e9137b65 100644 --- a/src/ptbench/data/indian/fold_4.py +++ b/src/ptbench/data/indian/fold_4.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 4). + +See :py:class:`.indian.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/indian/fold_5.py b/src/ptbench/data/indian/fold_5.py index 27a7442d..d6f34d69 100644 --- a/src/ptbench/data/indian/fold_5.py +++ b/src/ptbench/data/indian/fold_5.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 5). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_6.py b/src/ptbench/data/indian/fold_6.py index 1270cd58..a293530d 100644 --- a/src/ptbench/data/indian/fold_6.py +++ b/src/ptbench/data/indian/fold_6.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 6). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_7.py b/src/ptbench/data/indian/fold_7.py index e7b5a1c3..22ea3439 100644 --- a/src/ptbench/data/indian/fold_7.py +++ b/src/ptbench/data/indian/fold_7.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 7). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_8.py b/src/ptbench/data/indian/fold_8.py index 1abbe7b2..77cf20ee 100644 --- a/src/ptbench/data/indian/fold_8.py +++ b/src/ptbench/data/indian/fold_8.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 8). + +See :py:class:`.indian.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/indian/fold_9.py b/src/ptbench/data/indian/fold_9.py index 71de470d..a0f881bc 100644 --- a/src/ptbench/data/indian/fold_9.py +++ b/src/ptbench/data/indian/fold_9.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 9). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/datamodule.py b/src/ptbench/data/montgomery/datamodule.py index 12ead6e8..f3ac8ffc 100644 --- a/src/ptbench/data/montgomery/datamodule.py +++ b/src/ptbench/data/montgomery/datamodule.py @@ -19,17 +19,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the Montgomery dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - """ + """A specialized raw-data-loader for the Montgomery dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self): self.datadir = load_rc().get( @@ -41,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -50,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ # N.B.: Montgomery images are encoded as grayscale PNGs, so no need to @@ -73,8 +64,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. 
@@ -82,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -111,23 +99,23 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * PNG images 8 bit grayscale - * resolution: fixed to one of the cases above + * PNG images 8 bit grayscale + * resolution: fixed to one of the cases above * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Convert to torch tensor - * Torch center cropping to get square image + * Load raw PNG with :py:mod:`PIL` + * Remove black borders + * Convert to torch tensor + * Torch center cropping to get square image - * Final specifications + * Final specifications - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square at 4020 x 4020 pixels - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square at 4020 x 4020 pixels + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/montgomery/fold_0.py b/src/ptbench/data/montgomery/fold_0.py index 91711d38..a271fa0f 100644 --- a/src/ptbench/data/montgomery/fold_0.py +++ b/src/ptbench/data/montgomery/fold_0.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Montgomery datamodule for TB detection (cross validation fold 0). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_1.py b/src/ptbench/data/montgomery/fold_1.py index 17372695..626cf2d0 100644 --- a/src/ptbench/data/montgomery/fold_1.py +++ b/src/ptbench/data/montgomery/fold_1.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Montgomery datamodule for TB detection (cross validation fold 1). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery/fold_2.py b/src/ptbench/data/montgomery/fold_2.py index 91346efa..b6146d06 100644 --- a/src/ptbench/data/montgomery/fold_2.py +++ b/src/ptbench/data/montgomery/fold_2.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Montgomery datamodule for TB detection (cross validation fold 2). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_3.py b/src/ptbench/data/montgomery/fold_3.py index 4156c3ee..f1b1c53e 100644 --- a/src/ptbench/data/montgomery/fold_3.py +++ b/src/ptbench/data/montgomery/fold_3.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Montgomery datamodule for TB detection (cross validation fold 3). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_4.py b/src/ptbench/data/montgomery/fold_4.py index 6d5b73d1..eaa5fd27 100644 --- a/src/ptbench/data/montgomery/fold_4.py +++ b/src/ptbench/data/montgomery/fold_4.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Montgomery datamodule for TB detection (cross validation fold 4). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_5.py b/src/ptbench/data/montgomery/fold_5.py index 27a7442d..142ae715 100644 --- a/src/ptbench/data/montgomery/fold_5.py +++ b/src/ptbench/data/montgomery/fold_5.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Montgomery datamodule for TB detection (cross validation fold 5). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery/fold_6.py b/src/ptbench/data/montgomery/fold_6.py index 1270cd58..715d9b84 100644 --- a/src/ptbench/data/montgomery/fold_6.py +++ b/src/ptbench/data/montgomery/fold_6.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Montgomery datamodule for TB detection (cross validation fold 6). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_7.py b/src/ptbench/data/montgomery/fold_7.py index e7b5a1c3..fc53e24e 100644 --- a/src/ptbench/data/montgomery/fold_7.py +++ b/src/ptbench/data/montgomery/fold_7.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Montgomery datamodule for TB detection (cross validation fold 7). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_8.py b/src/ptbench/data/montgomery/fold_8.py index 1abbe7b2..2b917a6b 100644 --- a/src/ptbench/data/montgomery/fold_8.py +++ b/src/ptbench/data/montgomery/fold_8.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Montgomery datamodule for TB detection (cross validation fold 8). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_9.py b/src/ptbench/data/montgomery/fold_9.py index 71de470d..f404ace1 100644 --- a/src/ptbench/data/montgomery/fold_9.py +++ b/src/ptbench/data/montgomery/fold_9.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Montgomery datamodule for TB detection (cross validation fold 9). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen/default.py b/src/ptbench/data/montgomery_shenzhen/default.py index 2b8a8fb2..c332f4be 100644 --- a/src/ptbench/data/montgomery_shenzhen/default.py +++ b/src/ptbench/data/montgomery_shenzhen/default.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("default.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (default +split). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_0.py b/src/ptbench/data/montgomery_shenzhen/fold_0.py index 91711d38..f3e8ef02 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_0.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_0.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 0). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_1.py b/src/ptbench/data/montgomery_shenzhen/fold_1.py index 17372695..f6d73de3 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_1.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_1.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 1). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_2.py b/src/ptbench/data/montgomery_shenzhen/fold_2.py index 91346efa..9b956052 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_2.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_2.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 2). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_3.py b/src/ptbench/data/montgomery_shenzhen/fold_3.py index 4156c3ee..826e0abb 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_3.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_3.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 3). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_4.py b/src/ptbench/data/montgomery_shenzhen/fold_4.py index 6d5b73d1..f7261a43 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_4.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_4.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 4). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_5.py b/src/ptbench/data/montgomery_shenzhen/fold_5.py index 27a7442d..7dfb6f90 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_5.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_5.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 5). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_6.py b/src/ptbench/data/montgomery_shenzhen/fold_6.py index 1270cd58..57f19130 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_6.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_6.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 6). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_7.py b/src/ptbench/data/montgomery_shenzhen/fold_7.py index e7b5a1c3..3fc7c8bf 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_7.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_7.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 7). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_8.py b/src/ptbench/data/montgomery_shenzhen/fold_8.py index 1abbe7b2..7b631543 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_8.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_8.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 8). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_9.py b/src/ptbench/data/montgomery_shenzhen/fold_9.py index 71de470d..565b71e7 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_9.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_9.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 9). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/datamodule.py b/src/ptbench/data/montgomery_shenzhen_indian/datamodule.py index ea2d2a4f..3c555a62 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/datamodule.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/datamodule.py @@ -12,7 +12,8 @@ from ..shenzhen.datamodule import make_split as make_shenzhen_split class DataModule(ConcatDataModule): - """Aggregated datamodule composed of Montgomery and Shenzhen datasets.""" + """Aggregated datamodule composed of Montgomery, Shenzhen and Indian + datasets.""" def __init__(self, split_filename: str): montgomery_loader = MontgomeryLoader() diff --git a/src/ptbench/data/montgomery_shenzhen_indian/default.py b/src/ptbench/data/montgomery_shenzhen_indian/default.py index 2b8a8fb2..e4a05f63 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/default.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/default.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("default.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets. + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_0.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_0.py index 91711d38..6f08e25a 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_0.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_0.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 0). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_1.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_1.py index 17372695..3d7529cf 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_1.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_1.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 1). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_2.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_2.py index 91346efa..2e914281 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_2.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_2.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 2). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_3.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_3.py index 4156c3ee..dfd4c9cd 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_3.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_3.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 3). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_4.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_4.py index 6d5b73d1..84e66cef 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_4.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_4.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 4). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_5.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_5.py index 27a7442d..650292f7 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_5.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_5.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 5). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_6.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_6.py index 1270cd58..8f673689 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_6.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_6.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 6). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_7.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_7.py index e7b5a1c3..33eed540 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_7.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_7.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 7). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_8.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_8.py index 1abbe7b2..4de80f69 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_8.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_8.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 8). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_9.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_9.py index 71de470d..f33e691b 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_9.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_9.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 9). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian_padchest/datamodule.py b/src/ptbench/data/montgomery_shenzhen_indian_padchest/datamodule.py index 2fdcfc67..9f6a3b09 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian_padchest/datamodule.py +++ b/src/ptbench/data/montgomery_shenzhen_indian_padchest/datamodule.py @@ -38,7 +38,8 @@ class DataModule(ConcatDataModule): (montgomery_split["validation"], montgomery_loader), (shenzhen_split["validation"], shenzhen_loader), (indian_split["validation"], indian_loader), - (padchest_split["validation"], padchest_loader), + # there is no validation set on padchest... + # (padchest_split["validation"], padchest_loader), ], "test": [ (montgomery_split["test"], montgomery_loader), diff --git a/src/ptbench/data/nih_cxr14/datamodule.py b/src/ptbench/data/nih_cxr14/datamodule.py index 1594be18..58c828b4 100644 --- a/src/ptbench/data/nih_cxr14/datamodule.py +++ b/src/ptbench/data/nih_cxr14/datamodule.py @@ -18,26 +18,22 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the NIH CXR-14 dataset. + """A specialized raw-data-loader for the NIH CXR-14 dataset.""" - Attributes - ---------- + datadir: str + """This variable contains the base directory where the database raw data is + stored.""" - datadir - This variable contains the base directory where the database raw data - is stored. + idiap_file_organisation: bool + """Whether to use Idiap's filesystem organisation when looking up data. - idiap_file_organisation - This variable will be ``True``, if the user has set the configuration - parameter ``nih_cxr14.idiap_file_organisation`` in the global - configuration file. It will cause internal loader to search for files - in a slightly different folder structure, that was adapted to Idiap's - requirements (number of files per folder to be less than 10k).
+ This variable will be ``True``, if the user has set the configuration + parameter ``nih_cxr14.idiap_file_organisation`` in the global configuration + file. It will cause the internal loader to search for files in a slightly + different folder structure, that was adapted to Idiap's requirements + (number of files per folder to be less than 10k). """ - datadir: str - idiap_file_organisation: bool - def __init__(self): rc = load_rc() self.datadir = rc.get("datadir.nih_cxr14", os.path.realpath(os.curdir)) @@ -50,8 +46,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -59,8 +54,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ file_path = sample[0] # default @@ -91,8 +84,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -100,8 +92,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - labels The integer labels associated with the sample """ return sample[1] diff --git a/src/ptbench/data/nih_cxr14/default.py b/src/ptbench/data/nih_cxr14/default.py index b9f7b6c5..af4f2172 100644 --- a/src/ptbench/data/nih_cxr14/default.py +++ b/src/ptbench/data/nih_cxr14/default.py @@ -11,5 +11,5 @@ datamodule = DataModule("default.json.bz2") * Validation samples: 6350 * Test samples: 4054 -See :py:class:`DataModule` for technical details. +See :py:class:`.nih_cxr14.datamodule.DataModule` for technical details.
""" diff --git a/src/ptbench/data/nih_cxr14_padchest/datamodule.py b/src/ptbench/data/nih_cxr14_padchest/datamodule.py index 335679bf..f1ce1f91 100644 --- a/src/ptbench/data/nih_cxr14_padchest/datamodule.py +++ b/src/ptbench/data/nih_cxr14_padchest/datamodule.py @@ -31,7 +31,8 @@ class DataModule(ConcatDataModule): ], "test": [ (cxr14_split["test"], cxr14_loader), - (padchest_split["test"], padchest_loader), + # there is no test set on padchest + # (padchest_split["test"], padchest_loader), ], } ) diff --git a/src/ptbench/data/padchest/datamodule.py b/src/ptbench/data/padchest/datamodule.py index a09c5e67..69a37ea8 100644 --- a/src/ptbench/data/padchest/datamodule.py +++ b/src/ptbench/data/padchest/datamodule.py @@ -20,17 +20,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the PadChest dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - """ + """A specialized raw-data-loader for the PadChest dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self): rc = load_rc() @@ -41,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -50,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ # N.B.: PadChest images are encoded as 16-bit grayscale images @@ -73,8 +64,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. 
@@ -82,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - labels The integer labels associated with the sample """ return sample[1] @@ -117,213 +105,213 @@ class DataModule(CachingDataModule): * Labels: [PADCHEST-2019]_ * Output image: - * Transforms: - - * Load raw 16-bit PNG with :py:mod:`PIL` - * Remove excess black borders - * Convert image to 32-bit floats between 0. and 1. - * Convert to tensor - * Center crop, preserving the minimum dimension (height or width) - - * Final specifications - - * Grayscale, encoded as a 1-plane 32-bit float image, square with - varying resolutions depending on the raw input image - * Labels, in order (some of which may not be present in all splits): - - * COPD signs - * Chilaiditi sign - * NSG tube - * abnormal foreign body - * abscess - * adenopathy - * air bronchogram - * air fluid level - * air trapping - * alveolar pattern - * aortic aneurysm - * aortic atheromatosis - * aortic button enlargement - * aortic elongation - * aortic endoprosthesis - * apical pleural thickening - * artificial aortic heart valve - * artificial heart valve - * artificial mitral heart valve - * asbestosis signs - * ascendent aortic elongation - * atelectasis - * atelectasis basal - * atypical pneumonia - * axial hyperostosis - * azygoesophageal recess shift - * azygos lobe - * blastic bone lesion - * bone cement - * bone metastasis - * breast mass - * bronchiectasis - * bronchovascular markings - * bullas - * calcified adenopathy - * calcified densities - * calcified fibroadenoma - * calcified granuloma - * calcified mediastinal adenopathy - * calcified pleural plaques - * calcified pleural thickening - * callus rib fracture - * cardiomegaly - * catheter - * cavitation - * central vascular redistribution - * central venous catheter - * central venous catheter via jugular vein - * central venous catheter via subclavian vein - * central venous catheter via umbilical vein - * cervical rib - * chest drain tube - * chronic changes - 
* clavicle fracture - * consolidation - * costochondral junction hypertrophy - * costophrenic angle blunting - * cyst - * dai - * descendent aortic elongation - * dextrocardia - * diaphragmatic eventration - * double J stent - * dual chamber device - * electrical device - * emphysema - * empyema - * end on vessel - * endoprosthesis - * endotracheal tube - * esophagic dilatation - * exclude - * external foreign body - * fibrotic band - * fissure thickening - * flattened diaphragm - * fracture - * gastrostomy tube - * goiter - * granuloma - * ground glass pattern - * gynecomastia - * heart insufficiency - * heart valve calcified - * hemidiaphragm elevation - * hiatal hernia - * hilar congestion - * hilar enlargement - * humeral fracture - * humeral prosthesis - * hydropneumothorax - * hyperinflated lung - * hypoexpansion - * hypoexpansion basal - * increased density - * infiltrates - * interstitial pattern - * kerley lines - * kyphosis - * laminar atelectasis - * lepidic adenocarcinoma - * lipomatosis - * lobar atelectasis - * loculated fissural effusion - * loculated pleural effusion - * lung metastasis - * lung vascular paucity - * lymphangitis carcinomatosa - * lytic bone lesion - * major fissure thickening - * mammary prosthesis - * mass - * mastectomy - * mediastinal enlargement - * mediastinal mass - * mediastinal shift - * mediastinic lipomatosis - * metal - * miliary opacities - * minor fissure thickening - * multiple nodules - * nephrostomy tube - * nipple shadow - * nodule - * non axial articular degenerative changes - * normal - * obesity - * osteopenia - * osteoporosis - * osteosynthesis material - * pacemaker - * pectum carinatum - * pectum excavatum - * pericardial effusion - * pleural effusion - * pleural mass - * pleural plaques - * pleural thickening - * pneumomediastinum - * pneumonia - * pneumoperitoneo - * pneumothorax - * post radiotherapy changes - * prosthesis - * pseudonodule - * pulmonary artery enlargement - * pulmonary artery hypertension - 
* pulmonary edema - * pulmonary fibrosis - * pulmonary hypertension - * pulmonary mass - * pulmonary venous hypertension - * reservoir central venous catheter - * respiratory distress - * reticular interstitial pattern - * reticulonodular interstitial pattern - * rib fracture - * right sided aortic arch - * round atelectasis - * sclerotic bone lesion - * scoliosis - * segmental atelectasis - * single chamber device - * soft tissue mass - * sternoclavicular junction hypertrophy - * sternotomy - * subacromial space narrowing - * subcutaneous emphysema - * suboptimal study - * superior mediastinal enlargement - * supra aortic elongation - * surgery - * surgery breast - * surgery heart - * surgery humeral - * surgery lung - * surgery neck - * suture material - * thoracic cage deformation - * total atelectasis - * tracheal shift - * tracheostomy tube - * tuberculosis - * tuberculosis sequelae - * unchanged - * vascular hilar enlargement - * vascular redistribution - * ventriculoperitoneal drain tube - * vertebral anterior compression - * vertebral compression - * vertebral degenerative changes - * vertebral fracture - * volume loss + * Transforms: + + * Load raw 16-bit PNG with :py:mod:`PIL` + * Remove excess black borders + * Convert image to 32-bit floats between 0. and 1. 
+ * Convert to tensor + * Center crop, preserving the minimum dimension (height or width) + + * Final specifications + + * Grayscale, encoded as a 1-plane 32-bit float image, square with + varying resolutions depending on the raw input image + * Labels, in order (some of which may not be present in all splits): + + * COPD signs + * Chilaiditi sign + * NSG tube + * abnormal foreign body + * abscess + * adenopathy + * air bronchogram + * air fluid level + * air trapping + * alveolar pattern + * aortic aneurysm + * aortic atheromatosis + * aortic button enlargement + * aortic elongation + * aortic endoprosthesis + * apical pleural thickening + * artificial aortic heart valve + * artificial heart valve + * artificial mitral heart valve + * asbestosis signs + * ascendent aortic elongation + * atelectasis + * atelectasis basal + * atypical pneumonia + * axial hyperostosis + * azygoesophageal recess shift + * azygos lobe + * blastic bone lesion + * bone cement + * bone metastasis + * breast mass + * bronchiectasis + * bronchovascular markings + * bullas + * calcified adenopathy + * calcified densities + * calcified fibroadenoma + * calcified granuloma + * calcified mediastinal adenopathy + * calcified pleural plaques + * calcified pleural thickening + * callus rib fracture + * cardiomegaly + * catheter + * cavitation + * central vascular redistribution + * central venous catheter + * central venous catheter via jugular vein + * central venous catheter via subclavian vein + * central venous catheter via umbilical vein + * cervical rib + * chest drain tube + * chronic changes + * clavicle fracture + * consolidation + * costochondral junction hypertrophy + * costophrenic angle blunting + * cyst + * dai + * descendent aortic elongation + * dextrocardia + * diaphragmatic eventration + * double J stent + * dual chamber device + * electrical device + * emphysema + * empyema + * end on vessel + * endoprosthesis + * endotracheal tube + * esophagic dilatation + * exclude + * 
external foreign body + * fibrotic band + * fissure thickening + * flattened diaphragm + * fracture + * gastrostomy tube + * goiter + * granuloma + * ground glass pattern + * gynecomastia + * heart insufficiency + * heart valve calcified + * hemidiaphragm elevation + * hiatal hernia + * hilar congestion + * hilar enlargement + * humeral fracture + * humeral prosthesis + * hydropneumothorax + * hyperinflated lung + * hypoexpansion + * hypoexpansion basal + * increased density + * infiltrates + * interstitial pattern + * kerley lines + * kyphosis + * laminar atelectasis + * lepidic adenocarcinoma + * lipomatosis + * lobar atelectasis + * loculated fissural effusion + * loculated pleural effusion + * lung metastasis + * lung vascular paucity + * lymphangitis carcinomatosa + * lytic bone lesion + * major fissure thickening + * mammary prosthesis + * mass + * mastectomy + * mediastinal enlargement + * mediastinal mass + * mediastinal shift + * mediastinic lipomatosis + * metal + * miliary opacities + * minor fissure thickening + * multiple nodules + * nephrostomy tube + * nipple shadow + * nodule + * non axial articular degenerative changes + * normal + * obesity + * osteopenia + * osteoporosis + * osteosynthesis material + * pacemaker + * pectum carinatum + * pectum excavatum + * pericardial effusion + * pleural effusion + * pleural mass + * pleural plaques + * pleural thickening + * pneumomediastinum + * pneumonia + * pneumoperitoneo + * pneumothorax + * post radiotherapy changes + * prosthesis + * pseudonodule + * pulmonary artery enlargement + * pulmonary artery hypertension + * pulmonary edema + * pulmonary fibrosis + * pulmonary hypertension + * pulmonary mass + * pulmonary venous hypertension + * reservoir central venous catheter + * respiratory distress + * reticular interstitial pattern + * reticulonodular interstitial pattern + * rib fracture + * right sided aortic arch + * round atelectasis + * sclerotic bone lesion + * scoliosis + * segmental atelectasis + * 
single chamber device + * soft tissue mass + * sternoclavicular junction hypertrophy + * sternotomy + * subacromial space narrowing + * subcutaneous emphysema + * suboptimal study + * superior mediastinal enlargement + * supra aortic elongation + * surgery + * surgery breast + * surgery heart + * surgery humeral + * surgery lung + * surgery neck + * suture material + * thoracic cage deformation + * total atelectasis + * tracheal shift + * tracheostomy tube + * tuberculosis + * tuberculosis sequelae + * unchanged + * vascular hilar enlargement + * vascular redistribution + * ventriculoperitoneal drain tube + * vertebral anterior compression + * vertebral compression + * vertebral degenerative changes + * vertebral fracture + * volume loss """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/padchest/idiap.py b/src/ptbench/data/padchest/idiap.py index ea8b3dbd..52c52123 100644 --- a/src/ptbench/data/padchest/idiap.py +++ b/src/ptbench/data/padchest/idiap.py @@ -8,7 +8,7 @@ datamodule = DataModule("idiap.json.bz2") """Padchest dataset for computer-aided diagnosis (``idiap`` split). This split contains all images in the database. Read documentation of -:py:class:`DataModule` for technical details. +:py:class:`.padchest.datamodule.DataModule` for technical details. * Split reference: ours * Training samples: 96'269 diff --git a/src/ptbench/data/shenzhen/datamodule.py b/src/ptbench/data/shenzhen/datamodule.py index 0596007e..221bc869 100644 --- a/src/ptbench/data/shenzhen/datamodule.py +++ b/src/ptbench/data/shenzhen/datamodule.py @@ -19,20 +19,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the Shenzen dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - - transform - Transforms that are always applied to the loaded raw images. 
- """ + """A specialized raw-data-loader for the Shenzhen dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self, config_variable: str = "datadir.shenzhen"): self.datadir = load_rc().get( @@ -44,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -53,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ # N.B.: Image.convert("L") is required to normalize grayscale back to @@ -78,8 +66,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -87,8 +74,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -117,23 +102,23 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * PNG 8-bit RGB images (grayscale, but encoded as RGB images with - "inverted" grayscale scale requiring special treatment). - * Variable width and height of 3000 x 3000 pixels or less + * PNG 8-bit RGB images (grayscale, but encoded as RGB images with + "inverted" grayscale scale requiring special treatment). 
+ * Variable width and height of 3000 x 3000 pixels or less * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image + * Load raw PNG with :py:mod:`PIL` + * Remove black borders + * Torch center cropping to get square image - * Final specifications: + * Final specifications: - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square with varying resolutions, depending on the input image - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square with varying resolutions, depending on the input image + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/shenzhen/default.py b/src/ptbench/data/shenzhen/default.py index 93517c03..0a73847a 100644 --- a/src/ptbench/data/shenzhen/default.py +++ b/src/ptbench/data/shenzhen/default.py @@ -5,9 +5,11 @@ from .datamodule import DataModule datamodule = DataModule("default.json") -"""Default Shenzen TB database split. +"""Default Shenzhen TB database split. * Training samples: 64% of TB and healthy CXR (including labels) * Validation samples: 16% of TB and healthy CXR (including labels) * Test samples: 20% of TB and healthy CXR (including labels) + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/shenzhen/fold_0.py b/src/ptbench/data/shenzhen/fold_0.py index 91711d38..dc895f78 100644 --- a/src/ptbench/data/shenzhen/fold_0.py +++ b/src/ptbench/data/shenzhen/fold_0.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 0). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/shenzhen/fold_1.py b/src/ptbench/data/shenzhen/fold_1.py index 17372695..ce062462 100644 --- a/src/ptbench/data/shenzhen/fold_1.py +++ b/src/ptbench/data/shenzhen/fold_1.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 1). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_2.py b/src/ptbench/data/shenzhen/fold_2.py index 91346efa..3f061c37 100644 --- a/src/ptbench/data/shenzhen/fold_2.py +++ b/src/ptbench/data/shenzhen/fold_2.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 2). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_3.py b/src/ptbench/data/shenzhen/fold_3.py index 4156c3ee..e3bb067e 100644 --- a/src/ptbench/data/shenzhen/fold_3.py +++ b/src/ptbench/data/shenzhen/fold_3.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 3). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_4.py b/src/ptbench/data/shenzhen/fold_4.py index 6d5b73d1..f0cb843b 100644 --- a/src/ptbench/data/shenzhen/fold_4.py +++ b/src/ptbench/data/shenzhen/fold_4.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 4). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/shenzhen/fold_5.py b/src/ptbench/data/shenzhen/fold_5.py index 27a7442d..6a27ac51 100644 --- a/src/ptbench/data/shenzhen/fold_5.py +++ b/src/ptbench/data/shenzhen/fold_5.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 5). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_6.py b/src/ptbench/data/shenzhen/fold_6.py index 1270cd58..302d2b8d 100644 --- a/src/ptbench/data/shenzhen/fold_6.py +++ b/src/ptbench/data/shenzhen/fold_6.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 6). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_7.py b/src/ptbench/data/shenzhen/fold_7.py index e7b5a1c3..a07f4d29 100644 --- a/src/ptbench/data/shenzhen/fold_7.py +++ b/src/ptbench/data/shenzhen/fold_7.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 7). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_8.py b/src/ptbench/data/shenzhen/fold_8.py index 1abbe7b2..0c5e6d22 100644 --- a/src/ptbench/data/shenzhen/fold_8.py +++ b/src/ptbench/data/shenzhen/fold_8.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 8). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/shenzhen/fold_9.py b/src/ptbench/data/shenzhen/fold_9.py index 71de470d..bb73ea16 100644 --- a/src/ptbench/data/shenzhen/fold_9.py +++ b/src/ptbench/data/shenzhen/fold_9.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 9). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/datamodule.py b/src/ptbench/data/tbpoc/datamodule.py index 31e2aac4..f564e232 100644 --- a/src/ptbench/data/tbpoc/datamodule.py +++ b/src/ptbench/data/tbpoc/datamodule.py @@ -19,20 +19,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the Shenzen dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - - transform - Transforms that are always applied to the loaded raw images. - """ + """A specialized raw-data-loader for the TB-POC dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self, config_variable: str = "datadir.tbpoc"): self.datadir = load_rc().get( @@ -44,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -53,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ # images from TBPOC are encoded as grayscale JPEGs, no need to @@ -76,8 +64,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label.
@@ -85,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -111,24 +96,24 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * JPEG 8-bit Grayscale images - * resolution: fixed to one of the cases above + * JPEG 8-bit Grayscale images + * resolution: fixed to one of the cases above * Output image: - * Transforms: + * Transforms: - * Load raw grayscale jpeg with :py:mod:`PIL` - * Remove black borders - * Convert to torch tensor - * Torch center cropping to get square image + * Load raw grayscale jpeg with :py:mod:`PIL` + * Remove black borders + * Convert to torch tensor + * Torch center cropping to get square image - * Final specifications: + * Final specifications: - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square with varying resolutions (2048 x 2048 being the maximum), - but also depending on black borders' sizes on the input image. - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square with varying resolutions (2048 x 2048 being the maximum), + but also depending on black borders' sizes on the input image. + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/tbpoc/fold_0.py b/src/ptbench/data/tbpoc/fold_0.py index 775f64cf..2beb07fd 100644 --- a/src/ptbench/data/tbpoc/fold_0.py +++ b/src/ptbench/data/tbpoc/fold_0.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") """TB-POC dataset for TB detection (cross validation fold 0). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/tbpoc/fold_1.py b/src/ptbench/data/tbpoc/fold_1.py index 6f0f137f..338d99b2 100644 --- a/src/ptbench/data/tbpoc/fold_1.py +++ b/src/ptbench/data/tbpoc/fold_1.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") """TB-POC dataset for TB detection (cross validation fold 1). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_2.py b/src/ptbench/data/tbpoc/fold_2.py index 662fd32c..9df72b54 100644 --- a/src/ptbench/data/tbpoc/fold_2.py +++ b/src/ptbench/data/tbpoc/fold_2.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") """TB-POC dataset for TB detection (cross validation fold 2). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_3.py b/src/ptbench/data/tbpoc/fold_3.py index c52b8c2e..514bf12c 100644 --- a/src/ptbench/data/tbpoc/fold_3.py +++ b/src/ptbench/data/tbpoc/fold_3.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") """TB-POC dataset for TB detection (cross validation fold 3). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_4.py b/src/ptbench/data/tbpoc/fold_4.py index 6de0dc13..d4f87280 100644 --- a/src/ptbench/data/tbpoc/fold_4.py +++ b/src/ptbench/data/tbpoc/fold_4.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") """TB-POC dataset for TB detection (cross validation fold 4). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/tbpoc/fold_5.py b/src/ptbench/data/tbpoc/fold_5.py index bdca5a36..2df9a7ff 100644 --- a/src/ptbench/data/tbpoc/fold_5.py +++ b/src/ptbench/data/tbpoc/fold_5.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") """TB-POC dataset for TB detection (cross validation fold 5). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_6.py b/src/ptbench/data/tbpoc/fold_6.py index c17ba0ba..5d4fd08a 100644 --- a/src/ptbench/data/tbpoc/fold_6.py +++ b/src/ptbench/data/tbpoc/fold_6.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") """TB-POC dataset for TB detection (cross validation fold 6). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_7.py b/src/ptbench/data/tbpoc/fold_7.py index 4310f2f4..3b0b137f 100644 --- a/src/ptbench/data/tbpoc/fold_7.py +++ b/src/ptbench/data/tbpoc/fold_7.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") """TB-POC dataset for TB detection (cross validation fold 7). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_8.py b/src/ptbench/data/tbpoc/fold_8.py index d7fa5d10..f0304467 100644 --- a/src/ptbench/data/tbpoc/fold_8.py +++ b/src/ptbench/data/tbpoc/fold_8.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") """TB-POC dataset for TB detection (cross validation fold 8). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/tbpoc/fold_9.py b/src/ptbench/data/tbpoc/fold_9.py index f37e1f36..327c7156 100644 --- a/src/ptbench/data/tbpoc/fold_9.py +++ b/src/ptbench/data/tbpoc/fold_9.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") """TB-POC dataset for TB detection (cross validation fold 9). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbx11k/datamodule.py b/src/ptbench/data/tbx11k/datamodule.py index 45785ddb..45cab752 100644 --- a/src/ptbench/data/tbx11k/datamodule.py +++ b/src/ptbench/data/tbx11k/datamodule.py @@ -47,17 +47,11 @@ finding locations, as described above. class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the TBX11k dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - """ + """A specialized raw-data-loader for the TBX11k dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self): self.datadir = load_rc().get( @@ -69,8 +63,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, an integer, representing the sample label, and possible radiological findings represented by @@ -79,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ image = PIL.Image.open(os.path.join(self.datadir, sample[0])) @@ -98,8 +89,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, an integer, representing the sample label, and possible radiological findings represented by @@ -108,8 +98,6 @@ class 
RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -121,8 +109,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, an integer, representing the sample label, and possible radiological findings represented by @@ -131,8 +118,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - annotations Bounding box annotations, if any available with the sample. """ return sample[2] if len(sample) > 2 else [] # type: ignore @@ -194,91 +179,55 @@ class DataModule(CachingDataModule): - imgs/tb/tb1104.png - imgs/tb/tb1143.png - - Original train dataset samples: - - Healthy: 3000 - - Sick (but no TB): 3000 - - Active TB only: 473 - - Latent TB only: 103 - - Both active and latent TB: 23 - - Unknown: 1 - - Total: 6600 - - - Original validation dataset samples: - - Healthy: 800 - - Sick (but no TB): 800 - - Latent TB only: 36 - - Active TB only: 157 - - Both active and latent TB: 7 - - Total: 1800 - - - Original test dataset samples: - - Unknown: 3302 - - Total: 3302 - - * Because the test set does not have annotations, we generate train, - validation and test datasets as such: + * Original train dataset samples: - - The original validation dataset becomes our test set. - - The original train dataset is split into new train and validation - datasets (validation ratio = 0.203 w.r.t. original train dataset size). - The selection of samples is stratified (see comments through our split - code, which is shipped alongside this file.) 
- - * Split v1 contains healthy subjects against active TB cases (total samples - = 4430): - - - ``train`` dataset samples: - - Healthy: 2390 - - Active TB only: 377 - - Total: 2767 - - - ``validation`` dataset samples: - - Healthy: 610 - - Active TB only: 96 - - Total: 706 + - Healthy: 3000 + - Sick (but no TB): 3000 + - Active TB only: 473 + - Latent TB only: 103 + - Both active and latent TB: 23 + - Unknown: 1 + - Total: 6600 - - ``test`` dataset samples: - - Healthy: 800 - - Active TB only: 157 - - Total: 957 + * Original validation dataset samples: - * Split v2 contains healthy, sick (no TB), and latent TB subjects against - active TB cases (total samples = 8369): + - Healthy: 800 + - Sick (but no TB): 800 + - Latent TB only: 36 + - Active TB only: 157 + - Both active and latent TB: 7 + - Total: 1800 - - ``train`` dataset samples: - - Healthy, Sick or Latent TB: 4864 - - Active TB only: 377 - - Total: 5241 + * Original test dataset samples: - - ``validation`` dataset samples: - - Healthy, Sick or Latent TB: 1239 - - Active TB only: 96 - - Total: 1335 + - Unknown: 3302 + - Total: 3302 - - ``test`` dataset samples: - - Healthy, Sick or Latent TB: 1636 - - Active TB only: 157 - - Total: 1793 + * Because the test set does not have annotations, we generated train, + validation and test datasets as such: + - The original validation dataset becomes our test set. + - The original train dataset is split into new train and validation + datasets (validation ratio = 0.203 w.r.t. original train dataset size). + The selection of samples is stratified (see comments through our split + code, which is shipped alongside this file.) 
Data specifications: - * Raw data input (on disk): - - * PNG images 8 bits RGB, 512 x 512 pixels + * Raw data input (on disk): PNG images 8 bits RGB, 512 x 512 pixels * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` + - Load raw PNG with :py:mod:`PIL` - * Final specifications: + * Final specifications: - * RGB, encoded as a 3-plane tensor using 32-bit floats, square - (512x512 pixels) - * Labels: 0 (healthy, latent tb or sick but no tb depending on the - protocol), 1 (active tuberculosis) + - RGB, encoded as a 3-plane tensor using 32-bit floats, square + (512x512 pixels) + - Labels: 0 (healthy, latent tb or sick but no tb depending on the + protocol), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/tbx11k/v1_healthy_vs_atb.py b/src/ptbench/data/tbx11k/v1_healthy_vs_atb.py index 9a5cf8e3..fe0de82a 100644 --- a/src/ptbench/data/tbx11k/v1_healthy_vs_atb.py +++ b/src/ptbench/data/tbx11k/v1_healthy_vs_atb.py @@ -5,3 +5,27 @@ from .datamodule import DataModule datamodule = DataModule("v1-healthy-vs-atb.json") +"""TBX11k dataset for TB detection. Split ``v1`` (healthy against active TB +cases). + +Split v1 contains healthy subjects against active TB cases (total samples = +4430): + +* ``train`` dataset samples: + + - Healthy: 2390 + - Active TB only: 377 + - Total: 2767 + +* ``validation`` dataset samples: + + - Healthy: 610 + - Active TB only: 96 + - Total: 706 + +* ``test`` dataset samples: + + - Healthy: 800 + - Active TB only: 157 + - Total: 957 +""" diff --git a/src/ptbench/data/tbx11k/v2_others_vs_atb.py b/src/ptbench/data/tbx11k/v2_others_vs_atb.py index ccb98222..5bee7bd0 100644 --- a/src/ptbench/data/tbx11k/v2_others_vs_atb.py +++ b/src/ptbench/data/tbx11k/v2_others_vs_atb.py @@ -5,3 +5,27 @@ from .datamodule import DataModule datamodule = DataModule("v2-others-vs-atb.json") +"""TBX11k dataset for TB detection. Split ``v2`` (everything else against +active TB cases).
+ +Split v2 contains healthy, sick (no TB), and latent TB subjects against +active TB cases (total samples = 8369): + +* ``train`` dataset samples: + + - Healthy, Sick or Latent TB: 4864 + - Active TB only: 377 + - Total: 5241 + +* ``validation`` dataset samples: + + - Healthy, Sick or Latent TB: 1239 + - Active TB only: 96 + - Total: 1335 + +* ``test`` dataset samples: + + - Healthy, Sick or Latent TB: 1636 + - Active TB only: 157 + - Total: 1793 +""" diff --git a/src/ptbench/data/typing.py b/src/ptbench/data/typing.py index 6f41b39e..c1df54c6 100644 --- a/src/ptbench/data/typing.py +++ b/src/ptbench/data/typing.py @@ -52,9 +52,9 @@ DatabaseSplit: typing.TypeAlias = collections.abc.Mapping[ ] """The definition of a database split. -A database split maps dataset (subset) names to sequences of objects -that, through :py:class:`RawDataLoader`s, eventually become -:py:class:`Sample`s in the processing pipeline. +A database split maps dataset (subset) names to sequences of objects that, +through a :py:class:`RawDataLoader`, eventually become :py:data:`.Sample` objects in +the processing pipeline. """ ConcatDatabaseSplit: typing.TypeAlias = collections.abc.Mapping[ @@ -63,12 +63,11 @@ ConcatDatabaseSplit: typing.TypeAlias = collections.abc.Mapping[ ] """The definition of a complex database split composed of several other splits. -A database split maps dataset (subset) names to sequences of objects -that, through :py:class:`RawDataLoader`s, eventually become -:py:class:`Sample`s in the processing pipeline. Objects of this subtype -allow the construction of complex splits composed of cannibalized parts -of other splits. Each split may be assigned a different -:py:class:`RawDataLoader`. +A database split maps dataset (subset) names to sequences of objects that, +through a :py:class:`.RawDataLoader`, eventually become :py:data:`.Sample` objects in +the processing pipeline.
Objects of this subtype allow the construction of +complex splits composed of cannibalized parts of other splits. Each split may +be assigned a different :py:class:`.RawDataLoader`. """ diff --git a/src/ptbench/models/alexnet.py b/src/ptbench/models/alexnet.py index 5daed2a3..9096f608 100644 --- a/src/ptbench/models/alexnet.py +++ b/src/ptbench/models/alexnet.py @@ -27,7 +27,6 @@ class Alexnet(pl.LightningModule): Parameters ---------- - train_loss The loss to be used during the training. @@ -124,8 +123,7 @@ class Alexnet(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ checkpoint["normalizer"] = self.normalizer @@ -136,8 +134,7 @@ class Alexnet(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") @@ -151,8 +148,7 @@ class Alexnet(pl.LightningModule): Parameters ---------- - - dataloader: :py:class:`torch.utils.data.DataLoader` + dataloader A torch Dataloader from which to compute the mean and std. Will not be used if the model is pretrained. """ diff --git a/src/ptbench/models/densenet.py b/src/ptbench/models/densenet.py index 0663b60b..97ebaf78 100644 --- a/src/ptbench/models/densenet.py +++ b/src/ptbench/models/densenet.py @@ -25,7 +25,6 @@ class Densenet(pl.LightningModule): Parameters ---------- - train_loss The loss to be used during the training. @@ -124,8 +123,7 @@ class Densenet(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ checkpoint["normalizer"] = self.normalizer @@ -136,8 +134,7 @@ class Densenet(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") @@ -151,8 +148,7 @@ class Densenet(pl.LightningModule): Parameters ---------- - - dataloader: :py:class:`torch.utils.data.DataLoader` + dataloader A torch Dataloader from which to compute the mean and std. 
Will not be used if the model is pretrained. """ diff --git a/src/ptbench/models/normalizer.py b/src/ptbench/models/normalizer.py index 147efc7e..576f21cc 100644 --- a/src/ptbench/models/normalizer.py +++ b/src/ptbench/models/normalizer.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later -"""A network model that prefixes a subtract/divide step to any other module.""" +"""Functions to compute normalisation factors based on dataloaders.""" import torch import torch.nn diff --git a/src/ptbench/models/pasa.py b/src/ptbench/models/pasa.py index 3f10757f..5d6e20b4 100644 --- a/src/ptbench/models/pasa.py +++ b/src/ptbench/models/pasa.py @@ -31,7 +31,6 @@ class Pasa(pl.LightningModule): Parameters ---------- - train_loss The loss to be used during the training. @@ -204,8 +203,7 @@ class Pasa(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ checkpoint["normalizer"] = self.normalizer @@ -216,8 +214,7 @@ class Pasa(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") @@ -228,7 +225,6 @@ class Pasa(pl.LightningModule): Parameters ---------- - dataloader A torch Dataloader from which to compute the mean and std """ diff --git a/src/ptbench/scripts/train_analysis.py b/src/ptbench/scripts/train_analysis.py index 4061164f..6fd623f8 100644 --- a/src/ptbench/scripts/train_analysis.py +++ b/src/ptbench/scripts/train_analysis.py @@ -24,12 +24,14 @@ def create_figures(df: pandas.DataFrame) -> list[plt.figure]: It is assumed that some metric names are of the form <metric>/<subset>. All subsets for a metric will be displayed on the same figure. + Parameters ---------- df: Pandas dataframe containing the data to plot. 
+ Returns ------- @@ -115,17 +117,7 @@ def train_analysis( **_, ) -> None: """Creates a plot for each metric in the training logs and saves them in a - pdf file. - - Parameters - ---------- - - logdir: - Directory containing tensorboard event files. - - output_pdf: - The pdf file in which to save the plots. - """ + pdf file.""" from matplotlib.backends.backend_pdf import PdfPages diff --git a/src/ptbench/utils/checkpointer.py b/src/ptbench/utils/checkpointer.py index 67190e2f..88cdfbb7 100644 --- a/src/ptbench/utils/checkpointer.py +++ b/src/ptbench/utils/checkpointer.py @@ -17,23 +17,25 @@ def get_checkpoint( Can return the best or last checkpoint, or a checkpoint at a specific path. Ensures the checkpoint exists, raising an error if it is not the case. - If resume_from is None, checks the output directory if a checkpoint already exists and returns it. - If no checkpoint is found, returns None. + If ``resume_from`` is ``None``, checks whether a checkpoint already exists + in the output directory and returns it. If no checkpoint is found, returns ``None``. + Parameters ---------- - output_folder: + output_folder Directory in which checkpoints are stored. - resume_from: + resume_from Which model to get. Can be one of "best", "last", or a path to a checkpoint. If None, gets the last checkpoint if it exists, otherwise returns None + Returns ------- - checkpoint_file: + checkpoint_file Path to the requested checkpoint or None. """ last_checkpoint_path = os.path.join(output_folder, "model_final_epoch.ckpt") -- GitLab