diff --git a/doc/api.rst b/doc/api.rst index 0114d9ff62b83dfc2178ade21426a7a2cb334dfe..41fc340f311746d5585e6448126fbfbafe853899 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -22,41 +22,49 @@ Auxiliary classes and methods to define raw dataset iterators. .. autosummary:: :toctree: api/data - ptbench.data.sample - ptbench.data.dataset - ptbench.data.utils - ptbench.data.loader - ptbench.data.transforms - ptbench.configs.datasets + ptbench.data.augmentations + ptbench.data.datamodule + ptbench.data.image_utils + ptbench.data.split + ptbench.data.typing +.. _ptbench.api.rawdata: + +Basic Datamodules +----------------- + +Pytorch_ :py:class:`torch.utils.data.DataLoader` access through lightning_ +:py:class:`lightning.pytorch.core.LightningDataModule`. + +.. autosummary:: + :toctree: api/data/raw + + ptbench.data.hivtb.datamodule + ptbench.data.indian.datamodule + ptbench.data.montgomery.datamodule + ptbench.data.nih_cxr14.datamodule + ptbench.data.padchest.datamodule + ptbench.data.shenzhen.datamodule + ptbench.data.tbpoc.datamodule + ptbench.data.tbx11k.datamodule -.. _ptbench.api.data.raw: -Raw Dataset Access ------------------- +.. _ptbench.api.remixdata: -Direct data-access through iterators. +Remixed Datamodules +------------------- + +Pytorch_ :py:class:`torch.utils.data.DataLoader` access through lightning_ +:py:class:`lightning.pytorch.core.LightningDataModule`. .. 
autosummary:: :toctree: api/data/raw - ptbench.data.hivtb_RS - ptbench.data.tbpoc - ptbench.data.montgomery_RS - ptbench.data.padchest - ptbench.data.hivtb - ptbench.data.indian_RS - ptbench.data.shenzhen_RS - ptbench.data.tbpoc_RS - ptbench.data.shenzhen - ptbench.data.montgomery - ptbench.data.indian - ptbench.data.nih_cxr14_re - ptbench.data.padchest_RS - ptbench.data.tbx11k_simplified - ptbench.data.tbx11k_simplified_RS - ptbench.data.tbx11k_simplified_v2 - ptbench.data.tbx11k_simplified_v2_RS + ptbench.data.montgomery_shenzhen.datamodule + ptbench.data.montgomery_shenzhen_indian.datamodule + ptbench.data.montgomery_shenzhen_indian_tbx11k.datamodule + ptbench.data.montgomery_shenzhen_indian_padchest.datamodule + ptbench.data.nih_cxr14_padchest.datamodule .. _ptbench.api.models: @@ -69,12 +77,11 @@ CNN and other models implemented. .. autosummary:: :toctree: api/models + ptbench.models.pasa ptbench.models.alexnet ptbench.models.densenet - ptbench.models.densenet_rs - ptbench.models.logistic_regression ptbench.models.normalizer - ptbench.models.pasa + ptbench.models.logistic_regression ptbench.models.signs_to_tb @@ -88,11 +95,14 @@ Functions to actuate on the data. .. autosummary:: :toctree: api/engine + ptbench.engine.device + ptbench.engine.callbacks ptbench.engine.trainer ptbench.engine.predictor ptbench.engine.evaluator + .. _ptbench.api.utils: Various utilities @@ -106,6 +116,7 @@ Reusable auxiliary functions. ptbench.utils.checkpointer ptbench.utils.download ptbench.utils.grad_cams + ptbench.utils.image ptbench.utils.measure ptbench.utils.model_serialization ptbench.utils.model_zoo @@ -114,6 +125,7 @@ Reusable auxiliary functions. ptbench.utils.resources ptbench.utils.summary ptbench.utils.table + ptbench.utils.tensorboard .. 
include:: links.rst diff --git a/doc/conf.py b/doc/conf.py index 49b7ceacf910e8ea1e1d2c5c602654a678b64908..b69ef1b9b5fe2b3af962ea6bc41e7cd4df7d1c00 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -120,6 +120,7 @@ auto_intersphinx_packages = [ "pandas", "pillow", "psutil", + "scipy", "torch", "torchvision", "lightning", diff --git a/doc/config.rst b/doc/config.rst index b0774e352c5dba22fa007fae73b55466684afe0e..34025f7f70d8a05299c67491c1115c06674c67ff 100644 --- a/doc/config.rst +++ b/doc/config.rst @@ -11,24 +11,26 @@ This module contains preset configurations for baseline CNN architectures and datamodules. +.. _ptbench.config.models: + Models ====== .. autosummary:: - :toctree: api/configs/models + :toctree: api/models/config :template: config.rst - ptbench.configs.models.alexnet - ptbench.configs.models.alexnet_pretrained - ptbench.configs.models.densenet - ptbench.configs.models.densenet_pretrained - ptbench.configs.models.densenet_rs - ptbench.configs.models.logistic_regression - ptbench.configs.models.pasa - ptbench.configs.models.signs_to_tb + ptbench.models.config.alexnet + ptbench.models.config.alexnet_pretrained + ptbench.models.config.densenet + ptbench.models.config.densenet_pretrained + ptbench.models.config.densenet_rs + ptbench.models.config.logistic_regression + ptbench.models.config.pasa + ptbench.models.config.signs_to_tb -.. _ptbench.configs.datamodules: +.. _ptbench.config.datamodules: Preset Datamodules ================== @@ -39,7 +41,7 @@ input standard, if applicable. Use these datamodules for training and evaluatin your models. .. autosummary:: - :toctree: api/configs/datamodules + :toctree: api/data/config :template: config.rst ptbench.data.indian.default @@ -51,7 +53,7 @@ your models. ptbench.data.tbx11k.v2_others_vs_atb -.. _ptbench.configs.datamodules.remix: +.. 
_ptbench.config.datamodules.remix: Remix Datamodules ================= @@ -59,7 +61,7 @@ Remix Datamodules We provide some aggregated datamodules to facilitate cross-database development. .. autosummary:: - :toctree: api/configs/datamodules + :toctree: api/data/config :template: config.rst ptbench.data.montgomery_shenzhen.default @@ -70,7 +72,7 @@ We provide some aggregated datamodules to facilitate cross-database development. ptbench.data.nih_cxr14_padchest.idiap -.. _ptbench.configs.datamodules.folds: +.. _ptbench.config.datamodules.folds: Cross-validation Datamodules ============================ @@ -82,7 +84,7 @@ datamodules. Nine other folds are available for every configuration (from 1 to .. autosummary:: - :toctree: api/configs/datamodules + :toctree: api/data/config :template: config.rst ptbench.data.hivtb.fold_0 diff --git a/doc/links.rst b/doc/links.rst index 92ecafd5a8e0fd128efaaccd637d2b2731f2f788..d773d1aec33e5f0a5f802b6934160a9bfe51c96c 100644 --- a/doc/links.rst +++ b/doc/links.rst @@ -11,6 +11,7 @@ .. _pip: https://pip.pypa.io/en/stable/ .. _mamba: https://mamba.readthedocs.io/en/latest/index.html .. _pytorch: https://pytorch.org +.. _lightning: https://lightning.ai .. Raw data websites .. _montgomery: https://lhncbc.nlm.nih.gov/publication/pub9931 diff --git a/doc/results/index.rst b/doc/results/index.rst index 29ad8a10c4c54f3b8164fd04acab529f30e0ad29..0217fe6c798c9d1d0ad388f363313fa7c2819c42 100644 --- a/doc/results/index.rst +++ b/doc/results/index.rst @@ -113,37 +113,37 @@ Thresholds used: :scale: 50% :alt: Testing sets ROC curves for Pasa model trained on normalized-kfold MC - :py:mod:`Pasa <ptbench.configs.models.pasa>`: Pasa trained on normalized-kfold MC + :py:mod:`Pasa <ptbench.models.config.pasa>`: Pasa trained on normalized-kfold MC - .. 
figure:: img/compare_pasa_mc_ch_kfold_500.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for Pasa model trained on normalized-kfold MC-CH - :py:mod:`Pasa <ptbench.configs.models.pasa>`: Pasa trained on normalized-kfold MC-CH + :py:mod:`Pasa <ptbench.models.config.pasa>`: Pasa trained on normalized-kfold MC-CH - .. figure:: img/compare_pasa_mc_ch_in_kfold_500.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for Pasa model trained on normalized-kfold MC-CH-IN - :py:mod:`Pasa <ptbench.configs.models.pasa>`: Pasa trained on normalized-kfold MC-CH-IN + :py:mod:`Pasa <ptbench.models.config.pasa>`: Pasa trained on normalized-kfold MC-CH-IN * - .. figure:: img/compare_densenet_mc_kfold_2000.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC - .. figure:: img/compare_densenet_mc_ch_kfold_2000.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC-CH + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC-CH - .. 
figure:: img/compare_densenet_mc_ch_in_kfold_2000.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH-IN - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC-CH-IN + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC-CH-IN DenseNet-121 (pretrained on ImageNet) """"""""""""""""""""""""""""""""""""" @@ -180,19 +180,19 @@ Thresholds used: :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC - :py:mod:`DenseNet <ptbench.configs.models.densenet>` DenseNet trained on normalized-kfold MC + :py:mod:`DenseNet <ptbench.models.config.densenet>` DenseNet trained on normalized-kfold MC - .. figure:: img/compare_densenetpreIN_mc_ch_kfold_600.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH - :py:mod:`DenseNet <ptbench.configs.models.densenet>` DenseNet trained on normalized-kfold MC-CH + :py:mod:`DenseNet <ptbench.models.config.densenet>` DenseNet trained on normalized-kfold MC-CH - .. figure:: img/compare_densenetpreIN_mc_ch_ch_kfold_600.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH-IN - :py:mod:`DenseNet <ptbench.configs.models.densenet>` DenseNet trained on normalized-kfold MC-CH-IN + :py:mod:`DenseNet <ptbench.models.config.densenet>` DenseNet trained on normalized-kfold MC-CH-IN Logistic Regression Classifier """""""""""""""""""""""""""""" @@ -229,19 +229,19 @@ Thresholds used: :scale: 50% :alt: Testing sets ROC curves for LogReg model trained on normalized-kfold MC - :py:mod:`LogReg <ptbench.configs.models.logistic_regression>`: LogReg trained on normalized-kfold MC + :py:mod:`LogReg <ptbench.models.config.logistic_regression>`: LogReg trained on normalized-kfold MC - .. 
figure:: img/compare_logreg_mc_ch_kfold_100.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for LogReg model trained on normalized-kfold MC-CH - :py:mod:`LogReg <ptbench.configs.models.logistic_regression>`: LogReg trained on normalized-kfold MC-CH + :py:mod:`LogReg <ptbench.models.config.logistic_regression>`: LogReg trained on normalized-kfold MC-CH - .. figure:: img/compare_logreg_mc_ch_in_kfold_100.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for LogReg model trained on normalized-kfold MC-CH-IN - :py:mod:`LogReg <ptbench.configs.models.logistic_regression>`: LogReg trained on normalized-kfold MC-CH-IN + :py:mod:`LogReg <ptbench.models.config.logistic_regression>`: LogReg trained on normalized-kfold MC-CH-IN DenseNet-121 (pretrained on ImageNet and NIH CXR14) """"""""""""""""""""""""""""""""""""""""""""""""""" @@ -278,19 +278,19 @@ Thresholds used: :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC (pretrained on NIH) - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC (pretrained on NIH) + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC (pretrained on NIH) - .. figure:: img/compare_densenetpre_mc_ch_kfold_300.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH (pretrained on NIH) - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC-CH (pretrained on NIH) + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC-CH (pretrained on NIH) - .. 
figure:: img/compare_densenetpre_mc_ch_in_kfold_300.jpg :align: center :scale: 50% :alt: Testing sets ROC curves for DenseNet model trained on normalized-kfold MC-CH-IN (pretrained on NIH) - :py:mod:`DenseNet <ptbench.configs.models.densenet>`: DenseNet trained on normalized-kfold MC-CH-IN (pretrained on NIH) + :py:mod:`DenseNet <ptbench.models.config.densenet>`: DenseNet trained on normalized-kfold MC-CH-IN (pretrained on NIH) Global sensitivity analysis (relevance) diff --git a/doc/usage/evaluation.rst b/doc/usage/evaluation.rst index d8b15040228d0781707073f9e538dd0949608246..8d98ca924873c9dec0c6781641541cceb9f36ee7 100644 --- a/doc/usage/evaluation.rst +++ b/doc/usage/evaluation.rst @@ -35,8 +35,8 @@ the pre-trained model. .. tip:: An option to generate grad-CAMs is available for the :py:mod:`DensenetRS - <ptbench.configs.models_datasets.densenet_rs>` model. To activate it, use - the ``--grad-cams`` argument. + <ptbench.models.config.densenet_rs>` model. To activate it, use the + ``--grad-cams`` argument. .. 
tip:: diff --git a/pyproject.toml b/pyproject.toml index 56544f738ff8e0c10580844c1506d7e6ad73b6a6..67bd370c5e8827b4d2e2a38e24db8ee3272057fb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,13 +70,13 @@ ptbench = "ptbench.scripts.cli:cli" [project.entry-points."ptbench.config"] # models -pasa = "ptbench.configs.models.pasa" -signs-to-tb = "ptbench.configs.models.signs_to_tb" -logistic-regression = "ptbench.configs.models.logistic_regression" -alexnet = "ptbench.configs.models.alexnet" -alexnet-pretrained = "ptbench.configs.models.alexnet_pretrained" -densenet = "ptbench.configs.models.densenet" -densenet-pretrained = "ptbench.configs.models.densenet_pretrained" +pasa = "ptbench.models.config.pasa" +signs-to-tb = "ptbench.models.config.signs_to_tb" +logistic-regression = "ptbench.models.config.logistic_regression" +alexnet = "ptbench.models.config.alexnet" +alexnet-pretrained = "ptbench.models.config.alexnet_pretrained" +densenet = "ptbench.models.config.densenet" +densenet-pretrained = "ptbench.models.config.densenet_pretrained" # montgomery dataset (and cross-validation folds) montgomery = "ptbench.data.montgomery.default" diff --git a/src/ptbench/data/datamodule.py b/src/ptbench/data/datamodule.py index 0785e2332d61978373dea1a9b536559848a5c117..0ab3c36caa3692c86750a70082e36da1a1d8c71d 100644 --- a/src/ptbench/data/datamodule.py +++ b/src/ptbench/data/datamodule.py @@ -36,15 +36,12 @@ def _sample_size_bytes(s: Sample) -> int: Parameters ---------- - s The sample to be analyzed Returns ------- - - size The size in bytes occupied by this sample """ @@ -74,7 +71,6 @@ class _DelayedLoadingDataset(Dataset): Parameters ---------- - raw_dataset An iterable containing the raw dataset samples representing one of the database split datasets. 
@@ -132,7 +128,6 @@ def _apply_loader_and_transforms( Parameters ---------- - info The sample information, as loaded from its raw dataset dictionary @@ -147,8 +142,6 @@ def _apply_loader_and_transforms( Returns ------- - - sample The loaded and transformed sample. """ sample = load(info) @@ -165,7 +158,6 @@ class _CachedDataset(Dataset): Parameters ---------- - raw_dataset An iterable containing the raw dataset samples representing one of the database split datasets. @@ -243,7 +235,6 @@ class _ConcatDataset(Dataset): Parameters ---------- - datasets An iterable over pre-instantiated datasets. """ @@ -323,8 +314,8 @@ def _make_balanced_random_sampler( Dataset 2 proportionally less likely. This function assumes targets are stored on a dictionary entry named - ``target`` inside the metadata information for the :py:type:``Sample``, and - that its value is integer. + ``target`` inside the metadata information for the + :py:data:`.typing.Sample`, and that its value is integer. We then instantiate a pytorch sampler using the inverse probabilities (the more samples of a class, the less likely it becomes to be sampled. @@ -332,7 +323,6 @@ def _make_balanced_random_sampler( Parameters ---------- - dataset An instance of torch Dataset. :py:class:`torch.utils.data.ConcatDataset` are supported. @@ -344,15 +334,12 @@ def _make_balanced_random_sampler( Returns ------- - - sampler A sampler, to be used in a dataloader equipped with the same dataset used to calculate the relative sample weights. Raises ------ - RuntimeError If requested to balance a dataset (single, not-concatenated) without an existing target. 
@@ -434,23 +421,22 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - splits A dictionary that contains string keys representing dataset names, and values that are iterables over a 2-tuple containing an iterable over arbitrary, user-configurable sample representations (potentially on - disk or permanent storage), and :py:class:`RawDataLoader` (or "sample") - loader objects, which concretely implement a mechanism to load such - samples in memory, from permanent storage. + disk or permanent storage), and :py:class:`.typing.RawDataLoader` (or + "sample") loader objects, which concretely implement a mechanism to + load such samples in memory, from permanent storage. Sample representations on permanent storage may be of any iterable format (e.g. list, dictionary, etc.), for as long as the assigned - :py:class:`RawDataLoader` can properly handle it. + :py:class:`.typing.RawDataLoader` can properly handle it. .. tip:: To check the split and the loader function works correctly, you may - use :py:func:`..dataset.check_database_split_loading`. + use :py:func:`.split.check_database_split_loading`. This class expects at least one entry called ``train`` to exist in the input dictionary. Optional entries are ``validation``, and ``test``. @@ -507,24 +493,24 @@ class ConcatDataModule(lightning.LightningDataModule): multiprocessing data loading. Set to 0 to enable as many data loading instances as processing cores as available in the system. Set to >= 1 to enable that many multiprocessing instances for data loading. - - - Attributes - ---------- - - model_transforms - A list of transforms (torch modules) that will be applied after - raw-data-loading, and just before data is fed into the model or - eventual data-augmentation transformations for all data loaders - produced by this data module. This part of the pipeline receives data - as output by the raw-data-loader, or model-related transforms (e.g. - resize adaptions), if any is specified. 
If data is cached, it is - cached **after** model-transforms are applied, as that is a potential - memory saver (e.g., if it contains a resizing operation to smaller - images). """ - DatasetDictionary = dict[str, Dataset] + DatasetDictionary: typing.TypeAlias = dict[str, Dataset] + """A dictionary of datasets mapping names to actual datasets.""" + + model_transforms: list[Transform] | None + """Transforms required to fit data into the model. + + A list of transforms (torch modules) that will be applied after + raw-data-loading, and just before data is fed into the model or eventual + data-augmentation transformations for all data loaders produced by + this data module. This part of the pipeline receives data as output + by the raw-data-loader, or model-related transforms (e.g. resize + adaptions), if any is specified. If data is cached, it is cached + **after** model-transforms are applied, as that is a potential + memory saver (e.g., if it contains a resizing operation to smaller + images). + """ def __init__( self, @@ -653,7 +639,6 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - batch_size Number of samples in every **training** batch (this parameter affects memory requirements for the network). If the number of samples in the @@ -696,7 +681,6 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - name Name of the dataset to setup. """ @@ -769,7 +753,6 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - stage Name of the stage to which the setup is applicable. Can be one of ``fit``, ``validate``, ``test`` or ``predict``. Each stage @@ -808,7 +791,6 @@ class ConcatDataModule(lightning.LightningDataModule): Parameters ---------- - stage Name of the stage to which the teardown is applicable. Can be one of ``fit``, ``validate``, ``test`` or ``predict``. 
Each stage @@ -903,19 +885,19 @@ class CachingDataModule(ConcatDataModule): Parameters ---------- - database_split A dictionary that contains string keys representing dataset names, and values that are iterables over sample representations (potentially on - disk). These objects are passed to an unique :py:class:`RawDataLoader` - for loading the :py:class:`Sample` data (and metadata) in memory. It + disk). These objects are passed to a unique + :py:class:`.typing.RawDataLoader` for loading the + :py:data:`.typing.Sample` data (and metadata) in memory. It therefore assumes the whole split is homogeneous and can be loaded in the same way. .. tip:: To check the split and the loader function works correctly, you may - use :py:func:`..dataset.check_database_split_loading`. + use :py:func:`.split.check_database_split_loading`. This class expects at least one entry called ``train`` to exist in the input dictionary. Optional entries are ``validation``, and ``test``. diff --git a/src/ptbench/data/hivtb/datamodule.py b/src/ptbench/data/hivtb/datamodule.py index b5b84ec434ea4ce4145119bf565d0d8ee8a091d9..2f1dcb855033cc5bb182c571dad2d3e1932f6e82 100644 --- a/src/ptbench/data/hivtb/datamodule.py +++ b/src/ptbench/data/hivtb/datamodule.py @@ -19,17 +19,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the HIV-TB dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - """ + """A specialized raw-data-loader for the HIV-TB dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self): self.datadir = load_rc().get( @@ -41,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. 
@@ -50,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ image = PIL.Image.open(os.path.join(self.datadir, sample[0])).convert( @@ -73,8 +64,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -82,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -108,23 +96,23 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * BMP (BMP3) and JPEG grayscale images encoded as 8-bit RGB, with - varying resolution + * BMP (BMP3) and JPEG grayscale images encoded as 8-bit RGB, with + varying resolution * Output image: - * Transforms: + * Transforms: - * Load raw BMP or JPEG with :py:mod:`PIL` - * Remove black borders - * Convert to torch tensor - * Torch center cropping to get square image + * Load raw BMP or JPEG with :py:mod:`PIL` + * Remove black borders + * Convert to torch tensor + * Torch center cropping to get square image * Final specifications - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square at 2048 x 2048 pixels - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square at 2048 x 2048 pixels + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/hivtb/fold_0.py b/src/ptbench/data/hivtb/fold_0.py index 57d77952ad0f012f4c7224f38dc293aa58d72dcd..6919907d05c598567e376abbc7f6256de422d086 100644 --- a/src/ptbench/data/hivtb/fold_0.py +++ b/src/ptbench/data/hivtb/fold_0.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") """HIV-TB dataset for TB detection (cross validation fold 0). 
-See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_1.py b/src/ptbench/data/hivtb/fold_1.py index c91a968f500204bd1fa30e43e168dbf3e7f0edab..1f4b1dd05dfa9d1bb1265e083aa268fdad84bd60 100644 --- a/src/ptbench/data/hivtb/fold_1.py +++ b/src/ptbench/data/hivtb/fold_1.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") """HIV-TB dataset for TB detection (cross validation fold 1). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_2.py b/src/ptbench/data/hivtb/fold_2.py index 323e80a02a0b44b5691d13abc971679182e2d97f..9c9fbe653b9d16e7a1884886a664560a8c7b50c4 100644 --- a/src/ptbench/data/hivtb/fold_2.py +++ b/src/ptbench/data/hivtb/fold_2.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") """HIV-TB dataset for TB detection (cross validation fold 2). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_3.py b/src/ptbench/data/hivtb/fold_3.py index 1eed4c056648bce88f174ccbce8a71efe69fc136..ef07c5911afee96ebddcfc1778de93c5688618af 100644 --- a/src/ptbench/data/hivtb/fold_3.py +++ b/src/ptbench/data/hivtb/fold_3.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") """HIV-TB dataset for TB detection (cross validation fold 3). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/hivtb/fold_4.py b/src/ptbench/data/hivtb/fold_4.py index 9cfa6186d6dc7d44f8bcfa56d7c978e7bf346c54..6683006d46bfd818184a8e483a182a058da4cdc9 100644 --- a/src/ptbench/data/hivtb/fold_4.py +++ b/src/ptbench/data/hivtb/fold_4.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") """HIV-TB dataset for TB detection (cross validation fold 4). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_5.py b/src/ptbench/data/hivtb/fold_5.py index 591fef3732b522569a92082cb7e3c208c16bf2da..cf67833ade85503f8f9d3322969c155fd98f1f6a 100644 --- a/src/ptbench/data/hivtb/fold_5.py +++ b/src/ptbench/data/hivtb/fold_5.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") """HIV-TB dataset for TB detection (cross validation fold 5). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_6.py b/src/ptbench/data/hivtb/fold_6.py index fb5e1614b349779d42771bd165a9a1d96c6cb83d..94614fc4887046c671e4b395e070d4e38e5a7143 100644 --- a/src/ptbench/data/hivtb/fold_6.py +++ b/src/ptbench/data/hivtb/fold_6.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") """HIV-TB dataset for TB detection (cross validation fold 6). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/hivtb/fold_7.py b/src/ptbench/data/hivtb/fold_7.py index d64db4837f24058d34b2daf4c8383595aee7be21..259446c884f32d1d4f2f0c0e748a448d62a756e4 100644 --- a/src/ptbench/data/hivtb/fold_7.py +++ b/src/ptbench/data/hivtb/fold_7.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") """HIV-TB dataset for TB detection (cross validation fold 7). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_8.py b/src/ptbench/data/hivtb/fold_8.py index 8a0f87d10c934f08249ed4f0206c09b5bbc6a7a9..d243db082cdbc758308cffd074a449e54529dfce 100644 --- a/src/ptbench/data/hivtb/fold_8.py +++ b/src/ptbench/data/hivtb/fold_8.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") """HIV-TB dataset for TB detection (cross validation fold 8). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/hivtb/fold_9.py b/src/ptbench/data/hivtb/fold_9.py index d92de50e75cd18ea9b99d1bdb010f6f88872b9cc..340bc66169660a56f4007dac763414f2e0bf2eb4 100644 --- a/src/ptbench/data/hivtb/fold_9.py +++ b/src/ptbench/data/hivtb/fold_9.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") """HIV-TB dataset for TB detection (cross validation fold 9). -See :py:class:`DataModule` for technical details. +See :py:class:`.hivtb.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/indian/datamodule.py b/src/ptbench/data/indian/datamodule.py index f6017cad4e7deac52c74c8478c3697f64dffa9c7..ee53da8045ba01d5434428665c64f1a830197b56 100644 --- a/src/ptbench/data/indian/datamodule.py +++ b/src/ptbench/data/indian/datamodule.py @@ -33,23 +33,23 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * PNG RGB 8-bit depth images with "inverted" grayscale scale - * Variable width and height + * PNG RGB 8-bit depth images with "inverted" grayscale scale + * Variable width and height * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Convert to torch tensor - * Torch center cropping to get square image + * Load raw PNG with :py:mod:`PIL` + * Remove black borders + * Convert to torch tensor + * Torch center cropping to get square image - * Final specifications: + * Final specifications: - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square, with varying resolutions, depending on the input raw image - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square, with varying resolutions, depending on the input raw image + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/indian/fold_0.py b/src/ptbench/data/indian/fold_0.py index 91711d3863fc916ee55949b858f19c030d180bf1..635b542f70fe5d2a1733ce324dd0b5aa7c15dcb8 100644 --- a/src/ptbench/data/indian/fold_0.py +++ b/src/ptbench/data/indian/fold_0.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 0). + +See :py:class:`.indian.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/indian/fold_1.py b/src/ptbench/data/indian/fold_1.py index 173726955f475465cfcecd394e0d086d059d4968..5a3a0213717508854ed579b53620b7f4b57230b8 100644 --- a/src/ptbench/data/indian/fold_1.py +++ b/src/ptbench/data/indian/fold_1.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 1). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_2.py b/src/ptbench/data/indian/fold_2.py index 91346efa569d498929371e6be75d205b30b22e88..cbf1aee45872a7af3434fa18b5241bc3251ab868 100644 --- a/src/ptbench/data/indian/fold_2.py +++ b/src/ptbench/data/indian/fold_2.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 2). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_3.py b/src/ptbench/data/indian/fold_3.py index 4156c3ee7b88aaedf705e05c028aa6ba47868e12..369c952879249aba105feb35b50d232193433c6f 100644 --- a/src/ptbench/data/indian/fold_3.py +++ b/src/ptbench/data/indian/fold_3.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 3). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_4.py b/src/ptbench/data/indian/fold_4.py index 6d5b73d1b344993c55e16d8be6c7829ce9abce9e..e9137b65946436acc291999b39b7ae060bbb10af 100644 --- a/src/ptbench/data/indian/fold_4.py +++ b/src/ptbench/data/indian/fold_4.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 4). 
+ +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_5.py b/src/ptbench/data/indian/fold_5.py index 27a7442dbdde02eea9cb95b9532818d44a48721a..d6f34d692f88aa23d5e61311eed454af95b9825d 100644 --- a/src/ptbench/data/indian/fold_5.py +++ b/src/ptbench/data/indian/fold_5.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 5). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_6.py b/src/ptbench/data/indian/fold_6.py index 1270cd58cbd36261bcee05b594ec746a181ad113..a293530d31af50b15e74e115551736f5883d3d23 100644 --- a/src/ptbench/data/indian/fold_6.py +++ b/src/ptbench/data/indian/fold_6.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 6). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_7.py b/src/ptbench/data/indian/fold_7.py index e7b5a1c3aedde45ff94fcdcc22c550fb30f9fd27..22ea34390925f94eb3b14b73add96b10afd1d087 100644 --- a/src/ptbench/data/indian/fold_7.py +++ b/src/ptbench/data/indian/fold_7.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 7). + +See :py:class:`.indian.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/indian/fold_8.py b/src/ptbench/data/indian/fold_8.py index 1abbe7b20bb320a9fa2b217838c88243ea60d64b..77cf20ee0c861ee5999d77592a787034ba7ac6b4 100644 --- a/src/ptbench/data/indian/fold_8.py +++ b/src/ptbench/data/indian/fold_8.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 8). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/indian/fold_9.py b/src/ptbench/data/indian/fold_9.py index 71de470d58fa8cd673104ade245d66d3fd2b86a3..a0f881bc9a256e50d69669d66d5a95bea7a9a3c5 100644 --- a/src/ptbench/data/indian/fold_9.py +++ b/src/ptbench/data/indian/fold_9.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Indian collection dataset for computer-aided diagnosis (cross validation +fold 9). + +See :py:class:`.indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/datamodule.py b/src/ptbench/data/montgomery/datamodule.py index 12ead6e81a8e2673cd8b388d52bf3dd7ef9d7849..f3ac8ffce2e95fc4446829b658233386a700f6a8 100644 --- a/src/ptbench/data/montgomery/datamodule.py +++ b/src/ptbench/data/montgomery/datamodule.py @@ -19,17 +19,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the Montgomery dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. 
- """ + """A specialized raw-data-loader for the Montgomery dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self): self.datadir = load_rc().get( @@ -41,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -50,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ # N.B.: Montgomery images are encoded as grayscale PNGs, so no need to @@ -73,8 +64,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -82,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -111,23 +99,23 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * PNG images 8 bit grayscale - * resolution: fixed to one of the cases above + * PNG images 8 bit grayscale + * resolution: fixed to one of the cases above * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Convert to torch tensor - * Torch center cropping to get square image + * Load raw PNG with :py:mod:`PIL` + * Remove black borders + * Convert to torch tensor + * Torch center cropping to get square image - * Final specifications + * Final specifications - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square at 4020 x 4020 pixels - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square at 4020 x 4020 pixels + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, 
split_filename: str): diff --git a/src/ptbench/data/montgomery/fold_0.py b/src/ptbench/data/montgomery/fold_0.py index 91711d3863fc916ee55949b858f19c030d180bf1..a271fa0f8a1b00a4915d4c77c01d488419e8e2f2 100644 --- a/src/ptbench/data/montgomery/fold_0.py +++ b/src/ptbench/data/montgomery/fold_0.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Montgomery datamodule for TB detection (cross validation fold 0). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_1.py b/src/ptbench/data/montgomery/fold_1.py index 173726955f475465cfcecd394e0d086d059d4968..626cf2d0ac931e4ffee7dd2296392eff70bc9bcf 100644 --- a/src/ptbench/data/montgomery/fold_1.py +++ b/src/ptbench/data/montgomery/fold_1.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Montgomery datamodule for TB detection (cross validation fold 1). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_2.py b/src/ptbench/data/montgomery/fold_2.py index 91346efa569d498929371e6be75d205b30b22e88..b6146d068959f5b0b9deee9bed151c5158623cda 100644 --- a/src/ptbench/data/montgomery/fold_2.py +++ b/src/ptbench/data/montgomery/fold_2.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Montgomery datamodule for TB detection (cross validation fold 2). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery/fold_3.py b/src/ptbench/data/montgomery/fold_3.py index 4156c3ee7b88aaedf705e05c028aa6ba47868e12..f1b1c53eccd410b292da85fd129d61e48001e590 100644 --- a/src/ptbench/data/montgomery/fold_3.py +++ b/src/ptbench/data/montgomery/fold_3.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Montgomery datamodule for TB detection (cross validation fold 3). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_4.py b/src/ptbench/data/montgomery/fold_4.py index 6d5b73d1b344993c55e16d8be6c7829ce9abce9e..eaa5fd27b841c44ce202b3d07d96cd90ef523d84 100644 --- a/src/ptbench/data/montgomery/fold_4.py +++ b/src/ptbench/data/montgomery/fold_4.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Montgomery datamodule for TB detection (cross validation fold 4). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_5.py b/src/ptbench/data/montgomery/fold_5.py index 27a7442dbdde02eea9cb95b9532818d44a48721a..142ae715356d4cfd83e7b628cddda25459e70e35 100644 --- a/src/ptbench/data/montgomery/fold_5.py +++ b/src/ptbench/data/montgomery/fold_5.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Montgomery datamodule for TB detection (cross validation fold 5). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_6.py b/src/ptbench/data/montgomery/fold_6.py index 1270cd58cbd36261bcee05b594ec746a181ad113..715d9b84650b71923b391cbb85e02c77e5084132 100644 --- a/src/ptbench/data/montgomery/fold_6.py +++ b/src/ptbench/data/montgomery/fold_6.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Montgomery datamodule for TB detection (cross validation fold 6). 
+ +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_7.py b/src/ptbench/data/montgomery/fold_7.py index e7b5a1c3aedde45ff94fcdcc22c550fb30f9fd27..fc53e24ef3a927dc0c56d1f62780701529a0b1ae 100644 --- a/src/ptbench/data/montgomery/fold_7.py +++ b/src/ptbench/data/montgomery/fold_7.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Montgomery datamodule for TB detection (cross validation fold 7). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_8.py b/src/ptbench/data/montgomery/fold_8.py index 1abbe7b20bb320a9fa2b217838c88243ea60d64b..2b917a6b46c494de3d08286bd5920fe03c35bc18 100644 --- a/src/ptbench/data/montgomery/fold_8.py +++ b/src/ptbench/data/montgomery/fold_8.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Montgomery datamodule for TB detection (cross validation fold 8). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery/fold_9.py b/src/ptbench/data/montgomery/fold_9.py index 71de470d58fa8cd673104ade245d66d3fd2b86a3..f404ace1c6b8f1ce4a18b68343298016f21f3b7d 100644 --- a/src/ptbench/data/montgomery/fold_9.py +++ b/src/ptbench/data/montgomery/fold_9.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Montgomery datamodule for TB detection (cross validation fold 9). + +See :py:class:`.montgomery.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen/default.py b/src/ptbench/data/montgomery_shenzhen/default.py index 2b8a8fb25e09b85f372e854a2f2ce29bcdf56ddc..c332f4be47fac3374bfb0841c3a2ebbe4e2a0932 100644 --- a/src/ptbench/data/montgomery_shenzhen/default.py +++ b/src/ptbench/data/montgomery_shenzhen/default.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("default.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (default +split). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_0.py b/src/ptbench/data/montgomery_shenzhen/fold_0.py index 91711d3863fc916ee55949b858f19c030d180bf1..f3e8ef02acb941787257ec145485e299ce5ae596 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_0.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_0.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 0). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_1.py b/src/ptbench/data/montgomery_shenzhen/fold_1.py index 173726955f475465cfcecd394e0d086d059d4968..f6d73de3c2a180a23472914cc079ce93a8348a24 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_1.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_1.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 1). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_2.py b/src/ptbench/data/montgomery_shenzhen/fold_2.py index 91346efa569d498929371e6be75d205b30b22e88..9b9560528d1c293183959748f4f0c50f1242aabf 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_2.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_2.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 2). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_3.py b/src/ptbench/data/montgomery_shenzhen/fold_3.py index 4156c3ee7b88aaedf705e05c028aa6ba47868e12..826e0abb50e9f15e84bd7ee544c32ce358063e99 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_3.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_3.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 3). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_4.py b/src/ptbench/data/montgomery_shenzhen/fold_4.py index 6d5b73d1b344993c55e16d8be6c7829ce9abce9e..f7261a431690eedf7f970a0f95067b3cda1647cc 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_4.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_4.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 4). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_5.py b/src/ptbench/data/montgomery_shenzhen/fold_5.py index 27a7442dbdde02eea9cb95b9532818d44a48721a..7dfb6f9077ce98d8238bf7e7695b4d233e3d83a7 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_5.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_5.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 5). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_6.py b/src/ptbench/data/montgomery_shenzhen/fold_6.py index 1270cd58cbd36261bcee05b594ec746a181ad113..57f19130059efb743db6dd7127fb8ea6886edc2c 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_6.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_6.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 6). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_7.py b/src/ptbench/data/montgomery_shenzhen/fold_7.py index e7b5a1c3aedde45ff94fcdcc22c550fb30f9fd27..3fc7c8bfa135edc6e8f7c8e6b6e8a8adc056279d 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_7.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_7.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 7). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_8.py b/src/ptbench/data/montgomery_shenzhen/fold_8.py index 1abbe7b20bb320a9fa2b217838c88243ea60d64b..7b631543deb1c925ef9d8f66a2e686256361510c 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_8.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_8.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 8). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen/fold_9.py b/src/ptbench/data/montgomery_shenzhen/fold_9.py index 71de470d58fa8cd673104ade245d66d3fd2b86a3..565b71e7c2f3bb7cdc2f90234454b7c058c6af17 100644 --- a/src/ptbench/data/montgomery_shenzhen/fold_9.py +++ b/src/ptbench/data/montgomery_shenzhen/fold_9.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Aggregated datamodule composed of Montgomery and Shenzhen datasets (cross +validation fold 9). + +See :py:class:`.montgomery_shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/datamodule.py b/src/ptbench/data/montgomery_shenzhen_indian/datamodule.py index ea2d2a4f417767cb42641c9e596a24c40a3ab386..3c555a62a36512dd76bda26149d439e11ae1fac9 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/datamodule.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/datamodule.py @@ -12,7 +12,8 @@ from ..shenzhen.datamodule import make_split as make_shenzhen_split class DataModule(ConcatDataModule): - """Aggregated datamodule composed of Montgomery and Shenzhen datasets.""" + """Aggregated datamodule composed of Montgomery, Shenzhen and Indian + datasets.""" def __init__(self, split_filename: str): montgomery_loader = MontgomeryLoader() diff --git a/src/ptbench/data/montgomery_shenzhen_indian/default.py b/src/ptbench/data/montgomery_shenzhen_indian/default.py index 2b8a8fb25e09b85f372e854a2f2ce29bcdf56ddc..e4a05f6365acfa6d3cd67d2e5d057013a34dca48 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/default.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/default.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("default.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets. + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_0.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_0.py index 91711d3863fc916ee55949b858f19c030d180bf1..6f08e25a3a3295cbd4f0dbc8238ac39b6e0eda2a 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_0.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_0.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 0). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_1.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_1.py index 173726955f475465cfcecd394e0d086d059d4968..3d7529cfc17c7d4da9a4b9b72a1ccee572f269f0 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_1.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_1.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 1). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_2.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_2.py index 91346efa569d498929371e6be75d205b30b22e88..2e9142814fb0a8754b77d0ff5a7b68a41d3da4d0 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_2.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_2.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 2). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_3.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_3.py index 4156c3ee7b88aaedf705e05c028aa6ba47868e12..dfd4c9cd5e3f7da229771996e073bae26f75e825 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_3.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_3.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 3). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_4.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_4.py index 6d5b73d1b344993c55e16d8be6c7829ce9abce9e..84e66cefbdff506afd25c41262a1f54676940ce0 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_4.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_4.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 4). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_5.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_5.py index 27a7442dbdde02eea9cb95b9532818d44a48721a..650292f7c2e6d8ecff27a7f2164c36353e67c5a3 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_5.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_5.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 5). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_6.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_6.py index 1270cd58cbd36261bcee05b594ec746a181ad113..8f673689c2ec6c2e7c8e25efe97f00351e09589b 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_6.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_6.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 6). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_7.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_7.py index e7b5a1c3aedde45ff94fcdcc22c550fb30f9fd27..33eed5405a20141af252cc76f77156ab54baaa6c 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_7.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_7.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 7). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_8.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_8.py index 1abbe7b20bb320a9fa2b217838c88243ea60d64b..4de80f69739e27da225b862e3679b1684c89296c 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_8.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_8.py @@ -5,3 +5,9 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 8). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical +details. +""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian/fold_9.py b/src/ptbench/data/montgomery_shenzhen_indian/fold_9.py index 71de470d58fa8cd673104ade245d66d3fd2b86a3..f33e691b15d1d7b368a671b4131fa5c4092fc631 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian/fold_9.py +++ b/src/ptbench/data/montgomery_shenzhen_indian/fold_9.py @@ -5,3 +5,8 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Aggregated datamodule composed of Montgomery, Shenzhen and Indian datasets +(cross validation fold 9). + +See :py:class:`.montgomery_shenzhen_indian.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/montgomery_shenzhen_indian_padchest/datamodule.py b/src/ptbench/data/montgomery_shenzhen_indian_padchest/datamodule.py index 2fdcfc67d4b71a83c6779f3225d177f50c288bfe..9f6a3b0930dffa3fb3956d99b7c831e84dc379e2 100644 --- a/src/ptbench/data/montgomery_shenzhen_indian_padchest/datamodule.py +++ b/src/ptbench/data/montgomery_shenzhen_indian_padchest/datamodule.py @@ -38,7 +38,8 @@ class DataModule(ConcatDataModule): (montgomery_split["validation"], montgomery_loader), (shenzhen_split["validation"], shenzhen_loader), (indian_split["validation"], indian_loader), - (padchest_split["validation"], padchest_loader), + # there is no validation set on padchest... + # (padchest_split["validation"], padchest_loader), ], "test": [ (montgomery_split["test"], montgomery_loader), diff --git a/src/ptbench/data/nih_cxr14/datamodule.py b/src/ptbench/data/nih_cxr14/datamodule.py index 1594be18fe1c273806ca40be8bdae8175a5d4e98..58c828b4b252c36c8329bf4adbbadcd76a2e2215 100644 --- a/src/ptbench/data/nih_cxr14/datamodule.py +++ b/src/ptbench/data/nih_cxr14/datamodule.py @@ -18,26 +18,22 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the NIH CXR-14 dataset. + """A specialized raw-data-loader for the NIH CXR-14 dataset.""" - Attributes - ---------- + datadir: str + """This variable contains the base directory where the database raw data is + stored.""" - datadir - This variable contains the base directory where the database raw data - is stored. + idiap_file_organisation: bool + """Whether to use Idiap's filesystem organisation when looking up data. - idiap_file_organisation - This variable will be ``True``, if the user has set the configuration - parameter ``nih_cxr14.idiap_file_organisation`` in the global - configuration file.
It will cause internal loader to search for files - in a slightly different folder structure, that was adapted to Idiap's - requirements (number of files per folder to be less than 10k). + This variable will be ``True``, if the user has set the configuration + parameter ``nih_cxr14.idiap_file_organisation`` in the global configuration + file. It will cause the internal loader to search for files in a slightly + different folder structure, that was adapted to Idiap's requirements + (number of files per folder to be less than 10k). """ - datadir: str - idiap_file_organisation: bool - def __init__(self): rc = load_rc() self.datadir = rc.get("datadir.nih_cxr14", os.path.realpath(os.curdir)) @@ -50,8 +46,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -59,8 +54,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ file_path = sample[0] # default @@ -91,8 +84,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -100,8 +92,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - labels The integer labels associated with the sample """ return sample[1] diff --git a/src/ptbench/data/nih_cxr14/default.py b/src/ptbench/data/nih_cxr14/default.py index b9f7b6c5776abf0a2e128cc01dd92839af554298..af4f217217bb033bc1c089a3db633a29056d3f3f 100644 --- a/src/ptbench/data/nih_cxr14/default.py +++ b/src/ptbench/data/nih_cxr14/default.py @@ -11,5 +11,5 @@ datamodule = DataModule("default.json.bz2") * Validation samples: 6350 * Test samples: 4054 -See :py:class:`DataModule` for technical details.
+See :py:class:`.nih_cxr14.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/nih_cxr14_padchest/datamodule.py b/src/ptbench/data/nih_cxr14_padchest/datamodule.py index 335679bf497cf78e65091cb70843ebb414f04c55..f1ce1f914a1469345821e5ec4e4f5426bd356207 100644 --- a/src/ptbench/data/nih_cxr14_padchest/datamodule.py +++ b/src/ptbench/data/nih_cxr14_padchest/datamodule.py @@ -31,7 +31,8 @@ class DataModule(ConcatDataModule): ], "test": [ (cxr14_split["test"], cxr14_loader), - (padchest_split["test"], padchest_loader), + # there is no test set on padchest + # (padchest_split["test"], padchest_loader), ], } ) diff --git a/src/ptbench/data/padchest/datamodule.py b/src/ptbench/data/padchest/datamodule.py index a09c5e67579564c2ca830677fe43be6fbcf0f901..69a37ea844fa3123e66b296803d8456845b4911a 100644 --- a/src/ptbench/data/padchest/datamodule.py +++ b/src/ptbench/data/padchest/datamodule.py @@ -20,17 +20,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the PadChest dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - """ + """A specialized raw-data-loader for the PadChest dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self): rc = load_rc() @@ -41,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. 
@@ -50,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ # N.B.: PadChest images are encoded as 16-bit grayscale images @@ -73,8 +64,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -82,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - labels The integer labels associated with the sample """ return sample[1] @@ -117,213 +105,213 @@ class DataModule(CachingDataModule): * Labels: [PADCHEST-2019]_ * Output image: - * Transforms: - - * Load raw 16-bit PNG with :py:mod:`PIL` - * Remove excess black borders - * Convert image to 32-bit floats between 0. and 1. - * Convert to tensor - * Center crop, preserving the minimum dimension (height or width) - - * Final specifications - - * Grayscale, encoded as a 1-plane 32-bit float image, square with - varying resolutions depending on the raw input image - * Labels, in order (some of which may not be present in all splits): - - * COPD signs - * Chilaiditi sign - * NSG tube - * abnormal foreign body - * abscess - * adenopathy - * air bronchogram - * air fluid level - * air trapping - * alveolar pattern - * aortic aneurysm - * aortic atheromatosis - * aortic button enlargement - * aortic elongation - * aortic endoprosthesis - * apical pleural thickening - * artificial aortic heart valve - * artificial heart valve - * artificial mitral heart valve - * asbestosis signs - * ascendent aortic elongation - * atelectasis - * atelectasis basal - * atypical pneumonia - * axial hyperostosis - * azygoesophageal recess shift - * azygos lobe - * blastic bone lesion - * bone cement - * bone metastasis - * breast mass - * bronchiectasis - * bronchovascular markings - * bullas - * calcified adenopathy - * calcified densities - * calcified fibroadenoma - * calcified granuloma 
- * calcified mediastinal adenopathy - * calcified pleural plaques - * calcified pleural thickening - * callus rib fracture - * cardiomegaly - * catheter - * cavitation - * central vascular redistribution - * central venous catheter - * central venous catheter via jugular vein - * central venous catheter via subclavian vein - * central venous catheter via umbilical vein - * cervical rib - * chest drain tube - * chronic changes - * clavicle fracture - * consolidation - * costochondral junction hypertrophy - * costophrenic angle blunting - * cyst - * dai - * descendent aortic elongation - * dextrocardia - * diaphragmatic eventration - * double J stent - * dual chamber device - * electrical device - * emphysema - * empyema - * end on vessel - * endoprosthesis - * endotracheal tube - * esophagic dilatation - * exclude - * external foreign body - * fibrotic band - * fissure thickening - * flattened diaphragm - * fracture - * gastrostomy tube - * goiter - * granuloma - * ground glass pattern - * gynecomastia - * heart insufficiency - * heart valve calcified - * hemidiaphragm elevation - * hiatal hernia - * hilar congestion - * hilar enlargement - * humeral fracture - * humeral prosthesis - * hydropneumothorax - * hyperinflated lung - * hypoexpansion - * hypoexpansion basal - * increased density - * infiltrates - * interstitial pattern - * kerley lines - * kyphosis - * laminar atelectasis - * lepidic adenocarcinoma - * lipomatosis - * lobar atelectasis - * loculated fissural effusion - * loculated pleural effusion - * lung metastasis - * lung vascular paucity - * lymphangitis carcinomatosa - * lytic bone lesion - * major fissure thickening - * mammary prosthesis - * mass - * mastectomy - * mediastinal enlargement - * mediastinal mass - * mediastinal shift - * mediastinic lipomatosis - * metal - * miliary opacities - * minor fissure thickening - * multiple nodules - * nephrostomy tube - * nipple shadow - * nodule - * non axial articular degenerative changes - * normal - * 
obesity - * osteopenia - * osteoporosis - * osteosynthesis material - * pacemaker - * pectum carinatum - * pectum excavatum - * pericardial effusion - * pleural effusion - * pleural mass - * pleural plaques - * pleural thickening - * pneumomediastinum - * pneumonia - * pneumoperitoneo - * pneumothorax - * post radiotherapy changes - * prosthesis - * pseudonodule - * pulmonary artery enlargement - * pulmonary artery hypertension - * pulmonary edema - * pulmonary fibrosis - * pulmonary hypertension - * pulmonary mass - * pulmonary venous hypertension - * reservoir central venous catheter - * respiratory distress - * reticular interstitial pattern - * reticulonodular interstitial pattern - * rib fracture - * right sided aortic arch - * round atelectasis - * sclerotic bone lesion - * scoliosis - * segmental atelectasis - * single chamber device - * soft tissue mass - * sternoclavicular junction hypertrophy - * sternotomy - * subacromial space narrowing - * subcutaneous emphysema - * suboptimal study - * superior mediastinal enlargement - * supra aortic elongation - * surgery - * surgery breast - * surgery heart - * surgery humeral - * surgery lung - * surgery neck - * suture material - * thoracic cage deformation - * total atelectasis - * tracheal shift - * tracheostomy tube - * tuberculosis - * tuberculosis sequelae - * unchanged - * vascular hilar enlargement - * vascular redistribution - * ventriculoperitoneal drain tube - * vertebral anterior compression - * vertebral compression - * vertebral degenerative changes - * vertebral fracture - * volume loss + * Transforms: + + * Load raw 16-bit PNG with :py:mod:`PIL` + * Remove excess black borders + * Convert image to 32-bit floats between 0. and 1. 
+ * Convert to tensor + * Center crop, preserving the minimum dimension (height or width) + + * Final specifications + + * Grayscale, encoded as a 1-plane 32-bit float image, square with + varying resolutions depending on the raw input image + * Labels, in order (some of which may not be present in all splits): + + * COPD signs + * Chilaiditi sign + * NSG tube + * abnormal foreign body + * abscess + * adenopathy + * air bronchogram + * air fluid level + * air trapping + * alveolar pattern + * aortic aneurysm + * aortic atheromatosis + * aortic button enlargement + * aortic elongation + * aortic endoprosthesis + * apical pleural thickening + * artificial aortic heart valve + * artificial heart valve + * artificial mitral heart valve + * asbestosis signs + * ascendent aortic elongation + * atelectasis + * atelectasis basal + * atypical pneumonia + * axial hyperostosis + * azygoesophageal recess shift + * azygos lobe + * blastic bone lesion + * bone cement + * bone metastasis + * breast mass + * bronchiectasis + * bronchovascular markings + * bullas + * calcified adenopathy + * calcified densities + * calcified fibroadenoma + * calcified granuloma + * calcified mediastinal adenopathy + * calcified pleural plaques + * calcified pleural thickening + * callus rib fracture + * cardiomegaly + * catheter + * cavitation + * central vascular redistribution + * central venous catheter + * central venous catheter via jugular vein + * central venous catheter via subclavian vein + * central venous catheter via umbilical vein + * cervical rib + * chest drain tube + * chronic changes + * clavicle fracture + * consolidation + * costochondral junction hypertrophy + * costophrenic angle blunting + * cyst + * dai + * descendent aortic elongation + * dextrocardia + * diaphragmatic eventration + * double J stent + * dual chamber device + * electrical device + * emphysema + * empyema + * end on vessel + * endoprosthesis + * endotracheal tube + * esophagic dilatation + * exclude + * 
external foreign body + * fibrotic band + * fissure thickening + * flattened diaphragm + * fracture + * gastrostomy tube + * goiter + * granuloma + * ground glass pattern + * gynecomastia + * heart insufficiency + * heart valve calcified + * hemidiaphragm elevation + * hiatal hernia + * hilar congestion + * hilar enlargement + * humeral fracture + * humeral prosthesis + * hydropneumothorax + * hyperinflated lung + * hypoexpansion + * hypoexpansion basal + * increased density + * infiltrates + * interstitial pattern + * kerley lines + * kyphosis + * laminar atelectasis + * lepidic adenocarcinoma + * lipomatosis + * lobar atelectasis + * loculated fissural effusion + * loculated pleural effusion + * lung metastasis + * lung vascular paucity + * lymphangitis carcinomatosa + * lytic bone lesion + * major fissure thickening + * mammary prosthesis + * mass + * mastectomy + * mediastinal enlargement + * mediastinal mass + * mediastinal shift + * mediastinic lipomatosis + * metal + * miliary opacities + * minor fissure thickening + * multiple nodules + * nephrostomy tube + * nipple shadow + * nodule + * non axial articular degenerative changes + * normal + * obesity + * osteopenia + * osteoporosis + * osteosynthesis material + * pacemaker + * pectum carinatum + * pectum excavatum + * pericardial effusion + * pleural effusion + * pleural mass + * pleural plaques + * pleural thickening + * pneumomediastinum + * pneumonia + * pneumoperitoneo + * pneumothorax + * post radiotherapy changes + * prosthesis + * pseudonodule + * pulmonary artery enlargement + * pulmonary artery hypertension + * pulmonary edema + * pulmonary fibrosis + * pulmonary hypertension + * pulmonary mass + * pulmonary venous hypertension + * reservoir central venous catheter + * respiratory distress + * reticular interstitial pattern + * reticulonodular interstitial pattern + * rib fracture + * right sided aortic arch + * round atelectasis + * sclerotic bone lesion + * scoliosis + * segmental atelectasis + * 
single chamber device + * soft tissue mass + * sternoclavicular junction hypertrophy + * sternotomy + * subacromial space narrowing + * subcutaneous emphysema + * suboptimal study + * superior mediastinal enlargement + * supra aortic elongation + * surgery + * surgery breast + * surgery heart + * surgery humeral + * surgery lung + * surgery neck + * suture material + * thoracic cage deformation + * total atelectasis + * tracheal shift + * tracheostomy tube + * tuberculosis + * tuberculosis sequelae + * unchanged + * vascular hilar enlargement + * vascular redistribution + * ventriculoperitoneal drain tube + * vertebral anterior compression + * vertebral compression + * vertebral degenerative changes + * vertebral fracture + * volume loss """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/padchest/idiap.py b/src/ptbench/data/padchest/idiap.py index ea8b3dbd24d6fdf770f38c3cde7848687090935c..52c52123ae8943e1027a2fdf42aac06b6abc9da0 100644 --- a/src/ptbench/data/padchest/idiap.py +++ b/src/ptbench/data/padchest/idiap.py @@ -8,7 +8,7 @@ datamodule = DataModule("idiap.json.bz2") """Padchest dataset for computer-aided diagnosis (``idiap`` split). This split contains all images in the database. Read documentation of -:py:class:`DataModule` for technical details. +:py:class:`.padchest.datamodule.DataModule` for technical details. * Split reference: ours * Training samples: 96'269 diff --git a/src/ptbench/data/shenzhen/datamodule.py b/src/ptbench/data/shenzhen/datamodule.py index 0596007eaae5050b5691f1ebe1563f78e1507910..221bc869dfc23d50e85025ca7eda6ecbf44321ed 100644 --- a/src/ptbench/data/shenzhen/datamodule.py +++ b/src/ptbench/data/shenzhen/datamodule.py @@ -19,20 +19,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the Shenzen dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. 
- - transform - Transforms that are always applied to the loaded raw images. - """ + """A specialized raw-data-loader for the Shenzhen dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self, config_variable: str = "datadir.shenzhen"): self.datadir = load_rc().get( @@ -44,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -53,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ # N.B.: Image.convert("L") is required to normalize grayscale back to @@ -78,8 +66,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -87,8 +74,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -117,23 +102,23 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * PNG 8-bit RGB images (grayscale, but encoded as RGB images with - "inverted" grayscale scale requiring special treatment). - * Variable width and height of 3000 x 3000 pixels or less + * PNG 8-bit RGB images (grayscale, but encoded as RGB images with + "inverted" grayscale scale requiring special treatment). 
+ * Variable width and height of 3000 x 3000 pixels or less * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` - * Remove black borders - * Torch center cropping to get square image + * Load raw PNG with :py:mod:`PIL` + * Remove black borders + * Torch center cropping to get square image - * Final specifications: + * Final specifications: - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square with varying resolutions, depending on the input image - * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square with varying resolutions, depending on the input image + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/shenzhen/default.py b/src/ptbench/data/shenzhen/default.py index 93517c03b1d4e6532c7410ba05b36bf57fd5c08b..0a73847af2107003b9b21ad2d06dacc255148519 100644 --- a/src/ptbench/data/shenzhen/default.py +++ b/src/ptbench/data/shenzhen/default.py @@ -5,9 +5,11 @@ from .datamodule import DataModule datamodule = DataModule("default.json") -"""Default Shenzen TB database split. +"""Default Shenzhen TB database split. * Training samples: 64% of TB and healthy CXR (including labels) * Validation samples: 16% of TB and healthy CXR (including labels) * Test samples: 20% of TB and healthy CXR (including labels) + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/shenzhen/fold_0.py b/src/ptbench/data/shenzhen/fold_0.py index 91711d3863fc916ee55949b858f19c030d180bf1..dc895f782752ea79f4597ee6f7c2e7af88f97d58 100644 --- a/src/ptbench/data/shenzhen/fold_0.py +++ b/src/ptbench/data/shenzhen/fold_0.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 0). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/shenzhen/fold_1.py b/src/ptbench/data/shenzhen/fold_1.py index 173726955f475465cfcecd394e0d086d059d4968..ce062462f79385e7db05be8f0a51b5de050b9235 100644 --- a/src/ptbench/data/shenzhen/fold_1.py +++ b/src/ptbench/data/shenzhen/fold_1.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 1). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_2.py b/src/ptbench/data/shenzhen/fold_2.py index 91346efa569d498929371e6be75d205b30b22e88..3f061c371c238c094b16983a81d81438a7f0e847 100644 --- a/src/ptbench/data/shenzhen/fold_2.py +++ b/src/ptbench/data/shenzhen/fold_2.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 2). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_3.py b/src/ptbench/data/shenzhen/fold_3.py index 4156c3ee7b88aaedf705e05c028aa6ba47868e12..e3bb067e085fa6062caf45126aad2cf69b31a9d2 100644 --- a/src/ptbench/data/shenzhen/fold_3.py +++ b/src/ptbench/data/shenzhen/fold_3.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 3). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_4.py b/src/ptbench/data/shenzhen/fold_4.py index 6d5b73d1b344993c55e16d8be6c7829ce9abce9e..f0cb843bd533d6925677896b25c6db9cd41c961a 100644 --- a/src/ptbench/data/shenzhen/fold_4.py +++ b/src/ptbench/data/shenzhen/fold_4.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 4). 
+ +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_5.py b/src/ptbench/data/shenzhen/fold_5.py index 27a7442dbdde02eea9cb95b9532818d44a48721a..6a27ac51522a571b91b9b589fe48a72c31cff3b7 100644 --- a/src/ptbench/data/shenzhen/fold_5.py +++ b/src/ptbench/data/shenzhen/fold_5.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 5). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_6.py b/src/ptbench/data/shenzhen/fold_6.py index 1270cd58cbd36261bcee05b594ec746a181ad113..302d2b8dbf262a0ebe1a590de81d826ab04beb7b 100644 --- a/src/ptbench/data/shenzhen/fold_6.py +++ b/src/ptbench/data/shenzhen/fold_6.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 6). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_7.py b/src/ptbench/data/shenzhen/fold_7.py index e7b5a1c3aedde45ff94fcdcc22c550fb30f9fd27..a07f4d293eb3debee97b8d427863605624bb4536 100644 --- a/src/ptbench/data/shenzhen/fold_7.py +++ b/src/ptbench/data/shenzhen/fold_7.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 7). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. 
+""" diff --git a/src/ptbench/data/shenzhen/fold_8.py b/src/ptbench/data/shenzhen/fold_8.py index 1abbe7b20bb320a9fa2b217838c88243ea60d64b..0c5e6d225adabbf98b36006ae29a36f644ed6689 100644 --- a/src/ptbench/data/shenzhen/fold_8.py +++ b/src/ptbench/data/shenzhen/fold_8.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 8). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/shenzhen/fold_9.py b/src/ptbench/data/shenzhen/fold_9.py index 71de470d58fa8cd673104ade245d66d3fd2b86a3..bb73ea16c6fcbc70b092ab1e60b57c38f517a75a 100644 --- a/src/ptbench/data/shenzhen/fold_9.py +++ b/src/ptbench/data/shenzhen/fold_9.py @@ -5,3 +5,7 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") +"""Shenzhen datamodule for computer-aided diagnosis (cross validation fold 9). + +See :py:class:`.shenzhen.datamodule.DataModule` for technical details. +""" diff --git a/src/ptbench/data/tbpoc/datamodule.py b/src/ptbench/data/tbpoc/datamodule.py index 31e2aac4f4607835ff340238cbe3108e79a2a059..f564e2326cf4d0ae50eaa0d2e673384dff076584 100644 --- a/src/ptbench/data/tbpoc/datamodule.py +++ b/src/ptbench/data/tbpoc/datamodule.py @@ -19,20 +19,11 @@ from ..typing import Sample class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the Shenzen dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. - - transform - Transforms that are always applied to the loaded raw images. 
- """ + """A specialized raw-data-loader for the TB-POC dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self, config_variable: str = "datadir.tbpoc"): self.datadir = load_rc().get( @@ -44,8 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -53,8 +43,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ # images from TBPOC are encoded as grayscale JPEGs, no need to @@ -76,8 +64,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, and an integer, representing the sample label. @@ -85,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -111,24 +96,24 @@ class DataModule(CachingDataModule): * Raw data input (on disk): - * JPEG 8-bit Grayscale images - * resolution: fixed to one of the cases above + * JPEG 8-bit Grayscale images + * resolution: fixed to one of the cases above * Output image: - * Transforms: + * Transforms: - * Load raw grayscale jpeg with :py:mod:`PIL` - * Remove black borders - * Convert to torch tensor - * Torch center cropping to get square image + * Load raw grayscale jpeg with :py:mod:`PIL` + * Remove black borders + * Convert to torch tensor + * Torch center cropping to get square image - * Final specifications: + * Final specifications: - * Grayscale, encoded as a single plane tensor, 32-bit floats, - square with varying resolutions (2048 x 2048 being the maximum), - but also depending on black borders' sizes on the input image.
- * Labels: 0 (healthy), 1 (active tuberculosis) + * Grayscale, encoded as a single plane tensor, 32-bit floats, + square with varying resolutions (2048 x 2048 being the maximum), + but also depending on black borders' sizes on the input image. + * Labels: 0 (healthy), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/tbpoc/fold_0.py b/src/ptbench/data/tbpoc/fold_0.py index 775f64cfbf32624d4383b240c2b9377305abaa74..2beb07fdfed14bb7509ad24f08178853dce46e1a 100644 --- a/src/ptbench/data/tbpoc/fold_0.py +++ b/src/ptbench/data/tbpoc/fold_0.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-0.json") """TB-POC dataset for TB detection (cross validation fold 0). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_1.py b/src/ptbench/data/tbpoc/fold_1.py index 6f0f137facfd1c88b42d43c3334427f56e32b7b5..338d99b2cf4d902a5d767c6789078bbbbd2ecc0d 100644 --- a/src/ptbench/data/tbpoc/fold_1.py +++ b/src/ptbench/data/tbpoc/fold_1.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-1.json") """TB-POC dataset for TB detection (cross validation fold 1). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_2.py b/src/ptbench/data/tbpoc/fold_2.py index 662fd32cb76caca49ccb04fcb2aadc987642850c..9df72b54046984edcbe097e17001ff065493e190 100644 --- a/src/ptbench/data/tbpoc/fold_2.py +++ b/src/ptbench/data/tbpoc/fold_2.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-2.json") """TB-POC dataset for TB detection (cross validation fold 2). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/tbpoc/fold_3.py b/src/ptbench/data/tbpoc/fold_3.py index c52b8c2e4b6353631ac812f2d0dd7c7fb31dcf45..514bf12c466a0b604cd0793438f0294e619cfd75 100644 --- a/src/ptbench/data/tbpoc/fold_3.py +++ b/src/ptbench/data/tbpoc/fold_3.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-3.json") """TB-POC dataset for TB detection (cross validation fold 3). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_4.py b/src/ptbench/data/tbpoc/fold_4.py index 6de0dc13e8381707e31c727b465271799d4c7f22..d4f87280283f3d3660d6c2ef1ec92e93d7b359ed 100644 --- a/src/ptbench/data/tbpoc/fold_4.py +++ b/src/ptbench/data/tbpoc/fold_4.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-4.json") """TB-POC dataset for TB detection (cross validation fold 4). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_5.py b/src/ptbench/data/tbpoc/fold_5.py index bdca5a36a3f4c65268256ed6d82df124ddd4fe09..2df9a7ff444a8d0946345426f02f83f27c2da0f1 100644 --- a/src/ptbench/data/tbpoc/fold_5.py +++ b/src/ptbench/data/tbpoc/fold_5.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-5.json") """TB-POC dataset for TB detection (cross validation fold 5). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/tbpoc/fold_6.py b/src/ptbench/data/tbpoc/fold_6.py index c17ba0ba00ffc7f14ade642cae4d3433b13e031e..5d4fd08a0c8490b934e142095d4218818f69e288 100644 --- a/src/ptbench/data/tbpoc/fold_6.py +++ b/src/ptbench/data/tbpoc/fold_6.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-6.json") """TB-POC dataset for TB detection (cross validation fold 6). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_7.py b/src/ptbench/data/tbpoc/fold_7.py index 4310f2f43b054c7c0e7d7d9d73521590d6424c27..3b0b137f7db9f688bb4e34d8fc58ecb1ade9775c 100644 --- a/src/ptbench/data/tbpoc/fold_7.py +++ b/src/ptbench/data/tbpoc/fold_7.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-7.json") """TB-POC dataset for TB detection (cross validation fold 7). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbpoc/fold_8.py b/src/ptbench/data/tbpoc/fold_8.py index d7fa5d100803aa91ba956977372cd218bbc2b428..f0304467db0fbb75096a95ffccecd30a49174b5c 100644 --- a/src/ptbench/data/tbpoc/fold_8.py +++ b/src/ptbench/data/tbpoc/fold_8.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-8.json") """TB-POC dataset for TB detection (cross validation fold 8). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. 
""" diff --git a/src/ptbench/data/tbpoc/fold_9.py b/src/ptbench/data/tbpoc/fold_9.py index f37e1f36a094580fe3b404839851bb23ebd63b43..327c7156089fbfd1cb1ae21e8d5e8d507e4a5361 100644 --- a/src/ptbench/data/tbpoc/fold_9.py +++ b/src/ptbench/data/tbpoc/fold_9.py @@ -7,5 +7,5 @@ from .datamodule import DataModule datamodule = DataModule("fold-9.json") """TB-POC dataset for TB detection (cross validation fold 9). -See :py:class:`DataModule` for technical details. +See :py:class:`.tbpoc.datamodule.DataModule` for technical details. """ diff --git a/src/ptbench/data/tbx11k/datamodule.py b/src/ptbench/data/tbx11k/datamodule.py index 45785ddbd679a241d90c9c48cfdd526315f48fbc..45cab752ea18a132b59b3f93cc03e1b46a1f1021 100644 --- a/src/ptbench/data/tbx11k/datamodule.py +++ b/src/ptbench/data/tbx11k/datamodule.py @@ -47,17 +47,11 @@ finding locations, as described above. class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the TBX11k dataset. - - Attributes - ---------- - - datadir - This variable contains the base directory where the database raw data - is stored. 
- """ + """A specialized raw-data-loader for the TBX11k dataset.""" datadir: str + """This variable contains the base directory where the database raw data is + stored.""" def __init__(self): self.datadir = load_rc().get( @@ -69,8 +63,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, an integer, representing the sample label, and possible radiological findings represented by @@ -79,8 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - sample The sample representation """ image = PIL.Image.open(os.path.join(self.datadir, sample[0])) @@ -98,8 +89,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, an integer, representing the sample label, and possible radiological findings represented by @@ -108,8 +98,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - label The integer label associated with the sample """ return sample[1] @@ -121,8 +109,7 @@ class RawDataLoader(_BaseRawDataLoader): Parameters ---------- - - sample: + sample A tuple containing the path suffix, within the dataset root folder, where to find the image to be loaded, an integer, representing the sample label, and possible radiological findings represented by @@ -131,8 +118,6 @@ class RawDataLoader(_BaseRawDataLoader): Returns ------- - - annotations Bounding box annotations, if any available with the sample. 
""" return sample[2] if len(sample) > 2 else [] # type: ignore @@ -194,91 +179,55 @@ class DataModule(CachingDataModule): - imgs/tb/tb1104.png - imgs/tb/tb1143.png - - Original train dataset samples: - - Healthy: 3000 - - Sick (but no TB): 3000 - - Active TB only: 473 - - Latent TB only: 103 - - Both active and latent TB: 23 - - Unknown: 1 - - Total: 6600 - - - Original validation dataset samples: - - Healthy: 800 - - Sick (but no TB): 800 - - Latent TB only: 36 - - Active TB only: 157 - - Both active and latent TB: 7 - - Total: 1800 - - - Original test dataset samples: - - Unknown: 3302 - - Total: 3302 - - * Because the test set does not have annotations, we generate train, - validation and test datasets as such: + * Original train dataset samples: - - The original validation dataset becomes our test set. - - The original train dataset is split into new train and validation - datasets (validation ratio = 0.203 w.r.t. original train dataset size). - The selection of samples is stratified (see comments through our split - code, which is shipped alongside this file.) 
- - * Split v1 contains healthy subjects against active TB cases (total samples - = 4430): - - - ``train`` dataset samples: - - Healthy: 2390 - - Active TB only: 377 - - Total: 2767 - - - ``validation`` dataset samples: - - Healthy: 610 - - Active TB only: 96 - - Total: 706 + - Healthy: 3000 + - Sick (but no TB): 3000 + - Active TB only: 473 + - Latent TB only: 103 + - Both active and latent TB: 23 + - Unknown: 1 + - Total: 6600 - - ``test`` dataset samples: - - Healthy: 800 - - Active TB only: 157 - - Total: 957 + * Original validation dataset samples: - * Split v2 contains healthy, sick (no TB), and latent TB subjects against - active TB cases (total samples = 8369): + - Healthy: 800 + - Sick (but no TB): 800 + - Latent TB only: 36 + - Active TB only: 157 + - Both active and latent TB: 7 + - Total: 1800 - - ``train`` dataset samples: - - Healthy, Sick or Latent TB: 4864 - - Active TB only: 377 - - Total: 5241 + * Original test dataset samples: - - ``validation`` dataset samples: - - Healthy, Sick or Latent TB: 1239 - - Active TB only: 96 - - Total: 1335 + - Unknown: 3302 + - Total: 3302 - - ``test`` dataset samples: - - Healthy, Sick or Latent TB: 1636 - - Active TB only: 157 - - Total: 1793 + * Because the test set does not have annotations, we generated train, + validation and test datasets as such: + - The original validation dataset becomes our test set. + - The original train dataset is split into new train and validation + datasets (validation ratio = 0.203 w.r.t. original train dataset size). + The selection of samples is stratified (see comments through our split + code, which is shipped alongside this file.) 
Data specifications: - * Raw data input (on disk): - - * PNG images 8 bits RGB, 512 x 512 pixels + * Raw data input (on disk): PNG images 8 bits RGB, 512 x 512 pixels * Output image: - * Transforms: + * Transforms: - * Load raw PNG with :py:mod:`PIL` + - Load raw PNG with :py:mod:`PIL` - * Final specifications: + * Final specifications: - * RGB, encoded as a 3-plane tensor using 32-bit floats, square - (512x512 pixels) - * Labels: 0 (healthy, latent tb or sick but no tb depending on the - protocol), 1 (active tuberculosis) + - RGB, encoded as a 3-plane tensor using 32-bit floats, square + (512x512 pixels) + - Labels: 0 (healthy, latent tb or sick but no tb depending on the + protocol), 1 (active tuberculosis) """ def __init__(self, split_filename: str): diff --git a/src/ptbench/data/tbx11k/v1_healthy_vs_atb.py b/src/ptbench/data/tbx11k/v1_healthy_vs_atb.py index 9a5cf8e370123f20c652586034edd249a91ff990..fe0de82ac28049df89805a910c1c8f31bd2070ae 100644 --- a/src/ptbench/data/tbx11k/v1_healthy_vs_atb.py +++ b/src/ptbench/data/tbx11k/v1_healthy_vs_atb.py @@ -5,3 +5,27 @@ from .datamodule import DataModule datamodule = DataModule("v1-healthy-vs-atb.json") +"""TBX11k dataset for TB detection. Split ``v1`` (healthy against active TB +cases). 
+ +Split v1 contains healthy subjects against active TB cases (total samples = +4430): + +* ``train`` dataset samples: + + - Healthy: 2390 + - Active TB only: 377 + - Total: 2767 + +* ``validation`` dataset samples: + + - Healthy: 610 + - Active TB only: 96 + - Total: 706 + +* ``test`` dataset samples: + + - Healthy: 800 + - Active TB only: 157 + - Total: 957 +""" diff --git a/src/ptbench/data/tbx11k/v2_others_vs_atb.py b/src/ptbench/data/tbx11k/v2_others_vs_atb.py index ccb9822220921fc34c8d16b810bc392bc9766c63..5bee7bd05dc882a4dc4d5c92780b0304a293f88b 100644 --- a/src/ptbench/data/tbx11k/v2_others_vs_atb.py +++ b/src/ptbench/data/tbx11k/v2_others_vs_atb.py @@ -5,3 +5,27 @@ from .datamodule import DataModule datamodule = DataModule("v2-others-vs-atb.json") +"""TBX11k dataset for TB detection. Split ``v2`` (everything else against +active TB cases). + +Split v2 contains healthy, sick (no TB), and latent TB subjects against +active TB cases (total samples = 8369): + +* ``train`` dataset samples: + + - Healthy, Sick or Latent TB: 4864 + - Active TB only: 377 + - Total: 5241 + +* ``validation`` dataset samples: + + - Healthy, Sick or Latent TB: 1239 + - Active TB only: 96 + - Total: 1335 + +* ``test`` dataset samples: + + - Healthy, Sick or Latent TB: 1636 + - Active TB only: 157 + - Total: 1793 +""" diff --git a/src/ptbench/data/typing.py b/src/ptbench/data/typing.py index 6f41b39eb33d2a91c51008623388bc2900032665..c1df54c62eea0bf5005e2e3f5c0381e62b2e07a7 100644 --- a/src/ptbench/data/typing.py +++ b/src/ptbench/data/typing.py @@ -52,9 +52,9 @@ DatabaseSplit: typing.TypeAlias = collections.abc.Mapping[ ] """The definition of a database split. -A database split maps dataset (subset) names to sequences of objects -that, through :py:class:`RawDataLoader`s, eventually become -:py:class:`Sample`s in the processing pipeline.
+A database split maps dataset (subset) names to sequences of objects that, +through a :py:class:`.RawDataLoader`, eventually become :py:data:`.Sample` objects in +the processing pipeline. """ ConcatDatabaseSplit: typing.TypeAlias = collections.abc.Mapping[ @@ -63,12 +63,11 @@ ConcatDatabaseSplit: typing.TypeAlias = collections.abc.Mapping[ ] """The definition of a complex database split composed of several other splits. -A database split maps dataset (subset) names to sequences of objects -that, through :py:class:`RawDataLoader`s, eventually become -:py:class:`Sample`s in the processing pipeline. Objects of this subtype -allow the construction of complex splits composed of cannibalized parts -of other splits. Each split may be assigned a different -:py:class:`RawDataLoader`. +A database split maps dataset (subset) names to sequences of objects that, +through a :py:class:`.RawDataLoader`, eventually become :py:data:`.Sample` objects in +the processing pipeline. Objects of this subtype allow the construction of +complex splits composed of cannibalized parts of other splits. Each split may +be assigned a different :py:class:`.RawDataLoader`. """ diff --git a/src/ptbench/models/alexnet.py b/src/ptbench/models/alexnet.py index 5daed2a31a2834997d7d90df5a444a68da4d831f..9096f6081366ce8983a181caa2d2500f118ef82a 100644 --- a/src/ptbench/models/alexnet.py +++ b/src/ptbench/models/alexnet.py @@ -27,7 +27,6 @@ class Alexnet(pl.LightningModule): Parameters ---------- - train_loss The loss to be used during the training.
@@ -124,8 +123,7 @@ class Alexnet(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ checkpoint["normalizer"] = self.normalizer @@ -136,8 +134,7 @@ class Alexnet(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") @@ -151,8 +148,7 @@ class Alexnet(pl.LightningModule): Parameters ---------- - - dataloader: :py:class:`torch.utils.data.DataLoader` + dataloader A torch Dataloader from which to compute the mean and std. Will not be used if the model is pretrained. """ diff --git a/src/ptbench/models/densenet.py b/src/ptbench/models/densenet.py index 0663b60b9df2f089aa93ccbff18c992bc18c6749..97ebaf78e51e69fc8b732be1e10b69cae2d09231 100644 --- a/src/ptbench/models/densenet.py +++ b/src/ptbench/models/densenet.py @@ -25,7 +25,6 @@ class Densenet(pl.LightningModule): Parameters ---------- - train_loss The loss to be used during the training. @@ -124,8 +123,7 @@ class Densenet(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ checkpoint["normalizer"] = self.normalizer @@ -136,8 +134,7 @@ class Densenet(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") @@ -151,8 +148,7 @@ class Densenet(pl.LightningModule): Parameters ---------- - - dataloader: :py:class:`torch.utils.data.DataLoader` + dataloader A torch Dataloader from which to compute the mean and std. Will not be used if the model is pretrained. 
""" diff --git a/src/ptbench/models/normalizer.py b/src/ptbench/models/normalizer.py index 147efc7e02beac30bab6f79678b6860c3b42c810..576f21cc6db52c0a6f7bfbe9c59009194afc9e14 100644 --- a/src/ptbench/models/normalizer.py +++ b/src/ptbench/models/normalizer.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later -"""A network model that prefixes a subtract/divide step to any other module.""" +"""Functions to compute normalisation factors based on dataloaders.""" import torch import torch.nn diff --git a/src/ptbench/models/pasa.py b/src/ptbench/models/pasa.py index 3f10757fe2126b5be67ef577bccd8c8bbefe3fa5..5d6e20b4dd25c8ba6d57e3c681cdc2514af616ee 100644 --- a/src/ptbench/models/pasa.py +++ b/src/ptbench/models/pasa.py @@ -31,7 +31,6 @@ class Pasa(pl.LightningModule): Parameters ---------- - train_loss The loss to be used during the training. @@ -204,8 +203,7 @@ class Pasa(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ checkpoint["normalizer"] = self.normalizer @@ -216,8 +214,7 @@ class Pasa(pl.LightningModule): Parameters ---------- - - checkpoint: + checkpoint Loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") @@ -228,7 +225,6 @@ class Pasa(pl.LightningModule): Parameters ---------- - dataloader A torch Dataloader from which to compute the mean and std """ diff --git a/src/ptbench/scripts/train_analysis.py b/src/ptbench/scripts/train_analysis.py index 4061164f7112f363730e6d0af5318b3141bd74f5..6fd623f8d7c006c076ff71b73e97a122a63b91f6 100644 --- a/src/ptbench/scripts/train_analysis.py +++ b/src/ptbench/scripts/train_analysis.py @@ -24,12 +24,14 @@ def create_figures(df: pandas.DataFrame) -> list[plt.figure]: It is assumed that some metric names are of the form <metric>/<subset>. All subsets for a metric will be displayed on the same figure. 
+ Parameters ---------- df: Pandas dataframe containing the data to plot. + Returns ------- @@ -115,17 +117,7 @@ def train_analysis( **_, ) -> None: """Creates a plot for each metric in the training logs and saves them in a - pdf file. - - Parameters - ---------- - - logdir: - Directory containing tensorboard event files. - - output_pdf: - The pdf file in which to save the plots. - """ + pdf file.""" from matplotlib.backends.backend_pdf import PdfPages diff --git a/src/ptbench/utils/checkpointer.py b/src/ptbench/utils/checkpointer.py index 67190e2f34de18e7399bd6179d86ac4468ab45ab..88cdfbb7412f9a89f0c2995b8ab4ebff797c698b 100644 --- a/src/ptbench/utils/checkpointer.py +++ b/src/ptbench/utils/checkpointer.py @@ -17,23 +17,25 @@ def get_checkpoint( Can return the best or last checkpoint, or a checkpoint at a specific path. Ensures the checkpoint exists, raising an error if it is not the case. - If resume_from is None, checks the output directory if a checkpoint already exists and returns it. - If no checkpoint is found, returns None. + If resume_from is ``None``, checks the output directory if a checkpoint + already exists and returns it. If no checkpoint is found, returns ``None``. + Parameters ---------- - output_folder: + output_folder Directory in which checkpoints are stored. - resume_from: + resume_from Which model to get. Can be one of "best", "last", or a path to a checkpoint. If None, gets the last checkpoint if it exists, otherwise returns None + Returns ------- - checkpoint_file: + checkpoint_file Path to the requested checkpoint or None. """ last_checkpoint_path = os.path.join(output_folder, "model_final_epoch.ckpt")