From 3c726c5506dfe7cfde8a6ac4c9487ee5dcd7aac5 Mon Sep 17 00:00:00 2001 From: dcarron <daniel.carron@idiap.ch> Date: Fri, 2 Feb 2024 17:45:45 +0100 Subject: [PATCH] [pre-commit] Start fixing numpydoc issues --- pyproject.toml | 28 ++++++++---- src/mednet/config/data/hivtb/datamodule.py | 25 ++++++++--- src/mednet/config/data/indian/datamodule.py | 17 ++++++- src/mednet/config/data/indian/fold_0.py | 4 +- .../config/data/montgomery/datamodule.py | 21 +++++++-- .../data/montgomery_shenzhen/datamodule.py | 8 +++- .../montgomery_shenzhen_indian/datamodule.py | 8 +++- .../datamodule.py | 10 ++++- .../datamodule.py | 14 ++++-- .../config/data/nih_cxr14/datamodule.py | 21 +++++++-- .../data/nih_cxr14_padchest/datamodule.py | 10 ++++- src/mednet/config/data/padchest/datamodule.py | 21 +++++++-- src/mednet/config/data/shenzhen/datamodule.py | 30 +++++++++++-- src/mednet/config/data/tbpoc/datamodule.py | 22 +++++++-- src/mednet/config/data/tbx11k/datamodule.py | 35 ++++++++++++--- .../data/tbx11k/make_splits_from_database.py | 6 +-- src/mednet/data/augmentations.py | 8 +--- src/mednet/data/datamodule.py | 32 ++++++------- src/mednet/data/split.py | 20 ++++----- src/mednet/data/typing.py | 6 +-- src/mednet/engine/callbacks.py | 6 +-- src/mednet/engine/device.py | 6 +-- src/mednet/engine/evaluator.py | 16 +++---- src/mednet/engine/loggers.py | 2 +- src/mednet/engine/predictor.py | 6 +-- src/mednet/engine/saliency/completeness.py | 8 ++-- src/mednet/engine/saliency/evaluator.py | 11 +++-- src/mednet/engine/saliency/generator.py | 10 ++--- .../engine/saliency/interpretability.py | 27 ++++++----- src/mednet/engine/saliency/viewer.py | 12 ++--- src/mednet/engine/trainer.py | 6 +-- src/mednet/models/alexnet.py | 2 +- src/mednet/models/densenet.py | 2 +- src/mednet/models/loss_weights.py | 4 +- src/mednet/models/normalizer.py | 6 +-- src/mednet/models/pasa.py | 2 +- src/mednet/models/separate.py | 6 +-- src/mednet/models/transforms.py | 4 +- src/mednet/scripts/cli.py | 2 +- src/mednet/scripts/click.py | 2 +- src/mednet/scripts/config.py | 12 ++--- src/mednet/scripts/database.py | 8 ++-- src/mednet/scripts/evaluate.py | 6 +-- src/mednet/scripts/experiment.py | 4 +- src/mednet/scripts/predict.py | 6 +-- src/mednet/scripts/saliency/completeness.py | 4 +- src/mednet/scripts/saliency/evaluate.py | 4 +- src/mednet/scripts/saliency/generate.py | 4 +- .../scripts/saliency/interpretability.py | 3 +- src/mednet/scripts/saliency/view.py | 4 +- src/mednet/scripts/train.py | 7 +-- src/mednet/scripts/train_analysis.py | 6 +-- src/mednet/scripts/utils.py | 2 +- src/mednet/utils/checkpointer.py | 6 +-- src/mednet/utils/rc.py | 2 +- src/mednet/utils/resources.py | 45 ++++++++----------- src/mednet/utils/summary.py | 2 +- src/mednet/utils/tensorboard.py | 2 +- tests/conftest.py | 31 +++++-------- tests/test_tbx11k.py | 10 +++-- 60 files changed, 407 insertions(+), 247 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8d33d411..f2c14f40 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -260,12 +260,24 @@ junit_log_passing_tests = false [tool.numpydoc_validation] checks = [ - "all", # report on all checks, except the below - "PR04", - "EX01", - "SA01", - "ES01", - "GL01", - "GL08", - "SS06", + "all", # report on all checks, except the below + "ES01", # Not all functions require extended summaries + "EX01", # Not all functions require examples + "GL01", # Expects text to be on the line after the opening quotes but that is in direct opposition of the sphinx recommendations and conflicts with other pre-commit 
hooks. + "GL08", # Causes issues if we don't have a docstring at the top of the file. Disabling this might fail to catch actual missing docstrings. + "PR04", # numpydoc does not currently support PEP484 typehints, which we are using + "SA01", # We do not use Also sections + "SS06", # Summary will span multiple lines if too long because of reformatting by other hooks. +] + +exclude = [ # don't report on objects that match any of these regex + '\.__len__$', + '\.__getitem__$', + '\.__iter__$', +] +override_SS05 = [ # override SS05 to allow docstrings starting with these words + '^Process ', + '^Assess ', + '^Access ', + '^This', ] diff --git a/src/mednet/config/data/hivtb/datamodule.py b/src/mednet/config/data/hivtb/datamodule.py index 3ad4dc63..7a561733 100644 --- a/src/mednet/config/data/hivtb/datamodule.py +++ b/src/mednet/config/data/hivtb/datamodule.py @@ -1,7 +1,7 @@ # SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later -"""HIV-TB dataset for computer-aided diagnosis (only BMP files) +"""HIV-TB dataset for computer-aided diagnosis (only BMP files). Database reference: [HIV-TB-2019]_ """ @@ -40,7 +40,7 @@ class RawDataLoader(_BaseRawDataLoader): ) def sample(self, sample: tuple[str, int]) -> Sample: - """Loads a single image sample from the disk. + """Load a single image sample from the disk. Parameters ---------- @@ -68,7 +68,7 @@ class RawDataLoader(_BaseRawDataLoader): return tensor, dict(label=sample[1], name=sample[0]) # type: ignore[arg-type] def label(self, sample: tuple[str, int]) -> int: - """Loads a single image sample label from the disk. + """Load a single image sample label from the disk. Parameters ---------- @@ -86,7 +86,17 @@ class RawDataLoader(_BaseRawDataLoader): def make_split(basename: str) -> DatabaseSplit: - """Returns a database split for the HIV-TB database.""" + """Return a database split for the HIV-TB database. + + Parameters + ---------- + basename + Name of the .json file containing the split to load. + + Returns + ------- + An instance of DatabaseSplit. + """ return JSONDatabaseSplit( importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename) @@ -94,7 +104,7 @@ def make_split(basename: str) -> DatabaseSplit: class DataModule(CachingDataModule): - """HIV-TB dataset for computer-aided diagnosis (only BMP files) + """HIV-TB dataset for computer-aided diagnosis (only BMP files). * Database reference: [HIV-TB-2019]_ * Original resolution, varying with most images being 2048 x 2500 pixels @@ -121,6 +131,11 @@ class DataModule(CachingDataModule): * Grayscale, encoded as a single plane tensor, 32-bit floats, square at 2048 x 2048 pixels * Labels: 0 (healthy), 1 (active tuberculosis) + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. """ def __init__(self, split_filename: str): diff --git a/src/mednet/config/data/indian/datamodule.py b/src/mednet/config/data/indian/datamodule.py index c8b762fc..08a50722 100644 --- a/src/mednet/config/data/indian/datamodule.py +++ b/src/mednet/config/data/indian/datamodule.py @@ -19,7 +19,17 @@ database.""" def make_split(basename: str) -> DatabaseSplit: - """Returns a database split for the Indian database.""" + """Return a database split for the Indian database. + + Parameters + ---------- + basename + Name of the .json file containing the split to load. + + Returns + ------- + An instance of DatabaseSplit. 
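
For reference, the configuration above keeps every numpydoc check active except
the listed exclusions: summaries must be single-line and imperative (with the
SS05 override additionally accepting the "Process/Assess/Access/This" openers),
while PR04 stays off because types are carried by PEP 484 annotations. A
minimal sketch of a docstring written to satisfy this configuration (the
function itself is hypothetical):

.. code:: python

   def load_split(basename: str) -> dict:
       """Load a database split from a .json file.

       Parameters
       ----------
       basename
           Name of the .json file containing the split to load (the PEP 484
           annotation above replaces a numpydoc type, hence PR04 is disabled).

       Returns
       -------
       A dictionary mapping dataset names to lists of samples.
       """
       raise NotImplementedError
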
+ """ return JSONDatabaseSplit( importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename) @@ -59,6 +69,11 @@ class DataModule(CachingDataModule): * Grayscale, encoded as a single plane tensor, 32-bit floats, square, with varying resolutions, depending on the input raw image * Labels: 0 (healthy), 1 (active tuberculosis) + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. """ def __init__(self, split_filename: str): diff --git a/src/mednet/config/data/indian/fold_0.py b/src/mednet/config/data/indian/fold_0.py index 2c94e91d..3f6d60e7 100644 --- a/src/mednet/config/data/indian/fold_0.py +++ b/src/mednet/config/data/indian/fold_0.py @@ -1,8 +1,8 @@ # SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> # # SPDX-License-Identifier: GPL-3.0-or-later -"""Indian collection dataset for computer-aided diagnosis (cross validation -fold 0). +"""Indian collection dataset for computer-aided diagnosis (cross validationfold +0). Database reference: [INDIAN-2013]_ diff --git a/src/mednet/config/data/montgomery/datamodule.py b/src/mednet/config/data/montgomery/datamodule.py index e2454827..e19f3200 100644 --- a/src/mednet/config/data/montgomery/datamodule.py +++ b/src/mednet/config/data/montgomery/datamodule.py @@ -39,7 +39,7 @@ class RawDataLoader(_BaseRawDataLoader): ) def sample(self, sample: tuple[str, int]) -> Sample: - """Loads a single image sample from the disk. + """Load a single image sample from the disk. Parameters ---------- @@ -67,7 +67,7 @@ class RawDataLoader(_BaseRawDataLoader): return tensor, dict(label=sample[1], name=sample[0]) # type: ignore[arg-type] def label(self, sample: tuple[str, int]) -> int: - """Loads a single image sample label from the disk. + """Load a single image sample label from the disk. Parameters ---------- @@ -85,7 +85,17 @@ class RawDataLoader(_BaseRawDataLoader): def make_split(basename: str) -> DatabaseSplit: - """Returns a database split for the Montgomery database.""" + """Return a database split for the Montgomery database. + + Parameters + ---------- + basename + Name of the .json file containing the split to load. + + Returns + ------- + An instance of DatabaseSplit. + """ return JSONDatabaseSplit( importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename) @@ -123,6 +133,11 @@ class DataModule(CachingDataModule): * Grayscale, encoded as a single plane tensor, 32-bit floats, square at 4020 x 4020 pixels * Labels: 0 (healthy), 1 (active tuberculosis) + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. """ def __init__(self, split_filename: str): diff --git a/src/mednet/config/data/montgomery_shenzhen/datamodule.py b/src/mednet/config/data/montgomery_shenzhen/datamodule.py index 06100561..fa83fdde 100644 --- a/src/mednet/config/data/montgomery_shenzhen/datamodule.py +++ b/src/mednet/config/data/montgomery_shenzhen/datamodule.py @@ -10,7 +10,13 @@ from ..shenzhen.datamodule import make_split as make_shenzhen_split class DataModule(ConcatDataModule): - """Aggregated DataModule composed of Montgomery and Shenzhen datasets.""" + """Aggregated DataModule composed of Montgomery and Shenzhen datasets. + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. 
+ """ def __init__(self, split_filename: str): montgomery_loader = MontgomeryLoader() diff --git a/src/mednet/config/data/montgomery_shenzhen_indian/datamodule.py b/src/mednet/config/data/montgomery_shenzhen_indian/datamodule.py index 303bf5e7..64b4d0f7 100644 --- a/src/mednet/config/data/montgomery_shenzhen_indian/datamodule.py +++ b/src/mednet/config/data/montgomery_shenzhen_indian/datamodule.py @@ -15,7 +15,13 @@ from ..shenzhen.datamodule import make_split as make_shenzhen_split class DataModule(ConcatDataModule): """Aggregated DataModule composed of Montgomery, Shenzhen and Indian - datasets.""" + datasets. + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. + """ def __init__(self, split_filename: str): montgomery_loader = MontgomeryLoader() diff --git a/src/mednet/config/data/montgomery_shenzhen_indian_padchest/datamodule.py b/src/mednet/config/data/montgomery_shenzhen_indian_padchest/datamodule.py index ee111c03..e30fac0a 100644 --- a/src/mednet/config/data/montgomery_shenzhen_indian_padchest/datamodule.py +++ b/src/mednet/config/data/montgomery_shenzhen_indian_padchest/datamodule.py @@ -17,7 +17,15 @@ from ..shenzhen.datamodule import make_split as make_shenzhen_split class DataModule(ConcatDataModule): """Aggregated DataModule composed of Montgomery, Shenzhen, Indian, and - PadChest datasets.""" + PadChest datasets. + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. + padchest_split_filename + Name of the .json file from padchest containing the split to load. + """ def __init__(self, split_filename: str, padchest_split_filename: str): montgomery_loader = MontgomeryLoader() diff --git a/src/mednet/config/data/montgomery_shenzhen_indian_tbx11k/datamodule.py b/src/mednet/config/data/montgomery_shenzhen_indian_tbx11k/datamodule.py index f83e4404..dbda251e 100644 --- a/src/mednet/config/data/montgomery_shenzhen_indian_tbx11k/datamodule.py +++ b/src/mednet/config/data/montgomery_shenzhen_indian_tbx11k/datamodule.py @@ -17,9 +17,17 @@ from ..tbx11k.datamodule import make_split as make_tbx11k_split class DataModule(ConcatDataModule): """Aggregated DataModule composed of Montgomery, Shenzhen, Indian, and - TBX11k datasets.""" + TBX11k datasets. - def __init__(self, split_filename: str, tbx11_split_filename: str): + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. + tbx11k_split_filename + Name of the .json file from tbx11k containing the split to load. + """ + + def __init__(self, split_filename: str, tbx11k_split_filename: str): montgomery_loader = MontgomeryLoader() montgomery_split = make_montgomery_split(split_filename) shenzhen_loader = ShenzhenLoader() @@ -27,7 +35,7 @@ class DataModule(ConcatDataModule): indian_loader = IndianLoader() indian_split = make_indian_split(split_filename) tbx11k_loader = TBX11kLoader() - tbx11k_split = make_tbx11k_split(tbx11_split_filename) + tbx11k_split = make_tbx11k_split(tbx11k_split_filename) super().__init__( splits={ diff --git a/src/mednet/config/data/nih_cxr14/datamodule.py b/src/mednet/config/data/nih_cxr14/datamodule.py index 50fc6056..5967ee63 100644 --- a/src/mednet/config/data/nih_cxr14/datamodule.py +++ b/src/mednet/config/data/nih_cxr14/datamodule.py @@ -63,7 +63,7 @@ class RawDataLoader(_BaseRawDataLoader): ) def sample(self, sample: tuple[str, list[int]]) -> Sample: - """Loads a single image sample from the disk. + """Load a single image sample from the disk. 
        Parameters
         ----------
@@ -102,7 +102,7 @@ class RawDataLoader(_BaseRawDataLoader):
         return tensor, dict(label=sample[1], name=sample[0])  # type: ignore[arg-type]

     def label(self, sample: tuple[str, list[int]]) -> list[int]:
-        """Loads a single image sample label from the disk.
+        """Load a single image sample label from the disk.

         Parameters
         ----------
@@ -120,7 +120,17 @@


 def make_split(basename: str) -> DatabaseSplit:
-    """Returns a database split for the NIH CXR-14 database."""
+    """Return a database split for the NIH CXR-14 database.
+
+    Parameters
+    ----------
+    basename
+        Name of the .json file containing the split to load.
+
+    Returns
+    -------
+    An instance of DatabaseSplit.
+    """

     return JSONDatabaseSplit(
         importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename)
@@ -171,6 +181,11 @@ class DataModule(CachingDataModule):
     * fibrosis
     * edema
     * consolidation
+
+    Parameters
+    ----------
+    split_filename
+        Name of the .json file containing the split to load.
     """

     def __init__(self, split_filename: str):
diff --git a/src/mednet/config/data/nih_cxr14_padchest/datamodule.py b/src/mednet/config/data/nih_cxr14_padchest/datamodule.py
index 09fd623f..2c793c79 100644
--- a/src/mednet/config/data/nih_cxr14_padchest/datamodule.py
+++ b/src/mednet/config/data/nih_cxr14_padchest/datamodule.py
@@ -11,7 +11,15 @@

 class DataModule(ConcatDataModule):
     """Aggregated dataset composed of NIH CXR14 relabeld and PadChest
-    (normalized) datasets."""
+    (normalized) datasets.
+
+    Parameters
+    ----------
+    cxr14_split_filename
+        Name of the .json file from cxr14 containing the split to load.
+    padchest_split_filename
+        Name of the .json file from padchest containing the split to load.
+    """

     def __init__(self, cxr14_split_filename: str, padchest_split_filename):
         cxr14_loader = CXR14Loader()
diff --git a/src/mednet/config/data/padchest/datamodule.py b/src/mednet/config/data/padchest/datamodule.py
index b40a9e13..94743f68 100644
--- a/src/mednet/config/data/padchest/datamodule.py
+++ b/src/mednet/config/data/padchest/datamodule.py
@@ -41,7 +41,7 @@ class RawDataLoader(_BaseRawDataLoader):
     )

     def sample(self, sample: tuple[str, int | list[int]]) -> Sample:
-        """Loads a single image sample from the disk.
+        """Load a single image sample from the disk.

         Parameters
         ----------
@@ -69,7 +69,7 @@ class RawDataLoader(_BaseRawDataLoader):
         return tensor, dict(label=sample[1], name=sample[0])  # type: ignore[arg-type]

     def label(self, sample: tuple[str, int | list[int]]) -> int | list[int]:
-        """Loads a single image sample label from the disk.
+        """Load a single image sample label from the disk.

         Parameters
         ----------
@@ -87,7 +87,17 @@


 def make_split(basename: str) -> DatabaseSplit:
-    """Returns a database split for the NIH CXR-14 database."""
+    """Return a database split for the PadChest database.
+
+    Parameters
+    ----------
+    basename
+        Name of the .json file containing the split to load.
+
+    Returns
+    -------
+    An instance of DatabaseSplit.
+    """

     return JSONDatabaseSplit(
         importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename)
@@ -321,6 +331,11 @@ class DataModule(CachingDataModule):
     * vertebral degenerative changes
     * vertebral fracture
     * volume loss
+
+    Parameters
+    ----------
+    split_filename
+        Name of the .json file containing the split to load.
""" def __init__(self, split_filename: str): diff --git a/src/mednet/config/data/shenzhen/datamodule.py b/src/mednet/config/data/shenzhen/datamodule.py index c9ab8a64..4b04f871 100644 --- a/src/mednet/config/data/shenzhen/datamodule.py +++ b/src/mednet/config/data/shenzhen/datamodule.py @@ -27,7 +27,14 @@ database.""" class RawDataLoader(_BaseRawDataLoader): - """A specialized raw-data-loader for the Shenzhen dataset.""" + """A specialized raw-data-loader for the Shenzhen dataset. + + Parameters + ---------- + config_variable + Key to search for in the configuration file for the root directory of this + database. + """ datadir: str """This variable contains the base directory where the database raw data is @@ -41,7 +48,7 @@ class RawDataLoader(_BaseRawDataLoader): ) def sample(self, sample: tuple[str, int]) -> Sample: - """Loads a single image sample from the disk. + """Load a single image sample from the disk. Parameters ---------- @@ -71,7 +78,7 @@ class RawDataLoader(_BaseRawDataLoader): return tensor, dict(label=sample[1], name=sample[0]) # type: ignore[arg-type] def label(self, sample: tuple[str, int]) -> int: - """Loads a single image sample label from the disk. + """Load a single image sample label from the disk. Parameters ---------- @@ -89,7 +96,17 @@ class RawDataLoader(_BaseRawDataLoader): def make_split(basename: str) -> DatabaseSplit: - """Returns a database split for the Shenzhen database.""" + """Return a database split for the Shenzhen database. + + Parameters + ---------- + basename + Name of the .json file containing the split to load. + + Returns + ------- + An instance of DatabaseSplit. + """ return JSONDatabaseSplit( importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename) @@ -128,6 +145,11 @@ class DataModule(CachingDataModule): * Grayscale, encoded as a single plane tensor, 32-bit floats, square with varying resolutions, depending on the input image * Labels: 0 (healthy), 1 (active tuberculosis) + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. """ def __init__(self, split_filename: str): diff --git a/src/mednet/config/data/tbpoc/datamodule.py b/src/mednet/config/data/tbpoc/datamodule.py index 05a2cfad..e9d55cef 100644 --- a/src/mednet/config/data/tbpoc/datamodule.py +++ b/src/mednet/config/data/tbpoc/datamodule.py @@ -35,7 +35,7 @@ class RawDataLoader(_BaseRawDataLoader): ) def sample(self, sample: tuple[str, int]) -> Sample: - """Loads a single image sample from the disk. + """Load a single image sample from the disk. Parameters ---------- @@ -63,7 +63,7 @@ class RawDataLoader(_BaseRawDataLoader): return tensor, dict(label=sample[1], name=sample[0]) # type: ignore[arg-type] def label(self, sample: tuple[str, int]) -> int: - """Loads a single image sample label from the disk. + """Load a single image sample label from the disk. Parameters ---------- @@ -72,7 +72,6 @@ class RawDataLoader(_BaseRawDataLoader): where to find the image to be loaded, and an integer, representing the sample label. - Returns ------- The integer label associated with the sample @@ -81,7 +80,17 @@ class RawDataLoader(_BaseRawDataLoader): def make_split(basename: str) -> DatabaseSplit: - """Returns a database split for the TB-POC database.""" + """Return a database split for the TB-POC database. + + Parameters + ---------- + basename + Name of the .json file containing the split to load. + + Returns + ------- + An instance of DatabaseSplit. 
+ """ return JSONDatabaseSplit( importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename) @@ -117,6 +126,11 @@ class DataModule(CachingDataModule): square with varying resolutions (2048 x 2048 being the maximum), but also depending on black borders' sizes on the input image. * Labels: 0 (healthy), 1 (active tuberculosis) + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. """ def __init__(self, split_filename: str): diff --git a/src/mednet/config/data/tbx11k/datamodule.py b/src/mednet/config/data/tbx11k/datamodule.py index dc721886..1efd1c02 100644 --- a/src/mednet/config/data/tbx11k/datamodule.py +++ b/src/mednet/config/data/tbx11k/datamodule.py @@ -48,7 +48,7 @@ class BoundingBox: height: int def area(self) -> int: - """Computes the bounding box area. + """Compute the bounding box area. Returns ------- @@ -66,7 +66,7 @@ class BoundingBox: return self.ymin + self.height - 1 def intersection(self, other: typing_extensions.Self) -> int: - """Computes the area intersection between bounding boxes. + """Compute the area intersection between bounding boxes. Notice that screen geometry dictates is slightly different from floating point metrics. Consider a 1D example for the evaluation of the @@ -97,7 +97,13 @@ class BoundingBox: class BoundingBoxes(collections.abc.Sequence[BoundingBox]): - """A collection of bounding boxes.""" + """A collection of bounding boxes. + + Parameters + ---------- + t + A sequence of BoundingBox. + """ def __init__(self, t: typing.Sequence[BoundingBox] = []): self.t = tuple(t) @@ -149,7 +155,7 @@ class RawDataLoader(_BaseRawDataLoader): ) def sample(self, sample: DatabaseSample) -> Sample: - """Loads a single image sample from the disk. + """Load a single image sample from the disk. Parameters ---------- @@ -178,7 +184,7 @@ class RawDataLoader(_BaseRawDataLoader): ) def label(self, sample: DatabaseSample) -> int: - """Loads a single image sample label from the disk. + """Load a single image sample label from the disk. Parameters ---------- @@ -196,7 +202,7 @@ class RawDataLoader(_BaseRawDataLoader): return sample[1] def bounding_boxes(self, sample: DatabaseSample) -> BoundingBoxes: - """Loads image annotated bounding-boxes from the disk. + """Load image annotated bounding-boxes from the disk. Parameters ---------- @@ -218,7 +224,17 @@ class RawDataLoader(_BaseRawDataLoader): def make_split(basename: str) -> DatabaseSplit: - """Returns a database split for the Montgomery database.""" + """Return a database split for the Montgomery database. + + Parameters + ---------- + basename + Name of the .json file containing the split to load. + + Returns + ------- + An instance of DatabaseSplit. + """ return JSONDatabaseSplit( importlib.resources.files(__name__.rsplit(".", 1)[0]).joinpath(basename) @@ -322,6 +338,11 @@ class DataModule(CachingDataModule): (512x512 pixels) - Labels: 0 (healthy, latent tb or sick but no tb depending on the protocol), 1 (active tuberculosis) + + Parameters + ---------- + split_filename + Name of the .json file containing the split to load. 
""" def __init__(self, split_filename: str): diff --git a/src/mednet/config/data/tbx11k/make_splits_from_database.py b/src/mednet/config/data/tbx11k/make_splits_from_database.py index 90238b07..4c09c375 100644 --- a/src/mednet/config/data/tbx11k/make_splits_from_database.py +++ b/src/mednet/config/data/tbx11k/make_splits_from_database.py @@ -59,7 +59,7 @@ from sklearn.model_selection import StratifiedKFold, train_test_split def reorder(data: dict) -> list: - """Reorders data from TBX11K into a sample-based organisation.""" + """Reorder data from TBX11K into a sample-based organisation.""" categories = {k["id"]: k["name"] for k in data["categories"]} assert len(set(categories.values())) == len( @@ -98,7 +98,7 @@ def reorder(data: dict) -> list: def normalize_labels(data: list) -> list: - """Decides on the final labels for each sample. + """Decide on the final labels for each sample. Categories are decided on the following principles: @@ -287,7 +287,7 @@ def create_v2_default_split(d: dict, seed: int, validation_size) -> dict: def create_folds( d: dict, n: int, seed: int, validation_size: float ) -> list[dict]: - """Creates folds from existing splits. + """Create folds from existing splits. Parameters ---------- diff --git a/src/mednet/data/augmentations.py b/src/mednet/data/augmentations.py index b76171c9..c985b01e 100644 --- a/src/mednet/data/augmentations.py +++ b/src/mednet/data/augmentations.py @@ -35,7 +35,7 @@ def _elastic_deformation_on_image( mode: str = "nearest", p: float = 1.0, ) -> torch.Tensor: - """Performs elastic deformation on an image. + """Perform elastic deformation on an image. This implementation is based on 2 scipy functions (:py:func:`scipy.ndimage.gaussian_filter` and @@ -134,7 +134,7 @@ def _elastic_deformation_on_batch( p: float = 1.0, pool: multiprocessing.pool.Pool | None = None, ) -> torch.Tensor: - """Performs elastic deformation on a batch of images. + """Perform elastic deformation on a batch of images. This implementation is based on 2 scipy functions (:py:func:`scipy.ndimage.gaussian_filter` and @@ -143,10 +143,6 @@ def _elastic_deformation_on_batch( Parameters ---------- - img - The input image to apply elastic deformation to. This image should - always have this shape: ``[C, H, W]``. It should always represent a - tensor on the CPU. alpha A multiplier for the gaussian filter outputs. 
sigma diff --git a/src/mednet/data/datamodule.py b/src/mednet/data/datamodule.py index 539bd621..62e07b98 100644 --- a/src/mednet/data/datamodule.py +++ b/src/mednet/data/datamodule.py @@ -46,7 +46,7 @@ def _sample_size_bytes(s: Sample) -> int: """ def _tensor_size_bytes(t: torch.Tensor) -> int: - """Returns a tensor size in bytes.""" + """Return a tensor size in bytes.""" return int(t.element_size() * torch.prod(torch.tensor(t.shape))) size = sys.getsizeof(s[0]) # tensor metadata @@ -100,7 +100,7 @@ class _DelayedLoadingDataset(Dataset): logger.info(f"Estimated sample size: {sample_size_mb:.1f} Mb") def labels(self) -> list[int | list[int]]: - """Returns the integer labels for all samples in the dataset.""" + """Return the integer labels for all samples in the dataset.""" return [self.loader.label(k) for k in self.raw_dataset] def __getitem__(self, key: int) -> Sample: @@ -207,7 +207,7 @@ class _CachedDataset(Dataset): ) def labels(self) -> list[int | list[int]]: - """Returns the integer labels for all samples in the dataset.""" + """Return the integer labels for all samples in the dataset.""" return [k[1]["label"] for k in self.data] def __getitem__(self, key: int) -> Sample: @@ -239,7 +239,7 @@ class _ConcatDataset(Dataset): ] def labels(self) -> list[int | list[int]]: - """Returns the integer labels for all samples in the dataset.""" + """Return the integer labels for all samples in the dataset.""" return list(itertools.chain(*[k.labels() for k in self._datasets])) def __getitem__(self, key: int) -> Sample: @@ -258,7 +258,7 @@ def _make_balanced_random_sampler( dataset: Dataset, target: str = "label", ) -> torch.utils.data.WeightedRandomSampler: - """Generates a pytorch sampler that samples according to class + """Generate a pytorch sampler that samples according to class probabilities. This function takes as input a torch Dataset, and computes the weights to @@ -577,7 +577,7 @@ class ConcatDataModule(lightning.LightningDataModule): @property def model_transforms(self) -> list[Transform] | None: - """Transforms required to fit data into the model. + """Transform required to fit data into the model. A list of transforms (torch modules) that will be applied after raw-data-loading. and just before data is fed into the model or @@ -635,7 +635,7 @@ class ConcatDataModule(lightning.LightningDataModule): self._train_sampler = None def set_chunk_size(self, batch_size: int, batch_chunk_count: int) -> None: - """Coherently sets the batch-chunk-size after validation. + """Coherently set the batch-chunk-size after validation. Parameters ---------- @@ -676,7 +676,7 @@ class ConcatDataModule(lightning.LightningDataModule): self._chunk_size = self._batch_size // self._batch_chunk_count def _setup_dataset(self, name: str) -> None: - """Sets-up a single dataset from the input data split. + """Set-up a single dataset from the input data split. Parameters ---------- @@ -733,13 +733,13 @@ class ConcatDataModule(lightning.LightningDataModule): self._datasets[name] = _ConcatDataset(datasets) def _val_dataset_keys(self) -> list[str]: - """Returns list of validation dataset names.""" + """Return list of validation dataset names.""" return ["validation"] + [ k for k in self.splits.keys() if k.startswith("monitor-") ] def setup(self, stage: str) -> None: - """Sets up datasets for different tasks on the pipeline. + """Set up datasets for different tasks on the pipeline. 
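
The balanced sampler above weighs samples by inverse class frequency, so each
batch draws classes with roughly equal probability. A minimal sketch of the
weight computation, assuming plain integer labels as returned by
``Dataset.labels()``:

.. code:: python

   import collections

   import torch.utils.data

   def balanced_sampler(labels: list[int]) -> torch.utils.data.WeightedRandomSampler:
       """Draw every class with (approximately) equal probability."""
       counts = collections.Counter(labels)
       weights = [1.0 / counts[label] for label in labels]
       return torch.utils.data.WeightedRandomSampler(
           weights, num_samples=len(weights), replacement=True
       )
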
This method should setup (load, pre-process, etc) all datasets required for a particular ``stage`` (fit, validate, test, predict), and keep @@ -778,7 +778,7 @@ class ConcatDataModule(lightning.LightningDataModule): self._setup_dataset(k) def teardown(self, stage: str) -> None: - """Unsets-up datasets for different tasks on the pipeline. + """Unset-up datasets for different tasks on the pipeline. This method unsets (unload, remove from memory, etc) all datasets required for a particular ``stage`` (fit, validate, test, predict). @@ -801,7 +801,7 @@ class ConcatDataModule(lightning.LightningDataModule): super().teardown(stage) def train_dataloader(self) -> DataLoader: - """Returns the train data loader.""" + """Return the train data loader.""" return torch.utils.data.DataLoader( self._datasets["train"], @@ -814,7 +814,7 @@ class ConcatDataModule(lightning.LightningDataModule): ) def unshuffled_train_dataloader(self) -> DataLoader: - """Returns the train data loader without shuffling.""" + """Return the train data loader without shuffling.""" return torch.utils.data.DataLoader( self._datasets["train"], @@ -825,7 +825,7 @@ class ConcatDataModule(lightning.LightningDataModule): ) def val_dataloader(self) -> dict[str, DataLoader]: - """Returns the validation data loader(s)""" + """Return the validation data loader(s)""" validation_loader_opts = { "batch_size": self._chunk_size, @@ -843,7 +843,7 @@ class ConcatDataModule(lightning.LightningDataModule): } def test_dataloader(self) -> dict[str, DataLoader]: - """Returns the test data loader(s)""" + """Return the test data loader(s)""" return dict( test=torch.utils.data.DataLoader( @@ -857,7 +857,7 @@ class ConcatDataModule(lightning.LightningDataModule): ) def predict_dataloader(self) -> dict[str, DataLoader]: - """Returns the prediction data loader(s)""" + """Return the prediction data loader(s)""" return { k: torch.utils.data.DataLoader( diff --git a/src/mednet/data/split.py b/src/mednet/data/split.py index 7dc61f22..c6abae45 100644 --- a/src/mednet/data/split.py +++ b/src/mednet/data/split.py @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) class JSONDatabaseSplit(DatabaseSplit): - """Defines a loader that understands a database split (train, test, etc) in + """Define a loader that understands a database split (train, test, etc) in JSON format. To create a new database split, you need to provide a JSON formatted @@ -73,7 +73,7 @@ class JSONDatabaseSplit(DatabaseSplit): @functools.cached_property def _datasets(self) -> DatabaseSplit: - """Datasets in a split. + """Return the DatabaseSplits. The first call to this (cached) property will trigger full .json file loading from disk. Subsequent calls will be cached. @@ -93,20 +93,20 @@ class JSONDatabaseSplit(DatabaseSplit): return json.load(f) def __getitem__(self, key: str) -> typing.Sequence[typing.Any]: - """Accesses dataset ``key`` from this split.""" + """Access dataset ``key`` from this split.""" return self._datasets[key] def __iter__(self): - """Iterates over the datasets.""" + """Iterate over the datasets.""" return iter(self._datasets) def __len__(self) -> int: - """How many datasets we currently have.""" + """The number of datasets we currently have.""" return len(self._datasets) class CSVDatabaseSplit(DatabaseSplit): - """Defines a loader that understands a database split (train, test, etc) in + """Define a loader that understands a database split (train, test, etc) in CSV format. 
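
Concretely, the JSON split format described above is a dictionary of dataset
lists, where the layout of each sample is database-specific (most modules in
this patch use a relative path plus an integer label). A minimal sketch; the
file name is hypothetical:

.. code:: python

   import json
   import pathlib

   from mednet.data.split import JSONDatabaseSplit

   split = {
       "train": [["images/a.png", 0], ["images/b.png", 1]],
       "validation": [["images/c.png", 0]],
       "test": [["images/d.png", 1]],
   }
   path = pathlib.Path("demo-split.json")
   path.write_text(json.dumps(split))

   datasets = JSONDatabaseSplit(path)
   assert len(datasets) == 3 and len(datasets["train"]) == 2
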
     To create a new database split, you need to provide one or more CSV
@@ -149,7 +149,7 @@

     @functools.cached_property
     def _datasets(self) -> DatabaseSplit:
-        """Datasets in a split.
+        """Return the DatabaseSplits.

         The first call to this (cached) property will trigger all CSV file
         loading from disk. Subsequent calls will be cached.
@@ -157,7 +157,7 @@
         Returns
         -------
         datasets : dict
-            A dictionary mapping dataset names to lists of JSON objects
+            A dictionary mapping dataset names to lists of JSON objects.
         """

         retval: dict[str, typing.Sequence[typing.Any]] = {}
@@ -180,11 +180,11 @@
         return retval

     def __getitem__(self, key: str) -> typing.Sequence[typing.Any]:
-        """Accesses dataset ``key`` from this split."""
+        """Access dataset ``key`` from this split."""
         return self._datasets[key]

     def __iter__(self):
-        """Iterates over the datasets."""
+        """Iterate over the datasets."""
         return iter(self._datasets)

     def __len__(self) -> int:
diff --git a/src/mednet/data/typing.py b/src/mednet/data/typing.py
index 3102ecbc..658cad9d 100644
--- a/src/mednet/data/typing.py
+++ b/src/mednet/data/typing.py
@@ -25,11 +25,11 @@ class RawDataLoader:
     """A loader object can load samples and labels from storage."""

     def sample(self, _: typing.Any) -> Sample:
-        """Loads whole samples from media."""
+        """Load whole samples from media."""
         raise NotImplementedError("You must implement the `sample()` method")

     def label(self, k: typing.Any) -> int | list[int]:
-        """Loads only sample label from media.
+        """Load only sample label from media.

         If you do not override this implementation, then, by default,
         this method will call :py:meth:`sample` to load the whole sample
@@ -79,7 +79,7 @@ class Dataset(torch.utils.data.Dataset[Sample], typing.Iterable, typing.Sized):
     """

     def labels(self) -> list[int | list[int]]:
-        """Returns the integer labels for all samples in the dataset."""
+        """Return the integer labels for all samples in the dataset."""
         raise NotImplementedError("You must implement the `labels()` method")


diff --git a/src/mednet/engine/callbacks.py b/src/mednet/engine/callbacks.py
index 90912a8c..fd0da884 100644
--- a/src/mednet/engine/callbacks.py
+++ b/src/mednet/engine/callbacks.py
@@ -66,7 +66,7 @@ class LoggingCallback(lightning.pytorch.Callback):
         This method is executed whenever you *start* training a module.

         Parameters
-        ---------
+        ----------
         trainer
             The Lightning trainer object.
         pl_module
@@ -92,7 +92,7 @@ class LoggingCallback(lightning.pytorch.Callback):
         face the consequences of slow training!

         Parameters
-        ---------
+        ----------
         trainer
             The Lightning trainer object.
         pl_module
@@ -218,7 +218,7 @@ class LoggingCallback(lightning.pytorch.Callback):
         face the consequences of slow training!

         Parameters
-        ---------
+        ----------
         trainer
             The Lightning trainer object.
         pl_module
diff --git a/src/mednet/engine/device.py b/src/mednet/engine/device.py
index d751fae5..7f989523 100644
--- a/src/mednet/engine/device.py
+++ b/src/mednet/engine/device.py
@@ -21,7 +21,7 @@ SupportedPytorchDevice: typing.TypeAlias = typing.Literal[


 def _split_int_list(s: str) -> list[int]:
-    """Splits a list of integers encoded in a string (e.g. "1,2,3") into a
+    """Split a list of integers encoded in a string (e.g. "1,2,3") into a
     Python list of integers (e.g.
``[1, 2, 3]``).""" return [int(k.strip()) for k in s.split(",")] @@ -112,7 +112,7 @@ class DeviceManager: ) def torch_device(self) -> torch.device: - """Returns a representation of the torch device to use by default. + """Return a representation of the torch device to use by default. .. warning:: @@ -140,7 +140,7 @@ class DeviceManager: ) def lightning_accelerator(self) -> tuple[str, int | list[int] | str]: - """Returns the lightning accelerator setup. + """Return the lightning accelerator setup. Returns ------- diff --git a/src/mednet/engine/evaluator.py b/src/mednet/engine/evaluator.py index 51f5ed55..cd242600 100644 --- a/src/mednet/engine/evaluator.py +++ b/src/mednet/engine/evaluator.py @@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) def eer_threshold(predictions: Iterable[BinaryPrediction]) -> float: - """Calculates the (approximate) threshold leading to the equal error rate. + """Calculate the (approximate) threshold leading to the equal error rate. Parameters ---------- @@ -80,7 +80,7 @@ def _get_centered_maxf1( def maxf1_threshold(predictions: Iterable[BinaryPrediction]) -> float: - """Calculates the threshold leading to the maximum F1-score on a precision- + """Calculate the threshold leading to the maximum F1-score on a precision- recall curve. Parameters @@ -118,7 +118,7 @@ def _score_plot( title: str, threshold: float, ) -> matplotlib.figure.Figure: - """Plots the normalized score distributions for all systems. + """Plot the normalized score distributions for all systems. Parameters ---------- @@ -193,7 +193,7 @@ def run_binary( dict[str, matplotlib.figure.Figure], dict[str, typing.Any], ]: - """Runs inference and calculates measures for binary classification. + """Run inference and calculates measures for binary classification. Parameters ---------- @@ -281,7 +281,7 @@ def run_binary( def aggregate_summaries( data: typing.Sequence[typing.Mapping[str, typing.Any]], fmt: str ) -> str: - """Tabulates summaries from multiple splits. + """Tabulate summaries from multiple splits. This function can properly tabulate the various summaries produced for all the splits in a prediction database. @@ -309,7 +309,7 @@ def aggregate_roc( data: typing.Mapping[str, typing.Any], title: str = "ROC", ) -> matplotlib.figure.Figure: - """Aggregates ROC curves from multiple splits. + """Aggregate ROC curves from multiple splits. This function produces a single ROC plot for multiple curves generated per split. @@ -391,7 +391,7 @@ def aggregate_roc( def _precision_recall_canvas() -> ( Iterator[tuple[matplotlib.figure.Figure, matplotlib.figure.Axes]] ): - """Generates a canvas to draw precision-recall curves. + """Generate a canvas to draw precision-recall curves. Works like a context manager, yielding a figure and an axes set in which the precision-recall curves should be added to. The figure already @@ -462,7 +462,7 @@ def aggregate_pr( data: typing.Mapping[str, typing.Any], title: str = "Precision-Recall Curve", ) -> matplotlib.figure.Figure: - """Aggregates PR curves from multiple splits. + """Aggregate PR curves from multiple splits. This function produces a single Precision-Recall plot for multiple curves generated per split. The plot will be annotated with F1-score iso-lines (in diff --git a/src/mednet/engine/loggers.py b/src/mednet/engine/loggers.py index d597294b..8e14f774 100644 --- a/src/mednet/engine/loggers.py +++ b/src/mednet/engine/loggers.py @@ -43,7 +43,7 @@ class CustomTensorboardLogger(TensorBoardLogger): passed then logs are saved in ``/save_dir/name/version/sub_dir/``. 
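
``eer_threshold()`` above looks for the operating point where the false
positive and false negative rates cross. A sketch of one common way to
approximate it from a ROC curve with scikit-learn (not necessarily the exact
implementation used here):

.. code:: python

   import numpy
   from sklearn.metrics import roc_curve

   def approximate_eer_threshold(y_true, y_score) -> float:
       """Return the threshold where FPR and FNR are closest to equal."""
       fpr, tpr, thresholds = roc_curve(y_true, y_score)
       fnr = 1.0 - tpr
       index = int(numpy.nanargmin(numpy.absolute(fnr - fpr)))
       return float(thresholds[index])
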
Defaults to ``None`` in which logs are saved in ``/save_dir/name/version/``. - \**kwargs: + \**kwargs Additional arguments used by :py:class:`tensorboardX.SummaryWriter` can be passed as keyword arguments in this logger. To automatically flush to disk, ``max_queue`` sets the size of the queue for pending logs before diff --git a/src/mednet/engine/predictor.py b/src/mednet/engine/predictor.py index db7b6ac1..ae9ef271 100644 --- a/src/mednet/engine/predictor.py +++ b/src/mednet/engine/predictor.py @@ -31,14 +31,14 @@ def run( | MultiClassPredictionSplit | None ): - """Runs inference on input data, outputs csv files with predictions. + """Run inference on input data, outputs csv files with predictions. Parameters - --------- + ---------- model Neural network model (e.g. pasa). datamodule - The lightning DataModule to use for training **and** validation + The lightning DataModule to use for training **and** validation. device_manager An internal device representation, to be used for training and validation. This representation can be converted into a pytorch device diff --git a/src/mednet/engine/saliency/completeness.py b/src/mednet/engine/saliency/completeness.py index 6d9cf7b2..46ac77fa 100644 --- a/src/mednet/engine/saliency/completeness.py +++ b/src/mednet/engine/saliency/completeness.py @@ -43,7 +43,7 @@ def _calculate_road_scores( saliency_map_callable: typing.Callable, percentiles: typing.Sequence[int], ) -> tuple[float, float, float]: - """Calculates average ROAD scores for different removal percentiles. + """Calculate average ROAD scores for different removal percentiles. This function calculates ROAD scores by averaging the scores for different removal (hardcoded) percentiles, for a single input image, a @@ -137,7 +137,7 @@ def _process_sample( "all" or "highest". "highest" is default, which means only saliency maps for the class with the highest activation will be generated. - positive only + positive_only If set, and the model chosen has a single output (binary), then saliency maps will only be generated for samples of the positive class. percentiles @@ -213,7 +213,7 @@ def run( percentiles: typing.Sequence[int], parallel: int, ) -> dict[str, list[typing.Any]]: - """Evaluates ROAD scores for all samples in a DataModule. + """Evaluate ROAD scores for all samples in a DataModule. The ROAD algorithm was first described in [ROAD-2022]_. It estimates explainability (in the completeness sense) of saliency maps by substituting @@ -231,7 +231,7 @@ def run( sample for a particular saliency mapping algorithm. Parameters - --------- + ---------- model Neural network model (e.g. pasa). datamodule diff --git a/src/mednet/engine/saliency/evaluator.py b/src/mednet/engine/saliency/evaluator.py index 3f2afbba..0941dc60 100644 --- a/src/mednet/engine/saliency/evaluator.py +++ b/src/mednet/engine/saliency/evaluator.py @@ -16,8 +16,7 @@ def _reconcile_metrics( completeness: list, interpretability: list, ) -> list[tuple[str, int, float, float, float]]: - """Summarizes samples into a new table containing the most important - scores. + """Summarize samples into a new table containing the most important scores. It returns a list containing a table with completeness and ROAD scores per sample, for the selected dataset. Only samples for which a completness and @@ -81,7 +80,7 @@ def _make_histogram( xlim: tuple[float, float] | None = None, title: None | str = None, ) -> matplotlib.figure.Figure: - """Builds an histogram of values. + """Build an histogram of values. 
Parameters ---------- @@ -142,7 +141,7 @@ def _make_histogram( def summary_table( summary: dict[SaliencyMapAlgorithm, dict[str, typing.Any]], fmt: str ) -> str: - """Tabulates various summaries into one table. + """Tabulate various summaries into one table. Parameters ---------- @@ -185,7 +184,7 @@ def _extract_statistics( dataset: str, xlim: tuple[float, float] | None = None, ) -> dict[str, typing.Any]: - """Extracts all meaningful statistics from a reconciled statistics set. + """Extract all meaningful statistics from a reconciled statistics set. Parameters ---------- @@ -246,7 +245,7 @@ def run( completeness: dict[str, list], interpretability: dict[str, list], ) -> dict[str, typing.Any]: - """Evaluates multiple saliency map algorithms and produces summarized + """Evaluate multiple saliency map algorithms and produces summarized results. Parameters diff --git a/src/mednet/engine/saliency/generator.py b/src/mednet/engine/saliency/generator.py index 6ce13812..65b4b62d 100644 --- a/src/mednet/engine/saliency/generator.py +++ b/src/mednet/engine/saliency/generator.py @@ -24,7 +24,7 @@ def _create_saliency_map_callable( target_layers: list[torch.nn.Module] | None, use_cuda: bool, ): - """Creates a class activation map (CAM) instance for a given model. + """Create a class activation map (CAM) instance for a given model. Parameters ---------- @@ -105,7 +105,7 @@ def _save_saliency_map( """Helper function to save a saliency map to disk. Parameters - --------- + ---------- output_folder Directory in which the resulting saliency maps will be saved. name @@ -129,11 +129,11 @@ def run( positive_only: bool, output_folder: pathlib.Path, ) -> None: - """Applies saliency mapping techniques on input CXR, outputs pickled - saliency maps directly to disk. + """Apply saliency mapping techniques on input CXR, outputs pickled saliency + maps directly to disk. Parameters - --------- + ---------- model Neural network model (e.g. pasa). datamodule diff --git a/src/mednet/engine/saliency/interpretability.py b/src/mednet/engine/saliency/interpretability.py index 6611d157..aeca5b6f 100644 --- a/src/mednet/engine/saliency/interpretability.py +++ b/src/mednet/engine/saliency/interpretability.py @@ -29,7 +29,7 @@ def _ordered_connected_components( saliency_map: SaliencyMap, threshold: float, ) -> list[BinaryMask]: - """Calculates the largest connected components available on a saliency map + """Calculate the largest connected components available on a saliency map and return those as individual masks. This implementation is based on [SCORECAM-2020]_: @@ -81,7 +81,7 @@ def _ordered_connected_components( def _extract_bounding_box( mask: BinaryMask, ) -> BoundingBox: - """Defines a bounding box surrounding a connected component mask. + """Define a bounding box surrounding a connected component mask. Parameters ---------- @@ -103,7 +103,7 @@ def _compute_max_iou_and_ioda( detected_box: BoundingBox, gt_bboxes: BoundingBoxes, ) -> tuple[float, float]: - """Will calculate how much of detected area lies in ground truth boxes. + """Calculate how much of detected area lies in ground truth boxes. If there are multiple gt boxes, the detected area will be calculated for each gt box separately and the gt box with the highest @@ -153,7 +153,7 @@ def _get_largest_bounding_boxes( n: int, threshold: float = 0.2, ) -> list[BoundingBox]: - """Returns the N largest connected components as bounding boxes in a + """Return the N largest connected components as bounding boxes in a saliency map. 
The return of values is subject to the value of ``threshold`` applied, as @@ -192,8 +192,7 @@ def _compute_simultaneous_iou_and_ioda( detected_box: BoundingBox, gt_bboxes: BoundingBoxes, ) -> tuple[float, float]: - """Will calculate how much of detected area lies between ground truth - boxes. + """Calculate how much of detected area lies between ground truth boxes. This means that if there are multiple gt boxes, the detected area will be compared to them simultaneously (and not to each gt box @@ -230,7 +229,7 @@ def _compute_iou_ioda_from_largest_bbox( gt_bboxes: BoundingBoxes, saliency_map: SaliencyMap, ) -> tuple[float, float]: - """Calculates the metrics for a single sample. + """Calculate the metrics for a single sample. Parameters ---------- @@ -258,8 +257,8 @@ def _compute_avg_saliency_focus( saliency_map: SaliencyMap, gt_mask: BinaryMask, ) -> float: - """Integrates the saliency map over the ground-truth boxes and normalizes - by total bounding-box area. + """Integrate the saliency map over the ground-truth boxes and normalizes by + total bounding-box area. This function will integrate (sum) the value of the saliency map over the ground-truth bounding boxes and normalize it by the total area covered by @@ -291,7 +290,7 @@ def _compute_proportional_energy( saliency_map: SaliencyMap, gt_mask: BinaryMask, ) -> float: - """Calculates how much activation lies within the ground truth boxes + """Calculate how much activation lies within the ground truth boxes compared to the total sum of the activations (integral). Parameters @@ -321,7 +320,7 @@ def _compute_binary_mask( gt_bboxes: BoundingBoxes, saliency_map: SaliencyMap, ) -> BinaryMask: - """Computes a binary mask for the saliency map using BoundingBoxes. + """Compute a binary mask for the saliency map using BoundingBoxes. The binary_mask will be ON/True where the gt boxes are located. @@ -355,7 +354,7 @@ def _process_sample( gt_bboxes: BoundingBoxes, saliency_map: SaliencyMap, ) -> tuple[float, float]: - """Calculates the metrics for a single sample. + """Calculate the metrics for a single sample. Parameters ---------- @@ -387,11 +386,11 @@ def run( target_label: int, datamodule: lightning.pytorch.LightningDataModule, ) -> dict[str, list[typing.Any]]: - """Computes the proportional energy and average saliency focus for a given + """Compute the proportional energy and average saliency focus for a given target label in a DataModule. Parameters - --------- + ---------- input_folder Directory in which the saliency maps are stored for a specific visualization type. diff --git a/src/mednet/engine/saliency/viewer.py b/src/mednet/engine/saliency/viewer.py index 4e4c6890..6e383801 100644 --- a/src/mednet/engine/saliency/viewer.py +++ b/src/mednet/engine/saliency/viewer.py @@ -53,7 +53,7 @@ def _overlay_saliency_map( ], image_weight: float, ) -> PIL.Image.Image: - """Creates an overlayed represention of the saliency map on the original + """Create an overlayed represention of the saliency map on the original image. This is a slightly modified version of the show_cam_on_image implementation in: @@ -113,12 +113,12 @@ def _overlay_bounding_box( color: str, width: int, ) -> PIL.Image.Image: - """Draws ground-truth on the input image. + """Draw ground-truth on the input image. Parameters ---------- image - The input image that will be overlayed with the saliency map + The input image that will be overlayed with the saliency map. bbox The bounding box to draw on the input image. 
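
``_compute_proportional_energy()`` above reduces to a simple ratio: the
saliency mass falling inside the ground-truth mask over the total saliency
mass. A standalone sketch of that computation (with a guard for all-zero
maps):

.. code:: python

   import numpy

   def proportional_energy(
       saliency: numpy.ndarray, mask: numpy.ndarray
   ) -> float:
       """Fraction of total saliency contained in a boolean mask."""
       total = float(saliency.sum())
       if total == 0.0:
           return 0.0
       return float(saliency[mask].sum()) / total
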
color @@ -149,8 +149,8 @@ def _process_sample( saliencies: numpy.typing.NDArray[numpy.double], ground_truth: BoundingBoxes, ) -> PIL.Image.Image: - """Generates an overlayed representation of the original sample and - saliency maps. + """Generate an overlayed representation of the original sample and saliency + maps. Parameters ---------- @@ -195,7 +195,7 @@ def run( show_groundtruth: bool, threshold: float, ): - """Overlays saliency maps on CXR to output final images with heatmaps. + """Overlay saliency maps on CXR to output final images with heatmaps. Parameters ---------- diff --git a/src/mednet/engine/trainer.py b/src/mednet/engine/trainer.py index c338d7f4..3da73a11 100644 --- a/src/mednet/engine/trainer.py +++ b/src/mednet/engine/trainer.py @@ -30,7 +30,7 @@ def save_model_summary( output_folder: pathlib.Path, model: torch.nn.Module, ) -> tuple[lightning.pytorch.callbacks.ModelSummary, int]: - """Saves a little summary of the model in a txt file. + """Save a little summary of the model in a txt file. Parameters ---------- @@ -64,7 +64,7 @@ def static_information_to_csv( device_type: SupportedPytorchDevice, model_size: int, ) -> None: - """Saves the static information in a CSV file. + """Save the static information in a CSV file. Parameters ---------- @@ -115,7 +115,7 @@ def run( batch_chunk_count: int, checkpoint: pathlib.Path | None, ): - """Fits a CNN model using supervised learning and save it to disk. + """Fit a CNN model using supervised learning and save it to disk. This method supports periodic checkpointing and the output of a CSV-formatted log with the evolution of some figures during training. diff --git a/src/mednet/models/alexnet.py b/src/mednet/models/alexnet.py index 9c990d84..0b2d9e18 100644 --- a/src/mednet/models/alexnet.py +++ b/src/mednet/models/alexnet.py @@ -138,7 +138,7 @@ class Alexnet(pl.LightningModule): self.normalizer = checkpoint["normalizer"] def set_normalizer(self, dataloader: torch.utils.data.DataLoader) -> None: - """Initializes the normalizer for the current model. + """Initialize the normalizer for the current model. This function is NOOP if ``pretrained = True`` (normalizer set to imagenet weights, during contruction). diff --git a/src/mednet/models/densenet.py b/src/mednet/models/densenet.py index 19bf75b3..2bd6a2dd 100644 --- a/src/mednet/models/densenet.py +++ b/src/mednet/models/densenet.py @@ -138,7 +138,7 @@ class Densenet(pl.LightningModule): self.normalizer = checkpoint["normalizer"] def set_normalizer(self, dataloader: torch.utils.data.DataLoader) -> None: - """Initializes the normalizer for the current model. + """Initialize the normalizer for the current model. This function is NOOP if ``pretrained = True`` (normalizer set to imagenet weights, during contruction). diff --git a/src/mednet/models/loss_weights.py b/src/mednet/models/loss_weights.py index c0da89ff..08ac2d32 100644 --- a/src/mednet/models/loss_weights.py +++ b/src/mednet/models/loss_weights.py @@ -15,7 +15,7 @@ logger = logging.getLogger(__name__) def _get_label_weights( dataloader: torch.utils.data.DataLoader, ) -> torch.Tensor: - """Computes the weights of each class of a DataLoader. + """Compute the weights of each class of a DataLoader. This function inputs a pytorch DataLoader and computes the ratio between number of negative and positive samples (scalar). The weight can be used @@ -69,7 +69,7 @@ def _get_label_weights( def make_balanced_bcewithlogitsloss( dataloader: DataLoader, ) -> torch.nn.BCEWithLogitsLoss: - """Returns a balanced binary-cross-entropy loss. 
+ """Return a balanced binary-cross-entropy loss. The loss is weighted using the ratio between positives and total examples available. diff --git a/src/mednet/models/normalizer.py b/src/mednet/models/normalizer.py index 6bdce4fc..6e09f51e 100644 --- a/src/mednet/models/normalizer.py +++ b/src/mednet/models/normalizer.py @@ -13,7 +13,7 @@ import tqdm def make_z_normalizer( dataloader: torch.utils.data.DataLoader, ) -> torchvision.transforms.Normalize: - """Computes mean and standard deviation from a dataloader. + """Compute mean and standard deviation from a dataloader. This function will input a dataloader, and compute the mean and standard deviation by image channel. It will work for both monochromatic, and color @@ -21,7 +21,7 @@ def make_z_normalizer( Parameters ---------- - dataloader: + dataloader A torch Dataloader from which to compute the mean and std. Returns @@ -55,7 +55,7 @@ def make_z_normalizer( def make_imagenet_normalizer() -> torchvision.transforms.Normalize: - """Returns the stock ImageNet normalisation weights from torchvision. + """Return the stock ImageNet normalisation weights from torchvision. The weights are wrapped in a torch module. This normalizer only works for **RGB (color) images**. diff --git a/src/mednet/models/pasa.py b/src/mednet/models/pasa.py index 0125b0e0..9007ab1b 100644 --- a/src/mednet/models/pasa.py +++ b/src/mednet/models/pasa.py @@ -224,7 +224,7 @@ class Pasa(pl.LightningModule): self.normalizer = checkpoint["normalizer"] def set_normalizer(self, dataloader: torch.utils.data.DataLoader) -> None: - """Initializes the input normalizer for the current model. + """Initialize the input normalizer for the current model. Parameters ---------- diff --git a/src/mednet/models/separate.py b/src/mednet/models/separate.py index 6238dff0..9e5c0ee7 100644 --- a/src/mednet/models/separate.py +++ b/src/mednet/models/separate.py @@ -14,8 +14,8 @@ from .typing import BinaryPrediction, MultiClassPrediction def _as_predictions( samples: typing.Iterable[Sample], ) -> list[BinaryPrediction | MultiClassPrediction]: - """Takes a list of separated batch predictions and transforms it into a - list of formal predictions. + """Take a list of separated batch predictions and transforms it into a list + of formal predictions. Parameters ---------- @@ -31,7 +31,7 @@ def _as_predictions( def separate(batch: Sample) -> list[BinaryPrediction | MultiClassPrediction]: - """Separates a collated batch, reconstituting its samples. + """Separate a collated batch, reconstituting its samples. This function implements the inverse of :py:func:`torch.utils.data.default_collate`, and can separate, into diff --git a/src/mednet/models/transforms.py b/src/mednet/models/transforms.py index 14c7205a..55312665 100644 --- a/src/mednet/models/transforms.py +++ b/src/mednet/models/transforms.py @@ -9,7 +9,7 @@ import torchvision.transforms.functional def grayscale_to_rgb(img: torch.Tensor) -> torch.Tensor: - """Converts an image in grayscale to RGB. + """Convert an image in grayscale to RGB. If the image is already in RGB format, then this is a NOOP - the same tensor is returned (no cloning). If the image is in grayscale format @@ -50,7 +50,7 @@ def grayscale_to_rgb(img: torch.Tensor) -> torch.Tensor: def rgb_to_grayscale(img: torch.Tensor) -> torch.Tensor: - """Converts an image in RGB to grayscale. + """Convert an image in RGB to grayscale. If the image is already in grayscale format, then this is a NOOP - the same tensor is returned (no cloning). 
diff --git a/src/mednet/scripts/cli.py b/src/mednet/scripts/cli.py
index e8f7ba24..3c44ea0a 100644
--- a/src/mednet/scripts/cli.py
+++ b/src/mednet/scripts/cli.py
@@ -40,7 +40,7 @@ cli.add_command(
     context_settings=dict(help_option_names=["-?", "-h", "--help"]),
 )
 def saliency():
-    """Sub-commands to generate, evaluate and view saliency maps."""
+    """The sub-commands to generate, evaluate and view saliency maps."""
     pass
 
diff --git a/src/mednet/scripts/click.py b/src/mednet/scripts/click.py
index 84606e60..7f8830d6 100644
--- a/src/mednet/scripts/click.py
+++ b/src/mednet/scripts/click.py
@@ -15,7 +15,7 @@ class ConfigCommand(_BaseConfigCommand):
     def format_epilog(
         self, _: click.core.Context, formatter: click.formatting.HelpFormatter
     ) -> None:
-        """Formats the command epilog during --help.
+        """Format the command epilog during --help.
 
         Parameters
         ----------
diff --git a/src/mednet/scripts/config.py b/src/mednet/scripts/config.py
index 14abfe9a..9494cda4 100644
--- a/src/mednet/scripts/config.py
+++ b/src/mednet/scripts/config.py
@@ -16,7 +16,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
 
 @click.group(cls=AliasedGroup)
 def config():
-    """Commands for listing, describing and copying configuration resources."""
+    """Command for listing, describing and copying configuration resources."""
     pass
 
@@ -43,7 +43,7 @@
 )
 @verbosity_option(logger=logger)
 def list(verbose) -> None:
-    """Lists configuration files installed."""
+    """List configuration files installed."""
     entry_points = importlib.metadata.entry_points().select(
         group="mednet.config"
     )
@@ -98,7 +98,7 @@ def list(verbose) -> None:
     epilog="""Examples:
 
 \b
  1. Describes the Montgomery dataset configuration:
- 1. Describes the Montgomery dataset configuration:
+ 1. Describe the Montgomery dataset configuration:
 
    .. code:: sh
 
@@ -106,7 +106,7 @@ def list(verbose) -> None:
 
 \b
- 2. Describes the Montgomery dataset configuration and lists its
+ 2. Describe the Montgomery dataset configuration and list its
    contents:
 
    .. code:: sh
 
@@ -122,7 +122,7 @@
 )
 @verbosity_option(logger=logger)
 def describe(name, verbose) -> None:
-    """Describes a specific configuration file."""
+    """Describe a specific configuration file."""
     entry_points = importlib.metadata.entry_points().select(
         group="mednet.config"
     )
@@ -152,7 +152,7 @@ def describe(name, verbose) -> None:
     epilog="""Examples:
 
 \b
- 1. Makes a copy of one of the stock configuration files locally, so it can be
+ 1. Make a copy of one of the stock configuration files locally, so it can be
    adapted:
 
    .. code:: sh
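The context lines above show how configuration resources are discovered: they are ``importlib.metadata`` entry points registered under the ``mednet.config`` group. A standalone sketch of that lookup (the sorting and printing are illustrative additions, not mednet code):

.. code:: python

   import importlib.metadata

   # plugin discovery: installed packages advertise configuration modules
   # under the "mednet.config" entry-point group, as the diff context shows
   entry_points = importlib.metadata.entry_points().select(group="mednet.config")
   for ep in sorted(entry_points, key=lambda e: e.name):
       # ep.load() would import the configuration module itself
       print(f"{ep.name} -> {ep.module}")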
     Returns
     -------
@@ -57,7 +57,7 @@ def _get_raw_databases() -> dict[str, dict[str, str]]:
 
 @click.group(cls=AliasedGroup)
 def database() -> None:
-    """Commands for listing and verifying databases installed."""
+    """Command for listing and verifying databases installed."""
     pass
 
@@ -90,7 +90,7 @@
 )
 @verbosity_option(logger=logger, expose_value=False)
 def list():
-    """Lists all supported and configured databases."""
+    """List all supported and configured databases."""
     config = _get_raw_databases()
 
     click.echo("Available databases:")
@@ -132,7 +132,7 @@
 )
 @verbosity_option(logger=logger, expose_value=False)
 def check(split, limit):
-    """Checks file access on one or more DataModules."""
+    """Check file access on one or more DataModules."""
     import importlib.metadata
     import sys
 
diff --git a/src/mednet/scripts/evaluate.py b/src/mednet/scripts/evaluate.py
index 25976eef..657e2327 100644
--- a/src/mednet/scripts/evaluate.py
+++ b/src/mednet/scripts/evaluate.py
@@ -22,13 +22,13 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
     cls=ConfigCommand,
     epilog="""Examples:
 
-1. Runs evaluation on an existing prediction output:
+1. Run evaluation on an existing prediction output:
 
    .. code:: sh
 
      mednet evaluate -vv --predictions=path/to/predictions.json --output-folder=path/to/results
 
-2. Runs evaluation on an existing prediction output, tune threshold a priori on the `validation` set:
+2. Run evaluation on an existing prediction output, tune threshold a priori on the `validation` set:
 
    .. code:: sh
 
@@ -79,7 +79,7 @@ def evaluate(
     threshold: str | float,
     **_,  # ignored
 ) -> None:
-    """Evaluates predictions (from a model) on a classification task."""
+    """Evaluate predictions (from a model) on a classification task."""
 
     import json
     import typing
 
diff --git a/src/mednet/scripts/experiment.py b/src/mednet/scripts/experiment.py
index 6ea6cffb..6c83e926 100644
--- a/src/mednet/scripts/experiment.py
+++ b/src/mednet/scripts/experiment.py
@@ -21,7 +21,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
     epilog="""Examples:
 
 \b
- 1. Trains a pasa model with montgomery dataset, on the CPU, for only two
+ 1. Train a pasa model with montgomery dataset, on the CPU, for only two
    epochs, then runs inference and evaluation on stock datasets, report
    performance as a table and figures:
 
@@ -51,7 +51,7 @@ def experiment(
     balance_classes,
     **_,
 ):
-    """Runs a complete experiment, from training, to prediction and evaluation.
+    """Run a complete experiment, from training to prediction and evaluation.
 
     This script is just a wrapper around the individual scripts for training,
     running prediction, and evaluating.  It organises the output in a preset way::
 
diff --git a/src/mednet/scripts/predict.py b/src/mednet/scripts/predict.py
index 34cf93aa..664da885 100644
--- a/src/mednet/scripts/predict.py
+++ b/src/mednet/scripts/predict.py
@@ -19,13 +19,13 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
     cls=ConfigCommand,
     epilog="""Examples:
 
-1. Runs prediction on an existing DataModule configuration:
+1. Run prediction on an existing DataModule configuration:
 
    .. code:: sh
 
      mednet predict -vv pasa montgomery --weight=path/to/model.ckpt --output=path/to/predictions.json
 
-2. Enables multi-processing data loading with 6 processes:
+2. Enable multi-processing data loading with 6 processes:
   .. code:: sh
 
@@ -125,7 +125,7 @@ def predict(
     parallel,
     **_,
 ) -> None:
-    """Runs inference (generates scores) on all input images, using a pre-
+    """Run inference (generates scores) on all input images, using a pre-
     trained model."""
 
     import json
 
diff --git a/src/mednet/scripts/saliency/completeness.py b/src/mednet/scripts/saliency/completeness.py
index 1d9d7172..5ce8578d 100644
--- a/src/mednet/scripts/saliency/completeness.py
+++ b/src/mednet/scripts/saliency/completeness.py
@@ -21,7 +21,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
     cls=ConfigCommand,
     epilog="""Examples:
 
-1. Calculates the ROAD scores for an existing dataset configuration and stores them in .json files:
+1. Calculate the ROAD scores for an existing dataset configuration and store them in .json files:
 
    .. code:: sh
 
@@ -173,7 +173,7 @@ def completeness(
     percentile,
     **_,
 ) -> None:
-    """Evaluates saliency map algorithm completeness using RemOve And Debias
+    """Evaluate saliency map algorithm completeness using RemOve And Debias
     (ROAD).
 
     For the selected saliency map algorithm, evaluates the completeness of
diff --git a/src/mednet/scripts/saliency/evaluate.py b/src/mednet/scripts/saliency/evaluate.py
index 4b00cc50..d5b75336 100644
--- a/src/mednet/scripts/saliency/evaluate.py
+++ b/src/mednet/scripts/saliency/evaluate.py
@@ -24,7 +24,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
     cls=ConfigCommand,
     epilog="""Examples:
 
-1. Tabulates and generates plots for two saliency map algorithms:
+1. Tabulate and generate plots for two saliency map algorithms:
 
    .. code:: sh
 
@@ -77,7 +77,7 @@ def evaluate(
     output_folder,
     **_,  # ignored
 ) -> None:
-    """Calculates summary statistics for a saliency map algorithm."""
+    """Calculate summary statistics for a saliency map algorithm."""
 
     import json
 
     from matplotlib.backends.backend_pdf import PdfPages
 
diff --git a/src/mednet/scripts/saliency/generate.py b/src/mednet/scripts/saliency/generate.py
index c2ae4501..7f2d5842 100644
--- a/src/mednet/scripts/saliency/generate.py
+++ b/src/mednet/scripts/saliency/generate.py
@@ -21,7 +21,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
     cls=ConfigCommand,
     epilog="""Examples:
 
-1. Generates saliency maps for all prediction dataloaders on a DataModule,
+1. Generate saliency maps for all prediction dataloaders on a DataModule,
    using a pre-trained DenseNet model, and saves them as numpy-pickeled
    objects on the output directory:
 
@@ -162,7 +162,7 @@ def generate(
     positive_only,
     **_,
 ) -> None:
-    """Generates saliency maps for locations on input images that affected the
+    """Generate saliency maps for locations on input images that affected the
     prediction.
 
     The quality of saliency information depends on the saliency map
diff --git a/src/mednet/scripts/saliency/interpretability.py b/src/mednet/scripts/saliency/interpretability.py
index e5fa9556..5c3da890 100644
--- a/src/mednet/scripts/saliency/interpretability.py
+++ b/src/mednet/scripts/saliency/interpretability.py
@@ -84,7 +84,7 @@ def interpretability(
     output_json,
     **_,
 ) -> None:
-    """Evaluates saliency map agreement with annotations (human
+    """Evaluate saliency map agreement with annotations (human
     interpretability).
 
     The evaluation happens by comparing saliency maps with ground-truth
@@ -96,7 +96,6 @@
 
     For obvious reasons, this evaluation is limited to datasets that contain
     built-in annotations which corroborate classification.
-
     As a result of the evaluation, this application creates a single .json
     file that resembles the original DataModule, with added information
     containing the following measures, for each sample:
diff --git a/src/mednet/scripts/saliency/view.py b/src/mednet/scripts/saliency/view.py
index 928cac6b..e644b8e2 100644
--- a/src/mednet/scripts/saliency/view.py
+++ b/src/mednet/scripts/saliency/view.py
@@ -18,7 +18,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
     cls=ConfigCommand,
     epilog="""Examples:
 
-1. Generates visualizations in the form of heatmaps from existing saliency maps for a dataset configuration:
+1. Generate visualizations in the form of heatmaps from existing saliency maps for a dataset configuration:
 
    .. code:: sh
 
@@ -98,7 +98,7 @@ def view(
     threshold,
     **_,
 ) -> None:
-    """Generates heatmaps for input CXRs based on existing saliency maps."""
+    """Generate heatmaps for input CXRs based on existing saliency maps."""
 
     from ...engine.saliency.viewer import run
     from ..utils import save_sh_command
 
diff --git a/src/mednet/scripts/train.py b/src/mednet/scripts/train.py
index cc974870..f1956715 100644
--- a/src/mednet/scripts/train.py
+++ b/src/mednet/scripts/train.py
@@ -16,7 +16,8 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
 
 def reusable_options(f):
-    """Options that can be re-used by top-level scripts (i.e. ``experiment``).
+    """The options that can be re-used by top-level scripts (i.e.
+    ``experiment``).
 
     This decorator equips the target function ``f`` with all (reusable)
     ``train`` script options.
@@ -222,7 +223,7 @@
     cls=ConfigCommand,
     epilog="""Examples:
 
-1. Trains a pasa model with the montgomery dataset, on a GPU (``cuda:0``):
+1. Train a pasa model with the montgomery dataset, on a GPU (``cuda:0``):
 
    .. code:: sh
 
@@ -248,7 +249,7 @@ def train(
     balance_classes,
     **_,
 ) -> None:
-    """Trains an CNN to perform image classification.
+    """Train a CNN to perform image classification.
 
     Training is performed for a configurable number of epochs, and
     generates checkpoints.  Checkpoints are model files with a .ckpt
diff --git a/src/mednet/scripts/train_analysis.py b/src/mednet/scripts/train_analysis.py
index d3c979e0..cb240a6e 100644
--- a/src/mednet/scripts/train_analysis.py
+++ b/src/mednet/scripts/train_analysis.py
@@ -34,7 +34,7 @@ def create_figures(
         "percent-usage/gpu/*",
     ],
 ) -> list:
-    """Generates figures for each metric in the dataframe.
+    """Generate figures for each metric in the dataframe.
 
     Each row of the dataframe corresponds to an epoch and each column to a
     metric. It is assumed that some metric names are of the form
     <metric>/<subset>.
@@ -107,7 +107,7 @@ def create_figures(
     epilog="""Examples:
 
 \b
- 1. Analyzes a training log and produces various plots:
+ 1. Analyze a training log and produce various plots:
 
    .. code:: sh
 
@@ -134,7 +134,7 @@ def train_analysis(
     logdir: pathlib.Path,
     output: pathlib.Path,
 ) -> None:
-    """Creates a plot for each metric in the training logs and saves them in a
+    """Create a plot for each metric in the training logs and save them in a
     .pdf file."""
 
     import matplotlib.pyplot as plt
 
diff --git a/src/mednet/scripts/utils.py b/src/mednet/scripts/utils.py
index b3b0d310..7c62d84e 100644
--- a/src/mednet/scripts/utils.py
+++ b/src/mednet/scripts/utils.py
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
 
 def save_sh_command(path: pathlib.Path) -> None:
-    """Records command-line to reproduce this script.
+    """Record the command-line to reproduce this script.
    This function can record the current command-line used to call the script
     being run.  It creates an executable ``bash`` script setting up the current
diff --git a/src/mednet/utils/checkpointer.py b/src/mednet/utils/checkpointer.py
index bedff23a..51d9192f 100644
--- a/src/mednet/utils/checkpointer.py
+++ b/src/mednet/utils/checkpointer.py
@@ -24,7 +24,7 @@ def _get_checkpoint_from_alias(
     path: pathlib.Path,
     alias: typing.Literal["best", "periodic"],
 ) -> pathlib.Path:
-    """Gets an existing checkpoint file path.
+    """Get an existing checkpoint file path.
 
     This function can search for names matching the checkpoint alias "stem"
     (ie. the prefix), and then assumes a dash "-" and a number follows that
@@ -89,7 +89,7 @@ def get_checkpoint_to_resume_training(
     path: pathlib.Path,
 ) -> pathlib.Path:
-    """Returns the best checkpoint file path to resume training from.
+    """Return the best checkpoint file path to resume training from.
 
     Parameters
     ----------
@@ -114,7 +114,7 @@ def get_checkpoint_to_run_inference(
     path: pathlib.Path,
 ) -> pathlib.Path:
-    """Returns the best checkpoint file path to run inference with.
+    """Return the best checkpoint file path to run inference with.
 
     Parameters
     ----------
diff --git a/src/mednet/utils/rc.py b/src/mednet/utils/rc.py
index 1a99df49..b3657e02 100644
--- a/src/mednet/utils/rc.py
+++ b/src/mednet/utils/rc.py
@@ -6,5 +6,5 @@ from clapper.rc import UserDefaults
 
 def load_rc() -> UserDefaults:
-    """Returns global configuration variables."""
+    """Return global configuration variables."""
     return UserDefaults("mednet.toml")
diff --git a/src/mednet/utils/resources.py b/src/mednet/utils/resources.py
index ae28a34d..e1800778 100644
--- a/src/mednet/utils/resources.py
+++ b/src/mednet/utils/resources.py
@@ -32,7 +32,7 @@ GB = float(2**30)
 def run_nvidia_smi(
     query: typing.Sequence[str],
 ) -> dict[str, str | float] | None:
-    """Returns GPU information from query.
+    """Return GPU information from query.
 
     For a comprehensive list of options and help, execute
     ``nvidia-smi --help-query-gpu`` on a host with a GPU
@@ -76,26 +76,21 @@ def run_powermetrics(
     time_window_ms: int = 500, key: str | None = None
 ) -> dict[str, typing.Any] | None:
-    """Returns GPU information from the system.
+    """Return GPU information from the system.
 
     For a comprehensive list of options and help, execute ``man powermetrics``
     on a Mac computer with Apple silicon.
 
-
     Parameters
     ----------
-
     time_window_ms
         The amount of time, in milliseconds, to collect usage information on
         the GPU.
-
     key
         If specified returns only a sub-key of the dictionary.
-
     Returns
     -------
-
     data
         A dictionary containing the GPU information.
     """
@@ -141,7 +136,7 @@
 
 def cuda_constants() -> dict[str, str | int | float] | None:
-    """Returns GPU (static) information using nvidia-smi.
+    """Return GPU (static) information using nvidia-smi.
 
     See :py:func:`run_nvidia_smi` for operational details.
 
@@ -168,12 +163,11 @@
 
 def mps_constants() -> dict[str, str | int | float] | None:
-    """Returns GPU (static) information using `/usr/bin/powermetrics`.
+    """Return GPU (static) information using `/usr/bin/powermetrics`.
     Returns
     -------
-
-    data : :py:class:`tuple`, None
+    dict[str, str | int | float]
         If ``nvidia-smi`` is not available, returns ``None``, otherwise, we
         return a dictionary containing the following ``nvidia-smi`` query
         information, in this order:
@@ -199,7 +193,7 @@
 
 def cuda_log() -> dict[str, float] | None:
-    """Returns GPU information about current non-static status using nvidia-
+    """Return GPU information about current non-static status using nvidia-
     smi.
 
     See :py:func:`run_nvidia_smi` for operational details.
 
@@ -239,19 +233,18 @@
 
 def mps_log() -> dict[str, float] | None:
-    """Returns GPU information about current non-static status using ``sudo
+    """Return GPU information about current non-static status using ``sudo
     powermetrics``.
 
     Returns
     -------
-
         If ``sudo powermetrics`` is not executable (or is not configured for
         passwordless execution), returns ``None``, otherwise, we return a
         dictionary containing the following query information, in this order:
 
         * ``freq_hz`` as ``frequency-MHz/gpu``
         * 100 * (1 - ``idle_ratio``), as ``percent-usage/gpu``,
-          (:py:class:`float`, in percent)
+          (:py:class:`float`, in percent).
     """
 
     result = run_powermetrics(500, key="gpu")
 
@@ -266,7 +259,7 @@
 
 def cpu_constants() -> dict[str, int | float]:
-    """Returns static CPU information about the current system.
+    """Return static CPU information about the current system.
 
     Returns
     -------
@@ -275,7 +268,7 @@
         0. ``cpu_memory_total`` (:py:class:`float`): total memory available,
            in gigabytes
-        1. ``cpu_count`` (:py:class:`int`): number of logical CPUs available
+        1. ``cpu_count`` (:py:class:`int`): number of logical CPUs available.
     """
     return {
         "memory-total-GB/cpu": psutil.virtual_memory().total / GB,
@@ -284,7 +277,7 @@
 
 class CPULogger:
-    """Logs CPU information using :py:mod:`psutil`.
+    """Log CPU information using :py:mod:`psutil`.
 
     Parameters
     ----------
@@ -299,7 +292,7 @@
         [k.cpu_percent(interval=None) for k in self.cluster]
 
     def log(self) -> dict[str, int | float]:
-        """Returns current process cluster information.
+        """Return current process cluster information.
 
         Returns
         -------
@@ -374,7 +367,7 @@ class _InformationGatherer:
             String representation of one of the supported pytorch device types
             triggering the correct readout of resource usage.
         main_pid
-            The main process identifier to monitor
+            The main process identifier to monitor.
         logger
             A logger to be used for logging messages.
""" @@ -413,7 +406,7 @@ class _InformationGatherer: self.data: dict[str, list[int | float]] = {k: [] for k in keys} def acc(self) -> None: - """Accumulates another measurement.""" + """Accumulate another measurement.""" for k, v in self.cpu_logger.log().items(): self.data[k].append(v) @@ -434,12 +427,12 @@ class _InformationGatherer: pass def clear(self) -> None: - """Clears accumulated data.""" + """Clear accumulated data.""" for k in self.data.keys(): self.data[k] = [] def summary(self) -> dict[str, list[int | float]]: - """Returns the current data.""" + """Return the current data.""" if len(next(iter(self.data.values()))) == 0: self.logger.error("CPU/GPU logger was not able to collect any data") return self.data @@ -547,11 +540,11 @@ class ResourceMonitor: self.data: dict[str, int | float] | None = None def __enter__(self) -> None: - """Starts the monitoring process.""" + """Start the monitoring process.""" self.monitor.start() def checkpoint(self, remove_last_n: int | None = None) -> None: - """Forces the monitoring process to yield data and clear the internal + """Force the monitoring process to yield data and clear the internal accumulator. Parameters @@ -589,7 +582,7 @@ class ResourceMonitor: self.data[k] = 0.0 def __exit__(self, *_) -> None: - """Stops the monitoring process and returns the summary of + """Stop the monitoring process and returns the summary of observations.""" self.stop_event.set() diff --git a/src/mednet/utils/summary.py b/src/mednet/utils/summary.py index ffc3b417..bff705e3 100644 --- a/src/mednet/utils/summary.py +++ b/src/mednet/utils/summary.py @@ -46,7 +46,7 @@ def _repr(model: torch.nn.Module) -> tuple[str, int]: def summary(model: torch.nn.Module) -> tuple[str, int]: - """Counts the number of parameters in each model layer. + """Count the number of parameters in each model layer. Parameters ---------- diff --git a/src/mednet/utils/tensorboard.py b/src/mednet/utils/tensorboard.py index 1ede9e70..56a81c44 100644 --- a/src/mednet/utils/tensorboard.py +++ b/src/mednet/utils/tensorboard.py @@ -12,7 +12,7 @@ from tensorboard.backend.event_processing.event_accumulator import ( def scalars_to_dict( logdir: pathlib.Path, ) -> dict[str, tuple[list[int], list[float]]]: - """Returns scalars stored in tensorboard event files. + """Return scalars stored in tensorboard event files. This method will gather all tensorboard event files produced by a training run, and will return a dictionary with all collected scalars, ready for diff --git a/tests/conftest.py b/tests/conftest.py index 60d00e28..5b9d4d47 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -17,7 +17,7 @@ from mednet.data.typing import DatabaseSplit @pytest.fixture def datadir(request) -> pathlib.Path: - """Returns the directory in which the test is sitting.""" + """Return the directory in which the test is sitting.""" return pathlib.Path(request.module.__file__).parents[0] / "data" @@ -76,7 +76,7 @@ def temporary_basedir(tmp_path_factory): def pytest_sessionstart(session: pytest.Session) -> None: - """Presets the session start to ensure the Montgomery dataset is always + """Preset the session start to ensure the Montgomery dataset is always available.""" from mednet.utils.rc import load_rc @@ -129,27 +129,19 @@ class DatabaseCheckers: prefixes: typing.Sequence[str], possible_labels: typing.Sequence[int], ): - """Runs a simple consistence check on the data split. + """Run a simple consistence check on the data split. 
diff --git a/tests/conftest.py b/tests/conftest.py
index 60d00e28..5b9d4d47 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -17,7 +17,7 @@ from mednet.data.typing import DatabaseSplit
 
 @pytest.fixture
 def datadir(request) -> pathlib.Path:
-    """Returns the directory in which the test is sitting."""
+    """Return the directory in which the test is sitting."""
     return pathlib.Path(request.module.__file__).parents[0] / "data"
 
@@ -76,7 +76,7 @@ def temporary_basedir(tmp_path_factory):
 
 def pytest_sessionstart(session: pytest.Session) -> None:
-    """Presets the session start to ensure the Montgomery dataset is always
+    """Preset the session start to ensure the Montgomery dataset is always
     available."""
 
     from mednet.utils.rc import load_rc
@@ -129,27 +129,19 @@ class DatabaseCheckers:
         prefixes: typing.Sequence[str],
         possible_labels: typing.Sequence[int],
     ):
-        """Runs a simple consistence check on the data split.
+        """Run a simple consistency check on the data split.
 
         Parameters
         ----------
-
-        make_split
-            A database specific function that takes a split name and returns
-            the loaded database split.
-
-        split_filename
-            This is the split we will check
-
-        lenghts
+        split
+            An instance of DatabaseSplit.
+        lengths
             A dictionary that contains keys matching those of the split (this
             will be checked).  The values of the dictionary should correspond
             to the sizes of each of the datasets in the split.
-
         prefixes
             Each file named in a split should start with at least one of these
             prefixes.
-
        possible_labels
            These are the list of possible labels contained in any split.
        """
@@ -179,21 +171,20 @@
         prefixes: typing.Sequence[str],
         possible_labels: typing.Sequence[int],
     ):
-        """Checks the consistence of an individual (loaded) batch.
+        """Check the consistency of an individual (loaded) batch.
 
         Parameters
         ----------
 
         batch
             The loaded batch to be checked.
-
-        size
-            The mini-batch size
-
+        batch_size
+            The mini-batch size.
+        color_planes
+            The number of color planes in the images.
         prefixes
             Each file named in a split should start with at least one of these
             prefixes.
-
         possible_labels
             These are the list of possible labels contained in any split.
         """
diff --git a/tests/test_tbx11k.py b/tests/test_tbx11k.py
index 71c37e16..b1bbcc1f 100644
--- a/tests/test_tbx11k.py
+++ b/tests/test_tbx11k.py
@@ -153,16 +153,18 @@ def check_loaded_batch(
     batch_size: int,
     prefixes: typing.Sequence[str],
 ):
-    """Checks the consistence of an individual (loaded) batch.
+    """Check the consistency of an individual (loaded) batch.
 
     Parameters
     ----------
 
     batch
         The loaded batch to be checked.
-
-    size
-        The mini-batch size
+    batch_size
+        The mini-batch size.
+    prefixes
+        Each file named in a split should start with at least one of these
+        prefixes.
     """
 
     assert len(batch) == 2  # data, metadata
-- 
GitLab
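Taken together, the edits in this patch converge on a single docstring shape: an imperative-mood summary, parameter entries without trailing colons and without repeated types (PEP 484 hints stay in the signature, which is why the PR04 check is disabled), and sentence-final periods. A hypothetical function, not part of mednet, written to those conventions:

.. code:: python

   import torch

   def count_samples(dataloader: torch.utils.data.DataLoader) -> int:
       """Count the number of samples served by a DataLoader.

       Parameters
       ----------
       dataloader
           The DataLoader to inspect.

       Returns
       -------
       int
           The total number of samples over all batches.
       """
       return sum(len(batch[0]) for batch in dataloader)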