diff --git a/src/mednet/data/augmentations.py b/src/mednet/data/augmentations.py index a104ec4ac26b1540ed2a94a0509563fbeff0b4f8..2afcdac5cd45605516e725a6d7c3fb3b57f2387b 100644 --- a/src/mednet/data/augmentations.py +++ b/src/mednet/data/augmentations.py @@ -246,7 +246,7 @@ class ElasticDeformation: (default), disables multiprocessing. If set to -2, then enable auto-tune (use the minimum value between the first batch size and total number of processing cores). Set to 0 to enable as many processes as - processing cores as available in the system. Set to >= 1 to enable that + processing cores available in the system. Set to >= 1 to enable that many processes. """ @@ -273,7 +273,7 @@ class ElasticDeformation: If set to -1 (default), disables multiprocessing. If set to -2, then enable auto-tune (use the minimum value between the first batch size and total number of processing cores). Set to 0 to - enable as many processes as processing cores as available in the + enable as many processes as processing cores available in the system. Set to >= 1 to enable that many processes. """ return self._parallel diff --git a/src/mednet/data/datamodule.py b/src/mednet/data/datamodule.py index 82353be3fb2080b9e761dba9d2b52899316b9dd9..b7d8660989ca8c5abec00c756a61d656bc98f5b1 100644 --- a/src/mednet/data/datamodule.py +++ b/src/mednet/data/datamodule.py @@ -168,7 +168,7 @@ class _CachedDataset(Dataset): parallel Use multiprocessing for data loading: if set to -1 (default), disables multiprocessing data loading. Set to 0 to enable as many data loading - instances as processing cores as available in the system. Set to >= 1 + instances as processing cores available in the system. Set to >= 1 to enable that many multiprocessing instances for data loading. transforms @@ -491,7 +491,7 @@ class ConcatDataModule(lightning.LightningDataModule): parallel Use multiprocessing for data loading: if set to -1 (default), disables multiprocessing data loading. Set to 0 to enable as many data loading - instances as processing cores as available in the system. Set to >= 1 + instances as processing cores available in the system. Set to >= 1 to enable that many multiprocessing instances for data loading. """ @@ -540,7 +540,7 @@ class ConcatDataModule(lightning.LightningDataModule): Use multiprocessing for data loading: if set to -1 (default), disables multiprocessing data loading. Set to 0 to enable as - many data loading instances as processing cores as available in + many data loading instances as processing cores available in the system. Set to >= 1 to enable that many multiprocessing instances for data loading. diff --git a/src/mednet/engine/saliency/completeness.py b/src/mednet/engine/saliency/completeness.py index d2b031b27a7d64839bc5c63a19dc6ac29fc04f88..8d781cc18bf88e5725792d8c176fb43a26e2e07c 100644 --- a/src/mednet/engine/saliency/completeness.py +++ b/src/mednet/engine/saliency/completeness.py @@ -250,7 +250,7 @@ def run( parallel Use multiprocessing for data processing: if set to -1, disables multiprocessing. Set to 0 to enable as many data processing instances - as processing cores as available in the system. Set to >= 1 to enable + as processing cores available in the system. Set to >= 1 to enable that many multiprocessing instances for data processing. 
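The ``parallel`` semantics documented in these hunks can be summarized in code. Below is a minimal sketch of the value-to-worker-count mapping under stated assumptions: the helper name ``resolve_parallel`` is hypothetical (not part of mednet), and the ``-2`` auto-tune branch only applies to ``ElasticDeformation``:

.. code:: python

   import multiprocessing

   def resolve_parallel(parallel: int, first_batch_size: int) -> int:
       # Hypothetical helper mirroring the documented settings.
       cores = multiprocessing.cpu_count()
       if parallel == -1:
           return 0  # multiprocessing disabled; work runs in-process
       if parallel == -2:
           # auto-tune: minimum of first batch size and core count
           return min(first_batch_size, cores)
       if parallel == 0:
           return cores  # one process per core available in the system
       return parallel  # >= 1: exactly that many processes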
diff --git a/src/mednet/models/alexnet.py b/src/mednet/models/alexnet.py index 5e5b3e7eee3d810aaa4c8dfed31ad3d9fa5414b5..53fbf49a2a9fb5ad3bb60aac086b86f9a2f36038 100644 --- a/src/mednet/models/alexnet.py +++ b/src/mednet/models/alexnet.py @@ -118,26 +118,27 @@ class Alexnet(pl.LightningModule): return x def on_save_checkpoint(self, checkpoint: Checkpoint) -> None: - """Called by Lightning to restore your model. - - If you saved something with on_save_checkpoint() this is your chance to - restore this. + """Called by Lightning when saving a checkpoint to give you a chance to + store anything else you might want to save. Use on_load_checkpoint() to + restore the additional data saved here. Parameters ---------- checkpoint - Loaded checkpoint + The checkpoint to save """ checkpoint["normalizer"] = self.normalizer def on_load_checkpoint(self, checkpoint: Checkpoint) -> None: - """Called by Lightning when saving a checkpoint to give you a chance to - store anything else you might want to save. + """Called by Lightning to restore your model. + + If you saved something with on_save_checkpoint() this is your chance to + restore this. Parameters ---------- checkpoint - Loaded checkpoint + The loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") self.normalizer = checkpoint["normalizer"] diff --git a/src/mednet/models/densenet.py b/src/mednet/models/densenet.py index 333edb11e2112db9ef1f1bf37b2d333f5b418de6..e2bfc31017ba7d63a247b8b197c3a321090a5ef1 100644 --- a/src/mednet/models/densenet.py +++ b/src/mednet/models/densenet.py @@ -118,26 +118,27 @@ class Densenet(pl.LightningModule): return x def on_save_checkpoint(self, checkpoint: Checkpoint) -> None: - """Called by Lightning to restore your model. - - If you saved something with on_save_checkpoint() this is your chance to - restore this. + """Called by Lightning when saving a checkpoint to give you a chance to + store anything else you might want to save. Use on_load_checkpoint() to + restore the additional data saved here. Parameters ---------- checkpoint - Loaded checkpoint + The checkpoint to save """ checkpoint["normalizer"] = self.normalizer def on_load_checkpoint(self, checkpoint: Checkpoint) -> None: - """Called by Lightning when saving a checkpoint to give you a chance to - store anything else you might want to save. + """Called by Lightning to restore your model. + + If you saved something with on_save_checkpoint() this is your chance to + restore this. Parameters ---------- checkpoint - Loaded checkpoint + The loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") self.normalizer = checkpoint["normalizer"] diff --git a/src/mednet/models/loss_weights.py b/src/mednet/models/loss_weights.py index 8cf79b7757f3f677d151870f5fd1e571501ed673..c6af3d5903410e00edd49d11c43f50b2f7d005a1 100644 --- a/src/mednet/models/loss_weights.py +++ b/src/mednet/models/loss_weights.py @@ -21,8 +21,6 @@ def _get_label_weights( number of negative and positive samples (scalar). The weight can be used to adjust minimisation criteria to in cases there is a huge data imbalance. - If - It returns a vector with weights (inverse counts) for each label.
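For illustration, the inverse-count weighting this docstring describes can be computed as sketched below. This is a hypothetical, self-contained example of the documented behaviour for the binary case, not the body of ``_get_label_weights``:

.. code:: python

   import torch

   # Six illustrative binary targets: four negatives, two positives.
   targets = torch.tensor([0, 0, 1, 0, 1, 0])

   positives = targets.sum()             # tensor(2)
   negatives = len(targets) - positives  # tensor(4)

   # Inverse-count positive weight, usable as ``pos_weight`` in
   # torch.nn.BCEWithLogitsLoss to compensate for class imbalance.
   positive_weight = negatives / positives  # tensor(2.)
   criterion = torch.nn.BCEWithLogitsLoss(pos_weight=positive_weight)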
@@ -38,7 +36,7 @@ def _get_label_weights( ------- positive_weights - the positive weight of each class in the dataset given as input + The positive weight of each class in the dataset given as input """ targets = torch.tensor( diff --git a/src/mednet/models/pasa.py b/src/mednet/models/pasa.py index a7b8ae62c0c56bc2c5172058fdd3382c987514a1..7741944a91b90f50d9839aaaf9dddd3effc5d19e 100644 --- a/src/mednet/models/pasa.py +++ b/src/mednet/models/pasa.py @@ -204,26 +204,27 @@ class Pasa(pl.LightningModule): return x def on_save_checkpoint(self, checkpoint: Checkpoint) -> None: - """Called by Lightning to restore your model. - - If you saved something with on_save_checkpoint() this is your chance to - restore this. + """Called by Lightning when saving a checkpoint to give you a chance to + store anything else you might want to save. Use on_load_checkpoint() to + restore the additional data saved here. Parameters ---------- checkpoint - Loaded checkpoint + The checkpoint to save """ checkpoint["normalizer"] = self.normalizer def on_load_checkpoint(self, checkpoint: Checkpoint) -> None: - """Called by Lightning when saving a checkpoint to give you a chance to - store anything else you might want to save. + """Called by Lightning to restore your model. + + If you saved something with on_save_checkpoint() this is your chance to + restore this. Parameters ---------- checkpoint - Loaded checkpoint + The loaded checkpoint """ logger.info("Restoring normalizer from checkpoint.") self.normalizer = checkpoint["normalizer"] diff --git a/src/mednet/models/separate.py b/src/mednet/models/separate.py index 9568721169a58a69c1f54f57bcca05398cb02e3c..8479c2a7986d975722becc8ee81a194332255fd0 100644 --- a/src/mednet/models/separate.py +++ b/src/mednet/models/separate.py @@ -14,8 +14,8 @@ from .typing import BinaryPrediction, MultiClassPrediction def _as_predictions( samples: typing.Iterable[Sample], ) -> list[BinaryPrediction | MultiClassPrediction]: - """Takes a list of separated batch predictions and transform into a list of - formal predictions. + """Takes a list of separated batch predictions and transforms it into a + list of formal predictions. Parameters ---------- @@ -31,7 +31,7 @@ def _as_predictions( def separate(batch: Sample) -> list[BinaryPrediction | MultiClassPrediction]: - """Separates a collated batch reconstituting its samples. + """Separates a collated batch, reconstituting its samples. This function implements the inverse of :py:func:`torch.utils.data.default_collate`, and can separate, into diff --git a/src/mednet/models/transforms.py b/src/mednet/models/transforms.py index 8ff0af726aada9256603882dd8a6c0aeb6d34cad..7453cb6d77e3d793a988f954e5ef3dcc5fff33b8 100644 --- a/src/mednet/models/transforms.py +++ b/src/mednet/models/transforms.py @@ -76,7 +76,7 @@ def rgb_to_grayscale(img: torch.Tensor) -> torch.Tensor: ------- img - transformed tensor where the 3rd dimension from the last is 3. + Transformed tensor where the 3rd dimension from the last is 3. """ if img.ndim < 3: raise TypeError( diff --git a/src/mednet/scripts/experiment.py b/src/mednet/scripts/experiment.py index 151a2356f55db173839aab44a3ec46a942223b1c..6ea6cffb07cff62e0e7be48150e5a78518785a67 100644 --- a/src/mednet/scripts/experiment.py +++ b/src/mednet/scripts/experiment.py @@ -23,7 +23,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") \b 1.
Trains a pasa model with montgomery dataset, on the CPU, for only two epochs, then runs inference and evaluation on stock datasets, report - performance as a table and a figure: + performance as a table and figures: .. code:: sh @@ -54,8 +54,7 @@ def experiment( """Runs a complete experiment, from training, to prediction and evaluation. This script is just a wrapper around the individual scripts for training, - running prediction, evaluating and comparing model performance. It - organises the output in a preset way:: + running prediction, and evaluating. It organises the output in a preset way:: \b └─ <output-folder>/ diff --git a/src/mednet/scripts/predict.py b/src/mednet/scripts/predict.py index 828ba4ff9304db9352e73aea1885229f361b0432..c684ed9514c83f980bfa3b65387ed01164a5bc9a 100644 --- a/src/mednet/scripts/predict.py +++ b/src/mednet/scripts/predict.py @@ -37,7 +37,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") "--output", "-o", help="""Path where to store the JSON predictions for all samples in the - input datamodule (leading directories are created if they do not not + input datamodule (leading directories are created if they do not exist).""", required=True, default="results", @@ -49,7 +49,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--model", "-m", - help="""A lightining module instance implementing the network architecture + help="""A lightning module instance implementing the network architecture (not the weights, necessarily) to be used for prediction.""", required=True, cls=ResourceOption, @@ -57,7 +57,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--datamodule", "-d", - help="""A lighting data module that will be asked for prediction data + help="""A lightning data module that will be asked for prediction data loaders. Typically, this includes all configured splits in a datamodule, however this is not a requirement. A datamodule that returns a single dataloader for prediction (wrapped in a dictionary) is acceptable.""", @@ -106,7 +106,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") "-P", help="""Use multiprocessing for data loading: if set to -1 (default), disables multiprocessing data loading. Set to 0 to enable as many data - loading instances as processing cores as available in the system. Set to + loading instances as processing cores available in the system. Set to >= 1 to enable that many multiprocessing instances for data loading.""", type=click.IntRange(min=-1), show_default=True, diff --git a/src/mednet/scripts/saliency/completeness.py b/src/mednet/scripts/saliency/completeness.py index 16d9c8df0e722c3fe54bbdc762cb7e0daa4ee737..62072612e4b5ae6b6ad6d7d3b1f1c72521453450 100644 --- a/src/mednet/scripts/saliency/completeness.py +++ b/src/mednet/scripts/saliency/completeness.py @@ -32,7 +32,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--model", "-m", - help="""A lightining module instance implementing the network architecture + help="""A lightning module instance implementing the network architecture (not the weights, necessarily) to be used for inference. 
Currently, only supports pasa and densenet models.""", required=True, @@ -41,7 +41,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--datamodule", "-d", - help="""A lighting data module that will be asked for prediction data + help="""A lightning data module that will be asked for prediction data loaders. Typically, this includes all configured splits in a datamodule, however this is not a requirement. A datamodule that returns a single dataloader for prediction (wrapped in a dictionary) is acceptable.""", diff --git a/src/mednet/scripts/saliency/generate.py b/src/mednet/scripts/saliency/generate.py index bb1bf4a03ada352c2d2978feca834496afac007f..34e54fd49865821055ed311e271d9f790f72f431 100644 --- a/src/mednet/scripts/saliency/generate.py +++ b/src/mednet/scripts/saliency/generate.py @@ -34,7 +34,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--model", "-m", - help="""A lightining module instance implementing the network architecture + help="""A lightning module instance implementing the network architecture (not the weights, necessarily) to be used for inference. Currently, only supports pasa and densenet models.""", required=True, @@ -43,7 +43,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--datamodule", "-d", - help="""A lighting data module that will be asked for prediction data + help="""A lightning data module that will be asked for prediction data loaders. Typically, this includes all configured splits in a datamodule, however this is not a requirement. A datamodule that returns a single dataloader for prediction (wrapped in a dictionary) is acceptable.""", @@ -86,7 +86,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--weight", "-w", - help="""Path or URL to pretrained model file (`.ckpt` extension), + help="""Path or URL to a pretrained model file (`.ckpt` extension), corresponding to the architecture set with `--model`. Optionally, you may also pass a directory containing the result of a training session, in which case either the best (lowest validation) or latest model will be loaded.""", @@ -105,7 +105,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") "-P", help="""Use multiprocessing for data loading: if set to -1 (default), disables multiprocessing data loading. Set to 0 to enable as many data - loading instances as processing cores as available in the system. Set to + loading instances as processing cores available in the system. Set to >= 1 to enable that many multiprocessing instances for data loading.""", type=click.IntRange(min=-1), show_default=True, diff --git a/src/mednet/scripts/saliency/interpretability.py b/src/mednet/scripts/saliency/interpretability.py index 1a77cbd9320cc7f1b36d30f5c26408b87c900d44..321bb18f51ae0c5c8cc6ce834dbb017365527e39 100644 --- a/src/mednet/scripts/saliency/interpretability.py +++ b/src/mednet/scripts/saliency/interpretability.py @@ -30,7 +30,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--datamodule", "-d", - help="""A lighting data module that will be asked for prediction data + help="""A lightning data module that will be asked for prediction data loaders. Typically, this includes all configured splits in a datamodule, however this is not a requirement. 
A datamodule that returns a single dataloader for prediction (wrapped in a dictionary) is acceptable.""", @@ -40,7 +40,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--input-folder", "-i", - help="""Path where to load saliency maps from. You can generate saliency + help="""Path from where to load saliency maps. You can generate saliency maps with ``mednet saliency generate``.""", required=True, type=click.Path( @@ -108,7 +108,7 @@ def interpretability( of the activations. * Average Saliency Focus: estimates how much of the ground truth bounding boxes area is covered by the activations. It is similar to the - proportional energy measure in the sense it does not need explicit + proportional energy measure in the sense that it does not need explicit thresholding. """ diff --git a/src/mednet/scripts/saliency/view.py b/src/mednet/scripts/saliency/view.py index 56f31fad9123698cd4423bc35f6e1960dc4a0379..b61df6e2272542e50f5e6f2435685b105dd5f060 100644 --- a/src/mednet/scripts/saliency/view.py +++ b/src/mednet/scripts/saliency/view.py @@ -18,7 +18,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") cls=ConfigCommand, epilog="""Examples: -1. Generates visualizations in form of heatmaps from existing saliency maps for a dataset configuration: +1. Generates visualizations in the form of heatmaps from existing saliency maps for a dataset configuration: .. code:: sh @@ -28,14 +28,14 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @click.option( "--model", "-m", - help="A lightining module instance implementing the network to be used for applying the necessary data transformations.", + help="A lightning module instance implementing the network to be used for applying the necessary data transformations.", required=True, cls=ResourceOption, ) @click.option( "--datamodule", "-d", - help="A lighting data module containing the training, validation and test sets.", + help="A lightning data module containing the training, validation and test sets.", required=True, cls=ResourceOption, ) diff --git a/src/mednet/scripts/train.py b/src/mednet/scripts/train.py index 7c329359a61c7e14d72b360db58ff3852e4141af..b09bca133b13653363485ef9ec814fec01431ba0 100644 --- a/src/mednet/scripts/train.py +++ b/src/mednet/scripts/train.py @@ -51,14 +51,14 @@ def reusable_options(f): @click.option( "--model", "-m", - help="A lightining module instance implementing the network to be trained", + help="A lightning module instance implementing the network to be trained", required=True, cls=ResourceOption, ) @click.option( "--datamodule", "-d", - help="A lighting data module containing the training and validation sets.", + help="A lightning data module containing the training and validation sets.", required=True, cls=ResourceOption, ) @@ -87,10 +87,10 @@ def reusable_options(f): "memory requirements for the network). The number of samples " "loaded for every iteration will be batch-size/batch-chunk-count. " "batch-size needs to be divisible by batch-chunk-count, otherwise an " - "error will be raised. This parameter is used to reduce number of " + "error will be raised. This parameter is used to reduce the number of " "samples loaded in each iteration, in order to reduce the memory usage " - "in exchange for processing time (more iterations). This is specially " - "interesting whe one is running with GPUs with limited RAM. The " + "in exchange for processing time (more iterations). 
This is especially " + "interesting when one is training on GPUs with limited RAM. The " "default of 1 forces the whole batch to be processed at once. Otherwise " "the batch is broken into batch-chunk-count pieces, and gradients are " "accumulated to complete each batch.", @@ -103,10 +103,10 @@ def reusable_options(f): @click.option( "--drop-incomplete-batch/--no-drop-incomplete-batch", "-D", - help="If set, then may drop the last batch in an epoch, in case it is " + help="If set, the last batch in an epoch will be dropped if " "incomplete. If you set this option, you should also consider " "increasing the total number of epochs of training, as the total number " - "of training steps may be reduced", + "of training steps may be reduced.", required=True, show_default=True, default=False, @@ -117,7 +117,7 @@ def reusable_options(f): "-e", help="""Number of epochs (complete training set passes) to train for. If continuing from a saved checkpoint, ensure to provide a greater - number of epochs than that saved on the checkpoint to be loaded.""", + number of epochs than was saved in the checkpoint to be loaded.""", show_default=True, required=True, default=1000, @@ -132,7 +132,7 @@ def reusable_options(f): change this to make validation more sparse, by increasing the validation period. Notice that this affects checkpoint saving. While checkpoints are created after every training step (the last training - step always triggers the overriding of latest checkpoint), and that + step always triggers the overriding of latest checkpoint), and this process is independent of validation runs, evaluation of the 'best' model obtained so far based on those will be influenced by this setting.""", @@ -204,8 +204,8 @@ def reusable_options(f): @click.option( "--balance-classes/--no-balance-classes", "-B/-N", - help="""If set, then balances weights of the random sampler during - training, so that samples from all sample classes are picked picked + help="""If set, balances weights of the random sampler during + training so that samples from all sample classes are picked equitably.""", required=True, show_default=True, @@ -224,7 +224,7 @@ def reusable_options(f): cls=ConfigCommand, epilog="""Examples: -1. Trains Pasa's model with Montgomery dataset, on a GPU (``cuda:0``): +1. Trains a pasa model with the montgomery dataset, on a GPU (``cuda:0``): .. code:: sh @@ -253,10 +253,9 @@ def train( """Trains an CNN to perform image classification. Training is performed for a configurable number of epochs, and - generates at least a final_model.ckpt. It may also generate a - number of intermediate checkpoints. Checkpoints are model files - (.ckpt files) that are stored during the training and useful to - resume the procedure in case it stops abruptly. + generates checkpoints. Checkpoints are model files with a .ckpt + extension that are used in subsequent tasks or from which training + can be resumed. """ import os diff --git a/src/mednet/scripts/train_analysis.py b/src/mednet/scripts/train_analysis.py index 7736d2cb9b362b6dbc5ba1d920bae9ff084a2a2e..98c7754863c47775e989c3ddde66bfcef2824afa 100644 --- a/src/mednet/scripts/train_analysis.py +++ b/src/mednet/scripts/train_analysis.py @@ -36,7 +36,7 @@ def create_figures( ) -> list: """Generates figures for each metric in the dataframe. - Each row of the dataframe correspond to an epoch and each column to a metric. + Each row of the dataframe corresponds to an epoch and each column to a metric. It is assumed that some metric names are of the form <metric>/<subset>. 
All subsets for a metric will be displayed on the same figure. @@ -47,12 +47,12 @@ def create_figures( data: A dictionary where keys represent all scalar names, and values correspond to a tuple that contains an array with epoch numbers (when - values were taken), when the monitored values themselves. These lists + values were taken), and the monitored values themselves. These lists are pre-sorted by epoch number. groups: - A list of scalar globs we are interested on the existing tensorboard - data, for plotting. Values with multiple matches are drawn on the same - plot. Values that do not exist are ignored. + A list of scalar globs present in the existing tensorboard data that + we are interested in for plotting. Values with multiple matches are + drawn on the same plot. Values that do not exist are ignored. Returns @@ -118,14 +118,16 @@ def create_figures( mednet train-analysis -vv results/logs """, ) -@click.argument( - "logdir", +@click.option( + "--logdir", + help="Path to the directory containing the Tensorboard training logs", + required=True, type=click.Path(dir_okay=True, exists=True, path_type=pathlib.Path), ) @click.option( "--output", "-o", - help="Name of the output file to dump (multi-page PDF)", + help="Name of the output file to create (multi-page PDF)", required=True, show_default=True, default="trainlog.pdf", @@ -137,7 +139,7 @@ def train_analysis( output: pathlib.Path, ) -> None: """Creates a plot for each metric in the training logs and saves them in a - pdf file.""" + PDF file.""" import matplotlib.pyplot as plt
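To make the multi-page PDF output concrete, here is a minimal sketch using matplotlib's ``PdfPages`` backend. The ``data`` mapping and output file name are invented for the example and do not reflect the script's internals; metric names follow the ``<metric>/<subset>`` convention described above:

.. code:: python

   import matplotlib.pyplot as plt
   from matplotlib.backends.backend_pdf import PdfPages

   # Illustrative scalar data: name -> (epochs, values), pre-sorted by epoch.
   data = {
       "loss/train": ([0, 1, 2], [0.9, 0.6, 0.4]),
       "loss/validation": ([0, 1, 2], [1.0, 0.7, 0.5]),
   }

   with PdfPages("trainlog.pdf") as pdf:
       fig, ax = plt.subplots()
       for name, (epochs, values) in data.items():
           ax.plot(epochs, values, label=name)  # all subsets on one figure
       ax.set_xlabel("epoch")
       ax.set_title("loss")
       ax.legend()
       pdf.savefig(fig)  # each saved figure becomes one PDF page
       plt.close(fig)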