diff --git a/src/ptbench/engine/road_calculator.py b/src/ptbench/engine/road_calculator.py
index 44237dadd7b0020fa15d3e0779f4ce260943e97f..f2e2a82e112e8f008ef0408de9a4e26a9f9d2f93 100644
--- a/src/ptbench/engine/road_calculator.py
+++ b/src/ptbench/engine/road_calculator.py
@@ -142,11 +142,11 @@ def run(
     Parameters
     ---------
-    model : py:class:`torch.nn.Module`
-        The model to use for visualization.
+    model
+        Neural network model (e.g. Pasa).
-    data_loader : py:class:`torch.torch.utils.data.DataLoader`
-        The pytorch Dataloader used to iterate over batches.
+    data_loader
+        The PyTorch DataLoader, provided by the lightning datamodule, used to iterate over batches.
     output_folder : str
         Directory in which the results will be saved.
@@ -196,12 +196,12 @@ def run(
     for samples in tqdm(data_loader, desc="batches", leave=False, disable=None):
         # TB negative labels are skipped
-        if samples[2][0].item() == 0:
+        if samples[1]["label"].item() == 0:
             if tb_positive_only:
                 continue
-        names = samples[0]
-        images = samples[1].to(
+        names = samples[1]["name"]
+        images = samples[0].to(
             device=device, non_blocking=torch.cuda.is_available()
         )
diff --git a/src/ptbench/engine/saliencymap_generator.py b/src/ptbench/engine/saliencymap_generator.py
index 598a66da170655d8b47f9c3a9fb089576b4a298a..ff31b56b1be99e6a7bfa65cb09eb104384c59069 100644
--- a/src/ptbench/engine/saliencymap_generator.py
+++ b/src/ptbench/engine/saliencymap_generator.py
@@ -65,11 +65,11 @@ def run(
     Parameters
     ---------
-    model : py:class:`torch.nn.Module`
-        The model to use for the saliency map calculation.
+    model
+        Neural network model (e.g. Pasa).
-    data_loader : py:class:`torch.torch.utils.data.DataLoader`
-        The pytorch Dataloader used to iterate over batches.
+    data_loader
+        The PyTorch DataLoader, provided by the lightning datamodule, used to iterate over batches.
     output_folder : str
         Directory in which the results will be saved.
@@ -114,12 +114,12 @@ def run(
     for samples in tqdm(data_loader, desc="batches", leave=False, disable=None):
         # TB negative labels are skipped (they don't have bboxes)
-        if samples[2][0].item() == 0:
+        if samples[1]["label"].item() == 0:
             if tb_positive_only:
                 continue
-        names = samples[0]
-        images = samples[1].to(
+        names = samples[1]["name"]
+        images = samples[0].to(
             device=device, non_blocking=torch.cuda.is_available()
         )
diff --git a/src/ptbench/scripts/calculate_road.py b/src/ptbench/scripts/calculate_road.py
index 5c9080956f956340cc2c22471c16fc7716cf58db..41bbe3db889d07e251b712498e288fcdaaedc0a0 100644
--- a/src/ptbench/scripts/calculate_road.py
+++ b/src/ptbench/scripts/calculate_road.py
@@ -4,6 +4,7 @@
 import csv
 import os
+import pathlib

 import click
@@ -193,36 +194,37 @@ def prepare_csv_writers(

     .. code:: sh

-        ptbench calculate-road -vv pasa tbx11k_simplified_bbox -a "cuda" --weight=path/to/model_final.pth --output-folder=path/to/output_folder
+        ptbench calculate-road -vv pasa tbx11k_simplified_bbox --device="cuda" --weight=path/to/model_final.pth --output-folder=path/to/output_folder

 """,
 )
 @click.option(
     "--model",
     "-m",
-    help="A torch.nn.Module instance implementing the network to be evaluated",
+    help="A lightning module instance implementing the network to be evaluated.",
     required=True,
     cls=ResourceOption,
 )
 @click.option(
-    "--dataset",
+    "--datamodule",
     "-d",
-    help="A torch.utils.data.dataset.Dataset instance implementing a dataset "
-    "to be used for generating visualizations, possibly including all pre-processing "
-    "pipelines required or, optionally, a dictionary mapping string keys to "
-    "torch.utils.data.dataset.Dataset instances. All keys that do not start "
-    "with an underscore (_) will be processed.",
+    help="A lightning datamodule containing the sets for which ROAD scores will be calculated.",
     required=True,
     cls=ResourceOption,
 )
 @click.option(
     "--output-folder",
     "-o",
-    help="Path where to store the road metrics .csv files (created if does not exist)",
+    help="Path where to store the ROAD metrics .csv files (created if does not exist)",
     required=True,
+    type=click.Path(
+        file_okay=False,
+        dir_okay=True,
+        writable=True,
+        path_type=pathlib.Path,
+    ),
     default="visualizations",
     cls=ResourceOption,
-    type=click.Path(),
 )
 @click.option(
     "--batch-size",
@@ -235,9 +237,9 @@ def prepare_csv_writers(
     cls=ResourceOption,
 )
 @click.option(
-    "--accelerator",
-    "-a",
-    help='A string indicating the accelerator to use (e.g. "cpu" or "gpu"). The device can also be specified (gpu:0)',
+    "--device",
+    "-x",
+    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
     show_default=True,
     required=True,
     default="cpu",
     cls=ResourceOption,
 )
 @click.option(
     "--weight",
     "-w",
-    help="Path or URL to pretrained model file (.ckpt extension)",
+    help="""Path or URL to pretrained model file (`.ckpt` extension),
+    corresponding to the architecture set with `--model`.""",
     required=True,
     cls=ResourceOption,
 )
@@ -280,10 +283,10 @@ def prepare_csv_writers(
 @verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
 def calculate_road(
     model,
-    dataset,
+    datamodule,
     output_folder,
     batch_size,
-    accelerator,
+    device,
     weight,
     visualization_types,
     target_class,
@@ -296,65 +299,48 @@ def calculate_road(

     Calculates them for each target class and split of the dataset.
     """
-    import torch
+    from ..engine.device import DeviceManager
+    from ..engine.road_calculator import run
+    from .utils import save_sh_command

-    from torch.utils.data import DataLoader
+    save_sh_command(output_folder / "command.sh")

-    from ..engine.road_calculator import run
+    device_manager = DeviceManager(device)
+    device = device_manager.torch_device()
+    use_cuda = device_manager.device_type == "cuda"

-    # Temporary solution due to transition to PyTorch Lightning
-    if accelerator.startswith("cuda") or accelerator.startswith("gpu"):
-        use_cuda = torch.cuda.is_available()
-        device = "cuda:0" if use_cuda else "cpu"
-    else:
-        use_cuda = False
-        device = "cpu"
-
-    if "datadir" in dataset:
-        dataset = (
-            dataset["dataset"]
-            if isinstance(dataset["dataset"], dict)
-            else dict(test=dataset["dataset"])
-        )
-    else:
-        dataset = dataset if isinstance(dataset, dict) else dict(test=dataset)
+    datamodule.set_chunk_size(batch_size, 1)
+    datamodule.drop_incomplete_batch = False
+    # datamodule.cache_samples = cache_samples
+    # datamodule.parallel = parallel
+    datamodule.model_transforms = model.model_transforms

-    logger.info(f"Loading checkpoint from {weight}")
+    datamodule.prepare_data()
+    datamodule.setup(stage="predict")

-    # This is a temporary solution due to transition to PyTorch Lightning
-    # This will not be necessary for future users of this package
-    state_dict = torch.load(weight, map_location=torch.device("cpu")).pop(
-        "model"
-    )
-    new_state_dict = {k.replace("model.", ""): v for k, v in state_dict.items()}
-    model.load_state_dict(new_state_dict)
+    dataloaders = datamodule.predict_dataloader()

-    # This code should work for future users of this package (no guarantee)
-    # model = model.load_from_checkpoint(weight, strict=False)
+    logger.info(f"Loading checkpoint from {weight}")

-    model.eval()
+    model = model.load_from_checkpoint(weight, strict=False)
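+    # the pytorch-grad-cam CAM constructors used below are expected to put
+    # the model in eval mode themselves; if that assumption does not hold,
+    # an explicit model.eval() call is needed at this point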

     visualization_types = check_vis_types(visualization_types)

     model_name = model.__class__.__name__

-    if model_name == "PASA":
+    if model_name == "Pasa":
         if "fullgrad" in visualization_types:
             raise ValueError(
                 "Fullgrad visualization is not supported for the Pasa model."
             )
         target_layers = [model.fc14]  # Last non-1x1 Conv2d layer
     else:
-        target_layers = [model.model_ft.features.denseblock4.denselayer16.conv2]
+        target_layers = [model.denseblock4.denselayer16.conv2]

     for vis_type in visualization_types:
         cam = create_cam(vis_type, model, target_layers, use_cuda)

-        for k, v in dataset.items():
-            if k.startswith("_"):
-                logger.info(f"Skipping dataset '{k}' (not to be evaluated)")
-                continue
-
+        for k, v in dataloaders.items():
             if model_name == "DensenetRS" and target_class.lower() == "all":
                 csv_files, csv_writers = prepare_csv_writers(
                     output_folder, vis_type, k, num_classes=14
@@ -363,15 +349,10 @@ def calculate_road(
                 csv_files, csv_writers = prepare_csv_writers(
                     output_folder, vis_type, k, num_classes=0
                 )
-
+
             logger.info(f"Calculating ROAD scores for '{k}' set...")

-            data_loader = DataLoader(
-                dataset=v,
-                batch_size=batch_size,
-                shuffle=False,
-                pin_memory=torch.cuda.is_available(),
-            )
+            data_loader = v

             run(
                 model,
diff --git a/src/ptbench/scripts/generate_saliencymaps.py b/src/ptbench/scripts/generate_saliencymaps.py
index ffecafe9b4f709dc00880bce80bf3f233c42e749..331e31db79ee4c26278046e0b34f0725e7d7a62c 100644
--- a/src/ptbench/scripts/generate_saliencymaps.py
+++ b/src/ptbench/scripts/generate_saliencymaps.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: GPL-3.0-or-later

 import click
+import pathlib

 from clapper.click import ConfigCommand, ResourceOption, verbosity_option
 from clapper.logging import setup
@@ -125,25 +126,21 @@ def create_cam(vis_type, model, target_layers, use_cuda):

     .. code:: sh

-        ptbench generate-saliencymaps -vv densenet tbx11k_simplified_bbox_rgb --accelerator="cuda" --weight=path/to/model_final.pth --output-folder=path/to/visualizations
+        ptbench generate-saliencymaps -vv densenet tbx11k_simplified_bbox_rgb --device="cuda" --weight=path/to/model_final.pth --output-folder=path/to/visualizations

 """,
 )
 @click.option(
     "--model",
     "-m",
-    help="A torch.nn.Module instance implementing the network to be evaluated",
+    help="A lightning module instance implementing the network to be evaluated.",
     required=True,
     cls=ResourceOption,
 )
 @click.option(
-    "--dataset",
+    "--datamodule",
     "-d",
-    help="A torch.utils.data.dataset.Dataset instance implementing a dataset "
-    "to be used for generating visualizations, possibly including all pre-processing "
-    "pipelines required or, optionally, a dictionary mapping string keys to "
-    "torch.utils.data.dataset.Dataset instances. All keys that do not start "
-    "with an underscore (_) will be processed.",
+    help="A lightning datamodule containing the sets to generate saliency maps for.",
     required=True,
     cls=ResourceOption,
 )
@@ -152,9 +149,14 @@ def create_cam(vis_type, model, target_layers, use_cuda):
     "-o",
     help="Path where to store the visualizations (created if does not exist)",
     required=True,
+    type=click.Path(
+        file_okay=False,
+        dir_okay=True,
+        writable=True,
+        path_type=pathlib.Path,
+    ),
     default="visualizations",
     cls=ResourceOption,
-    type=click.Path(),
 )
 @click.option(
     "--batch-size",
@@ -167,9 +169,9 @@ def create_cam(vis_type, model, target_layers, use_cuda):
     cls=ResourceOption,
 )
 @click.option(
-    "--accelerator",
-    "-a",
-    help='A string indicating the accelerator to use (e.g. "cpu" or "gpu"). The device can also be specified (gpu:0)',
+    "--device",
+    "-x",
+    help='A string indicating the device to use (e.g. "cpu" or "cuda:0")',
     show_default=True,
     required=True,
     default="cpu",
     cls=ResourceOption,
 )
 @click.option(
     "--weight",
     "-w",
-    help="Path or URL to pretrained model file (.ckpt extension)",
+    help="""Path or URL to pretrained model file (`.ckpt` extension),
+    corresponding to the architecture set with `--model`.""",
     required=True,
     cls=ResourceOption,
 )
@@ -212,10 +215,10 @@ def create_cam(vis_type, model, target_layers, use_cuda):
 @verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
 def generate_saliencymaps(
     model,
-    dataset,
+    datamodule,
     output_folder,
     batch_size,
-    accelerator,
+    device,
     weight,
     visualization_types,
     target_class,
@@ -225,73 +228,52 @@ def generate_saliencymaps(
     """Generates saliency maps for locations with aTB for input CXRs,
     depending on visualization technique and model."""

-    import torch
-
-    from torch.utils.data import DataLoader
-
+    from ..engine.device import DeviceManager
+    from .utils import save_sh_command
     from ..engine.saliencymap_generator import run

-    # Temporary solution due to transition to PyTorch Lightning
-    if accelerator.startswith("cuda") or accelerator.startswith("gpu"):
-        use_cuda = torch.cuda.is_available()
-        device = "cuda:0" if use_cuda else "cpu"
-    else:
-        use_cuda = False
-        device = "cpu"
+    save_sh_command(output_folder / "command.sh")

-    if "datadir" in dataset:
-        dataset = (
-            dataset["dataset"]
-            if isinstance(dataset["dataset"], dict)
-            else dict(test=dataset["dataset"])
-        )
-    else:
-        dataset = dataset if isinstance(dataset, dict) else dict(test=dataset)
+    device_manager = DeviceManager(device)
+    device = device_manager.torch_device()
+    use_cuda = device_manager.device_type == "cuda"

-    logger.info(f"Loading checkpoint from {weight}")
+    datamodule.set_chunk_size(batch_size, 1)
+    datamodule.drop_incomplete_batch = False
+    # datamodule.cache_samples = cache_samples
+    # datamodule.parallel = parallel
+    datamodule.model_transforms = model.model_transforms
+
+    datamodule.prepare_data()
+    datamodule.setup(stage="predict")

-    # This is a temporary solution due to transition to PyTorch Lightning
-    # This will not be necessary for future users of this package
-    state_dict = torch.load(weight, map_location=torch.device("cpu")).pop(
-        "model"
-    )
-    new_state_dict = {k.replace("model.", ""): v for k, v in state_dict.items()}
-    model.load_state_dict(new_state_dict)
+    dataloaders = datamodule.predict_dataloader()

-    # This code should work for future users of this package (no guarantee)
-    # model = model.load_from_checkpoint(weight, strict=False)
+    logger.info(f"Loading checkpoint from {weight}")

-    model.eval()
+    model = model.load_from_checkpoint(weight, strict=False)

     visualization_types = check_vis_types(visualization_types)

     model_name = model.__class__.__name__

-    if model_name == "PASA":
+    if model_name == "Pasa":
         if "fullgrad" in visualization_types:
             raise ValueError(
                 "Fullgrad visualization is not supported for the Pasa model."
             )
         target_layers = [model.fc14]  # Last non-1x1 Conv2d layer
     else:
-        target_layers = [model.model_ft.features.denseblock4.denselayer16.conv2]
+        # if this attribute path does not resolve for densenet, inspect the
+        # model (e.g. print(model)) to locate the last dense-layer conv2
+        target_layers = [model.denseblock4.denselayer16.conv2]

     for vis_type in visualization_types:
         cam = create_cam(vis_type, model, target_layers, use_cuda)

-        for k, v in dataset.items():
-            if k.startswith("_"):
-                logger.info(f"Skipping dataset '{k}' (not to be evaluated)")
-                continue
-
+        for k, v in dataloaders.items():
             logger.info(f"Generating saliency maps for '{k}' set...")

-            data_loader = DataLoader(
-                dataset=v,
-                batch_size=batch_size,
-                shuffle=False,
-                pin_memory=torch.cuda.is_available(),
-            )
+            data_loader = v

             run(
                 model,