From e4a0b7e2c26328bb601b97b24a8226ce3e203580 Mon Sep 17 00:00:00 2001 From: dcarron <daniel.carron@idiap.ch> Date: Mon, 1 Jul 2024 08:55:26 +0200 Subject: [PATCH] [segmentation.scripts] Add upload script --- .../libs/classification/scripts/upload.py | 2 +- src/mednet/libs/segmentation/scripts/cli.py | 3 + .../libs/segmentation/scripts/upload.py | 175 ++++++++++++++++++ 3 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 src/mednet/libs/segmentation/scripts/upload.py diff --git a/src/mednet/libs/classification/scripts/upload.py b/src/mednet/libs/classification/scripts/upload.py index 12ff291d..7e95426a 100644 --- a/src/mednet/libs/classification/scripts/upload.py +++ b/src/mednet/libs/classification/scripts/upload.py @@ -79,7 +79,7 @@ def upload( ) # get train files - train_folder = experiment_folder / "model" + train_folder = experiment_folder train_log_file = train_folder / "trainlog.pdf" train_meta_file = train_folder / "train.meta.json" train_model_file = get_checkpoint_to_run_inference(train_folder) diff --git a/src/mednet/libs/segmentation/scripts/cli.py b/src/mednet/libs/segmentation/scripts/cli.py index 4f534e58..b6fa62ad 100644 --- a/src/mednet/libs/segmentation/scripts/cli.py +++ b/src/mednet/libs/segmentation/scripts/cli.py @@ -44,3 +44,6 @@ segmentation.add_command(dump_annotations.dump_annotations) segmentation.add_command( importlib.import_module("..experiment", package=__name__).experiment, ) +segmentation.add_command( + importlib.import_module("..upload", package=__name__).upload, +) diff --git a/src/mednet/libs/segmentation/scripts/upload.py b/src/mednet/libs/segmentation/scripts/upload.py new file mode 100644 index 00000000..9abbbd27 --- /dev/null +++ b/src/mednet/libs/segmentation/scripts/upload.py @@ -0,0 +1,175 @@ +# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> +# +# SPDX-License-Identifier: GPL-3.0-or-later + +import pathlib + +import click +from clapper.click import ResourceOption, 
verbosity_option
from clapper.logging import setup
from mednet.libs.common.scripts.click import ConfigCommand
from mednet.libs.common.scripts.upload import reusable_options

logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")


@click.command(
    entry_point_group="mednet.config",
    cls=ConfigCommand,
    epilog="""Examples:

1. Upload an existing experiment result from a path it resides on (with a default experiment name as {model-name}_{database-name} and a default run name as {date-time}):

   .. code:: sh

      mednet segmentation upload --experiment-folder=/path/to/results

2. Upload an existing experiment result with an experiment name:

   .. code:: sh

      mednet segmentation upload --experiment-folder=/path/to/results --experiment-name=exp-pasa_mc

3. Upload an existing experiment result with a run name:

   .. code:: sh

      mednet segmentation upload --experiment-folder=/path/to/results --run-name=run-1

4. Upload an existing experiment result with defining a size limit of 20MB for each file:

   .. code:: sh

      mednet segmentation upload --experiment-folder=/path/to/results --upload-limit-mb=20

""",
)
@reusable_options
@verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
def upload(
    project_path: str,
    experiment_folder: pathlib.Path,
    experiment_name: str,
    run_name: str,
    upload_limit_mb: int,
    **_,  # ignored
) -> None:  # numpydoc ignore=PR01
    """Upload results from an experiment folder to GitLab's MLFlow server.

    Collects the training artifacts (``train.meta.json``, best checkpoint,
    ``trainlog.pdf``) and the evaluation artifacts (``evaluation.json``,
    ``evaluation.meta.json``, ``evaluation.rst``, ``evaluation.pdf``) from
    ``experiment_folder``, checks they fit within ``upload_limit_mb``
    (0 disables the check), then logs parameters, metrics and files to the
    MLFlow instance hosted by the given GitLab project.
    """

    import json
    import os
    import tempfile

    import mlflow
    from mednet.libs.common.utils.checkpointer import (
        get_checkpoint_to_run_inference,
    )
    from mednet.libs.common.utils.gitlab import (
        gitlab_instance_and_token,
        sanitize_filename,
        size_in_mb,
    )

    logger.info("Retrieving GitLab credentials for access to hosted MLFlow server...")
    gitlab, token = gitlab_instance_and_token()
    project = gitlab.projects.get(project_path)
    # MLFlow reads its server location and credentials from the environment
    os.environ["MLFLOW_TRACKING_TOKEN"] = token
    os.environ["MLFLOW_TRACKING_URI"] = (
        gitlab.api_url + f"/projects/{project.id}/ml/mlflow"
    )

    # get train files
    train_folder = experiment_folder
    train_log_file = train_folder / "trainlog.pdf"
    train_meta_file = train_folder / "train.meta.json"
    train_model_file = get_checkpoint_to_run_inference(train_folder)
    train_files = [train_meta_file, train_model_file, train_log_file]

    # get evaluation files
    evaluation_file = experiment_folder / "evaluation.json"
    evaluation_meta_file = experiment_folder / "evaluation.meta.json"
    # BUGFIX: the original code re-assigned ``evaluation_meta_file`` to the
    # .rst path, so ``evaluation.meta.json`` was silently never uploaded.
    evaluation_rst_file = experiment_folder / "evaluation.rst"
    evaluation_log_file = experiment_folder / "evaluation.pdf"
    evaluation_files = [
        evaluation_file,
        evaluation_meta_file,
        evaluation_rst_file,
        evaluation_log_file,
    ]

    # checks for maximum upload limit (0 means "no limit")
    total_size_mb = sum(size_in_mb(f) for f in train_files + evaluation_files)
    if upload_limit_mb != 0 and total_size_mb > upload_limit_mb:
        raise RuntimeError(
            f"Total size of upload ({total_size_mb:.2f} MB) exceeds "
            f"permitted maximum ({upload_limit_mb:.2f} MB)."
        )

    with train_meta_file.open("r") as meta_file:
        train_data = json.load(meta_file)

    with evaluation_file.open("r") as meta_file:
        evaluation_data = json.load(meta_file)
        evaluation_data = evaluation_data["test"]

    # get lowest validation epoch, encoded in the checkpoint file name
    # (``...epoch=N.ckpt``).  BUGFIX: parse the file *name* instead of the
    # full path, so a dot anywhere in a parent directory does not break the
    # ``split(".")`` step.
    best_epoch = train_model_file.name.split(".")[0].split("=")[1]

    experiment_name = (
        experiment_name or f"{train_data['model-name']}-{train_data['database-name']}"
    )
    run_name = run_name or train_data["datetime"]

    click.secho(
        f"Uploading entry `{run_name}` to experiment `{experiment_name}` "
        f"on GitLab project {project_path} (id: {project.id})...",
        bold=True,
        fg="green",
    )
    exp_meta = mlflow.set_experiment(experiment_name=experiment_name)
    with mlflow.start_run(run_name=run_name):
        click.echo("Uploading package metadata...")
        click.echo(f" -> `version` ({train_data['package-version']})")
        mlflow.log_param("package version", train_data["package-version"])

        click.echo("Uploading metrics...")

        for k in [
            "epochs",
            "batch-size",
        ]:
            click.secho(f" -> `{k}` ({train_data[k]})")
            mlflow.log_param(k, train_data[k])

        click.secho(f" -> `#accumulations` ({train_data['accumulate-grad-batches']})")
        mlflow.log_param("#Accumulations", train_data["accumulate-grad-batches"])
        click.secho(f" -> `epoch (best)` ({best_epoch})")
        mlflow.log_param("Epoch (best)", best_epoch)

        # NOTE(review): this metric list is copied from the classification
        # upload script — confirm the segmentation `evaluation.json` exposes
        # exactly these keys under its "test" entry.
        for k in [
            "precision",
            "recall",
            "f1",
            "average_precision_score",
            "specificity",
            "auc_score",
            "accuracy",
        ]:
            click.secho(f" -> `{k}` ({evaluation_data[k]:.3g})")
            mlflow.log_metric(k, evaluation_data[k])

        click.echo("Uploading artifacts (files)...")

        with tempfile.TemporaryDirectory() as tmpdir_name:
            tmpdir = pathlib.Path(tmpdir_name)
            for f in train_files + evaluation_files:
                # BUGFIX: ``assert`` is stripped under ``python -O``; raise an
                # explicit error so missing files always abort the upload
                if not f.exists():
                    raise RuntimeError(f"File `{f}` does not exist - cannot upload!")
                clean_path = str(sanitize_filename(tmpdir, f))
                click.secho(f" -> `{clean_path}` ({size_in_mb(f):.2f} MB)")
                mlflow.log_artifact(clean_path)

    click.secho(f"Uploaded {total_size_mb:.2f} MB to server.", bold=True, fg="green")
    click.secho(
        f"Visit {gitlab.url}/{project.path_with_namespace}/-/ml/experiments/{exp_meta.experiment_id}",
        bold=True,
        fg="blue",
    )