From 7e981881660fcde5a16b8ae75eb95c9a49a43068 Mon Sep 17 00:00:00 2001 From: dcarron <daniel.carron@idiap.ch> Date: Wed, 29 May 2024 17:18:01 +0200 Subject: [PATCH] [upload] Fix upload script after rebase --- .../libs/common/scripts/train_analysis.py | 2 +- src/mednet/libs/common/scripts/upload.py | 210 ------------------ src/mednet/scripts/cli.py | 2 + src/mednet/scripts/upload.py | 10 +- 4 files changed, 8 insertions(+), 216 deletions(-) delete mode 100644 src/mednet/libs/common/scripts/upload.py diff --git a/src/mednet/libs/common/scripts/train_analysis.py b/src/mednet/libs/common/scripts/train_analysis.py index f5295b1b..2fa7e16d 100644 --- a/src/mednet/libs/common/scripts/train_analysis.py +++ b/src/mednet/libs/common/scripts/train_analysis.py @@ -6,7 +6,7 @@ import pathlib import typing import click -from clapper.click import ResourceOption, verbosity_option +from clapper.click import verbosity_option from clapper.logging import setup # avoids X11/graphical desktop requirement when creating plots diff --git a/src/mednet/libs/common/scripts/upload.py b/src/mednet/libs/common/scripts/upload.py deleted file mode 100644 index 409bcc36..00000000 --- a/src/mednet/libs/common/scripts/upload.py +++ /dev/null @@ -1,210 +0,0 @@ -# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch> -# -# SPDX-License-Identifier: GPL-3.0-or-later - -import pathlib - -import click -from clapper.click import ResourceOption, verbosity_option -from clapper.logging import setup - -from .click import ConfigCommand - -logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") - - -@click.command( - cls=ConfigCommand, - epilog="""Examples: - -1. Upload an existing experiment result from a path it resides on (with a default experiment name as {model-name}_{database-name} and a default run name as {date-time}): - - .. code:: sh - - mednet upload --experiment-folder=/path/to/results - -2. Upload an existing experiment result with an experiment name: - - .. code:: sh - - mednet upload --experiment-folder=/path/to/results --experiment-name=exp-pasa_mc - -3. Upload an existing experiment result with a run name: - - .. code:: sh - - mednet upload --experiment-folder=/path/to/results --run-name=run-1 - -4. Upload an existing experiment result with defining a size limit of 20MB for each file: - - .. code:: sh - - mednet upload --experiment-folder=/path/to/results --upload-limit-mb=20 - -""", -) -@click.option( - "--project-path", - "-p", - help="Path to the project where to upload model entries", - required=True, - type=str, - default="biosignal/software/mednet", - show_default=True, - cls=ResourceOption, -) -@click.option( - "--experiment-folder", - "-f", - help="Directory in which to upload results from", - required=True, - type=click.Path( - file_okay=False, - dir_okay=True, - path_type=pathlib.Path, - ), - default="results", - show_default=True, - cls=ResourceOption, -) -@click.option( - "--experiment-name", - "-e", - help='A string indicating the experiment name (e.g. "exp-pasa-mc" or "exp-densenet-mc-ch")', - cls=ResourceOption, -) -@click.option( - "--run-name", - "-r", - help='A string indicating the run name (e.g. "run-1")', - cls=ResourceOption, -) -@click.option( - "--upload-limit-mb", - "-l", - help="Maximim upload size in MB (set to 0 for no limit).", - show_default=True, - required=True, - default=10, - type=click.IntRange(min=0), - cls=ResourceOption, -) -@verbosity_option(logger=logger, cls=ResourceOption, expose_value=False) -def upload( - project_path: str, - experiment_folder: pathlib.Path, - experiment_name: str, - run_name: str, - upload_limit_mb: int, - **_, # ignored -) -> None: # numpydoc ignore=PR01 - """Upload results from an experiment folder to GitLab's MLFlow server.""" - - import json - import os - import tempfile - - import mlflow - from mednet.libs.common.utils.checkpointer import ( - get_checkpoint_to_run_inference, - ) - from mednet.libs.common.utils.gitlab import ( - gitlab_instance_and_token, - sanitize_filename, - size_in_mb, - ) - - logger.info( - "Retrieving GitLab credentials for access to hosted MLFlow server..." - ) - gitlab, token = gitlab_instance_and_token() - project = gitlab.projects.get(project_path) - os.environ["MLFLOW_TRACKING_TOKEN"] = token - os.environ["MLFLOW_TRACKING_URI"] = ( - gitlab.api_url + f"/projects/{project.id}/ml/mlflow" - ) - - # get train files - train_folder = experiment_folder / "model" - train_meta_file = train_folder / "meta.json" - train_log_file = train_folder / "trainlog.pdf" - train_model_file = get_checkpoint_to_run_inference(train_folder) - train_files = [train_meta_file, train_model_file, train_log_file] - - # get evaluation files - evaluation_file = experiment_folder / "evaluation.json" - evaluation_meta_file = experiment_folder / "evaluation.meta.json" - evaluation_log_file = experiment_folder / "evaluation.pdf" - evaluation_files = [ - evaluation_file, - evaluation_meta_file, - evaluation_log_file, - ] - - # checks for maximum upload limit - total_size_mb = sum([size_in_mb(f) for f in train_files + evaluation_files]) - if upload_limit_mb != 0 and total_size_mb > upload_limit_mb: - raise RuntimeError( - f"Total size of upload ({total_size_mb:.2f} MB) exceeds " - f"permitted maximum ({upload_limit_mb:.2f} MB)." - ) - - # prepare experiment and run names - with train_meta_file.open("r") as meta_file: - train_data = json.load(meta_file) - - with evaluation_file.open("r") as meta_file: - evaluation_data = json.load(meta_file) - evaluation_data = evaluation_data["test"] - - experiment_name = ( - experiment_name - or f"{train_data['model-name']}-{train_data['database-name']}" - ) - run_name = run_name or train_data["datetime"] - - click.secho( - f"Uploading entry `{run_name}` to experiment `{experiment_name}` " - f"on GitLab project {project_path} (id: {project.id})...", - bold=True, - fg="green", - ) - exp_meta = mlflow.set_experiment(experiment_name=experiment_name) - with mlflow.start_run(run_name=run_name): - click.echo("Uploading package metadata...") - click.echo(f" -> `version` ({train_data['package-version']})") - mlflow.log_param("package version", train_data["package-version"]) - - click.echo("Uploading metrics...") - - for k in [ - "threshold", - "precision", - "recall", - "f1_score", - "average_precision_score", - "specificity", - "auc_score", - "accuracy", - ]: - click.secho(f" -> `{k}` ({evaluation_data[k]:.3g})") - mlflow.log_metric(k, evaluation_data[k]) - - click.echo("Uploading artifacts (files)...") - - with tempfile.TemporaryDirectory() as tmpdir_name: - tmpdir = pathlib.Path(tmpdir_name) - for f in train_files + evaluation_files: - assert f.exists(), f"File `{f}` does not exist - cannot upload!" - clean_path = str(sanitize_filename(tmpdir, f)) - click.secho(f" -> `{clean_path}` ({size_in_mb(f):.2f} MB)") - mlflow.log_artifact(clean_path) - - click.secho( - f"Uploaded {total_size_mb:.2f} MB to server.", bold=True, fg="green" - ) - click.secho( - f"Visit {gitlab.url}/{project.path_with_namespace}/-/ml/experiments/{exp_meta.experiment_id}", - bold=True, - fg="blue", - ) diff --git a/src/mednet/scripts/cli.py b/src/mednet/scripts/cli.py index 59df4ef6..3b9beb51 100644 --- a/src/mednet/scripts/cli.py +++ b/src/mednet/scripts/cli.py @@ -4,6 +4,7 @@ from mednet.libs.classification.scripts.cli import classification from mednet.libs.segmentation.scripts.cli import segmentation from .info import info +from .upload import upload @click.group( @@ -18,3 +19,4 @@ def cli(): cli.add_command(classification) cli.add_command(segmentation) cli.add_command(info) +cli.add_command(upload) diff --git a/src/mednet/scripts/upload.py b/src/mednet/scripts/upload.py index 7df289ae..a10d6518 100644 --- a/src/mednet/scripts/upload.py +++ b/src/mednet/scripts/upload.py @@ -7,8 +7,7 @@ import pathlib import click from clapper.click import ResourceOption, verbosity_option from clapper.logging import setup - -from .click import ConfigCommand +from mednet.libs.common.scripts.click import ConfigCommand logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s") @@ -106,9 +105,10 @@ def upload( import tempfile import mlflow - - from ..utils.checkpointer import get_checkpoint_to_run_inference - from ..utils.gitlab import ( + from mednet.libs.common.utils.checkpointer import ( + get_checkpoint_to_run_inference, + ) + from mednet.libs.common.utils.gitlab import ( gitlab_instance_and_token, sanitize_filename, size_in_mb, -- GitLab