diff --git a/src/mednet/scripts/cli.py b/src/mednet/scripts/cli.py
index fb3f605739811c9a5a5455cf6d8c8a89919c59f3..a6950a566aca7f57dea8cb1ae19de8ef55380af0 100644
--- a/src/mednet/scripts/cli.py
+++ b/src/mednet/scripts/cli.py
@@ -69,3 +69,5 @@ saliency.add_command(
 saliency.add_command(
     importlib.import_module("..saliency.view", package=__name__).view,
 )
+
+cli.add_command(importlib.import_module("..upload", package=__name__).upload)
diff --git a/src/mednet/scripts/upload.py b/src/mednet/scripts/upload.py
new file mode 100644
index 0000000000000000000000000000000000000000..a35184bf71425d784c31012485932aa7f71fc481
--- /dev/null
+++ b/src/mednet/scripts/upload.py
@@ -0,0 +1,161 @@
+# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
+#
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+import configparser
+import json
+import os
+import pathlib
+import shutil
+import tempfile
+
+import click
+import gitlab
+import mlflow
+from clapper.click import ResourceOption, verbosity_option
+from clapper.logging import setup
+
+from .click import ConfigCommand
+
+logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
+
+
+def get_config():
+    """Return a Gitlab connection and the configuration holding its token."""
+
+    cfg = pathlib.Path(os.path.expanduser("~/.python-gitlab.cfg"))
+    if cfg.exists():
+        gl = gitlab.Gitlab.from_config("idiap", [str(cfg)])
+        config = configparser.ConfigParser()
+        config.read(cfg)
+    else:  # ask the user for a token or use one from the current runner
+        server = "https://gitlab.idiap.ch"
+        token = input(f"{server} (user or project) token: ")
+        gl = gitlab.Gitlab(server, private_token=token, api_version="4")
+        config = {"idiap": {"private_token": token}}
+
+    return gl, config
+
+
+def create_temp_copy(source, target):
+    """Copy ``source`` into the system temporary directory as ``target``."""
+    temp_dir = tempfile.gettempdir()
+    target = os.path.join(temp_dir, target)
+    shutil.copy2(source, target)
+    return target
+
+
+@click.command(
+    entry_point_group="mednet.config",
+    cls=ConfigCommand,
+    epilog="""Examples:
+
+1. Upload an existing experiment result from a path it resides on (with a default experiment name as {model-name}_{database-name} and a default run name as {date-time}):
+
+   .. code:: sh
+
+      mednet upload --output-folder=path/to/results
+
+2. Upload an existing experiment result with an experiment name:
+
+   .. code:: sh
+
+      mednet upload --output-folder=path/to/results --experiment-name=exp-pasa_mc
+
+3. Upload an existing experiment result with a run name:
+
+   .. code:: sh
+
+      mednet upload --output-folder=path/to/results --run-name=run-1
+""",
+)
+@click.option(
+    "--output-folder",
+    "-o",
+    help="Directory in which to upload results from",
+    required=True,
+    type=click.Path(
+        file_okay=False,
+        dir_okay=True,
+        path_type=pathlib.Path,
+    ),
+    default="results",
+    cls=ResourceOption,
+)
+@click.option(
+    "--experiment-name",
+    "-e",
+    help='A string indicating the experiment name (e.g. "exp-pasa_mc" or "exp-densenet_mc-ch")',
+    cls=ResourceOption,
+)
+@click.option(
+    "--run-name",
+    "-r",
+    help='A string indicating the run name (e.g. "run-1")',
+    cls=ResourceOption,
+)
+@verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
+def upload(
+    output_folder: pathlib.Path,
+    experiment_name: str,
+    run_name: str,
+    **_,  # ignored
+) -> None:  # numpydoc ignore=PR01
+    """Upload results from an experiment output folder."""
+
+    logger.info("Getting Gitlab credentials for accessing the MLFlow server...")
+    # NOTE: the connection object is named ``gl`` so it does not shadow the
+    # imported ``gitlab`` module.
+    gl, config = get_config()
+    project = gl.projects.get("biosignal/software/mednet")
+    tracking_uri = gl.api_url + f"/projects/{project.id}/ml/mlflow"
+    os.environ["MLFLOW_TRACKING_TOKEN"] = config["idiap"]["private_token"]
+    os.environ["MLFLOW_TRACKING_URI"] = tracking_uri
+
+    # prepare train files
+    train_output_folder = output_folder / "model"
+    train_meta_file = train_output_folder / "meta.json"
+    train_log_file = train_output_folder / "trainlog.pdf"
+    train_model_file = [str(f) for f in train_output_folder.glob("*lowest*")][0]
+    # copy the checkpoint under a name without "=", which MLFlow rejects
+    train_model_temp_file = train_model_file.split(os.sep)[-1].replace("=", "_")
+    train_model_temp_file = create_temp_copy(train_model_file, train_model_temp_file)
+    with train_meta_file.open("r") as f:
+        meta_data = json.load(f)
+
+    # prepare evaluation files
+    evaluation_file = output_folder / "evaluation.json"
+    evaluation_meta_file = output_folder / "evaluation.meta.json"
+    evaluation_log_file = output_folder / "evaluation.pdf"
+    with evaluation_file.open("r") as f:
+        evaluation_data = json.load(f)
+    test_data = evaluation_data["test"]
+
+    # prepare experiment and run names
+    experiment_name = (
+        experiment_name
+        if experiment_name
+        else f'{meta_data["model-name"]}_{meta_data["database-name"]}'
+    )
+    run_name = run_name if run_name else meta_data["datetime"]
+
+    logger.info("Setting experiment and run names on the MLFlow server...")
+    mlflow.set_experiment(experiment_name=experiment_name)
+    with mlflow.start_run(run_name=run_name):
+        # upload metrics
+        logger.info("Uploading metrics to MLFlow server...")
+        mlflow.log_metric("threshold", test_data["threshold"])
+        mlflow.log_metric("precision", test_data["precision"])
+        mlflow.log_metric("recall", test_data["recall"])
+        mlflow.log_metric("f1_score", test_data["f1_score"])
+        mlflow.log_metric("average_precision_score", test_data["average_precision_score"])
+        mlflow.log_metric("specificity", test_data["specificity"])
+        mlflow.log_metric("auc_score", test_data["auc_score"])
+        mlflow.log_metric("accuracy", test_data["accuracy"])
+        mlflow.log_param("version", meta_data["package-version"])
+        # upload artifacts
+        logger.info("Uploading artifacts to MLFlow server...")
+        mlflow.log_artifact(train_meta_file)
+        mlflow.log_artifact(train_log_file)
+        mlflow.log_artifact(train_model_temp_file)
+        mlflow.log_artifact(evaluation_file)
+        mlflow.log_artifact(evaluation_meta_file)
+        mlflow.log_artifact(evaluation_log_file)
diff --git a/tests/test_cli.py b/tests/test_cli.py
index aa04ec07067766e5d7174db4cdd892783c7b5de6..c8d597469b7a8ed733f036e9cd5d2d054b2b779f 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -175,6 +175,12 @@ def test_saliency_evaluate_help():
     _check_help(evaluate)
 
 
+def test_upload_help():
+    from mednet.scripts.upload import upload
+
+    _check_help(upload)
+
+
 @pytest.mark.skip_if_rc_var_not_set("datadir.montgomery")
 def test_train_pasa_montgomery(temporary_basedir):
     from mednet.scripts.train import train