Skip to content
Snippets Groups Projects
Commit 7e981881 authored by Daniel CARRON's avatar Daniel CARRON :b: Committed by André Anjos
Browse files

[upload] Fix upload script after rebase

parent 8ce33d1d
No related branches found
No related tags found
1 merge request: !46 "Create common library"
......@@ -6,7 +6,7 @@ import pathlib
import typing
import click
from clapper.click import ResourceOption, verbosity_option
from clapper.click import verbosity_option
from clapper.logging import setup
# avoids X11/graphical desktop requirement when creating plots
......
# SPDX-FileCopyrightText: Copyright © 2023 Idiap Research Institute <contact@idiap.ch>
#
# SPDX-License-Identifier: GPL-3.0-or-later
import pathlib
import click
from clapper.click import ResourceOption, verbosity_option
from clapper.logging import setup
from .click import ConfigCommand
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
@click.command(
    cls=ConfigCommand,
    epilog="""Examples:

1. Upload an existing experiment result from a path it resides on (with a default experiment name as {model-name}-{database-name} and a default run name as {date-time}):

   .. code:: sh

      mednet upload --experiment-folder=/path/to/results

2. Upload an existing experiment result with an experiment name:

   .. code:: sh

      mednet upload --experiment-folder=/path/to/results --experiment-name=exp-pasa_mc

3. Upload an existing experiment result with a run name:

   .. code:: sh

      mednet upload --experiment-folder=/path/to/results --run-name=run-1

4. Upload an existing experiment result with defining a size limit of 20MB for each file:

   .. code:: sh

      mednet upload --experiment-folder=/path/to/results --upload-limit-mb=20

""",
)
@click.option(
    "--project-path",
    "-p",
    help="Path to the project where to upload model entries",
    required=True,
    type=str,
    default="biosignal/software/mednet",
    show_default=True,
    cls=ResourceOption,
)
@click.option(
    "--experiment-folder",
    "-f",
    help="Directory in which to upload results from",
    required=True,
    type=click.Path(
        file_okay=False,
        dir_okay=True,
        path_type=pathlib.Path,
    ),
    default="results",
    show_default=True,
    cls=ResourceOption,
)
@click.option(
    "--experiment-name",
    "-e",
    help='A string indicating the experiment name (e.g. "exp-pasa-mc" or "exp-densenet-mc-ch")',
    cls=ResourceOption,
)
@click.option(
    "--run-name",
    "-r",
    help='A string indicating the run name (e.g. "run-1")',
    cls=ResourceOption,
)
@click.option(
    "--upload-limit-mb",
    "-l",
    help="Maximum upload size in MB (set to 0 for no limit).",
    show_default=True,
    required=True,
    default=10,
    type=click.IntRange(min=0),
    cls=ResourceOption,
)
@verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
def upload(
    project_path: str,
    experiment_folder: pathlib.Path,
    experiment_name: str,
    run_name: str,
    upload_limit_mb: int,
    **_,  # ignored
) -> None:  # numpydoc ignore=PR01
    """Upload results from an experiment folder to GitLab's MLFlow server."""
    # NOTE: the docstring above is kept as a one-liner on purpose: click uses
    # it as the command's help text.
    #
    # ``experiment_name`` and ``run_name`` have no click default, so they are
    # falsy when the user does not pass them; fallbacks are derived below from
    # the training metadata (``model/meta.json``).

    # Heavy/optional dependencies are imported lazily so that merely loading
    # the CLI module does not require them.
    import json
    import os
    import tempfile

    import mlflow
    from mednet.libs.common.utils.checkpointer import (
        get_checkpoint_to_run_inference,
    )
    from mednet.libs.common.utils.gitlab import (
        gitlab_instance_and_token,
        sanitize_filename,
        size_in_mb,
    )

    logger.info(
        "Retrieving GitLab credentials for access to hosted MLFlow server..."
    )
    gitlab, token = gitlab_instance_and_token()
    project = gitlab.projects.get(project_path)

    # MLflow reads its tracking endpoint and credentials from the environment.
    os.environ["MLFLOW_TRACKING_TOKEN"] = token
    os.environ["MLFLOW_TRACKING_URI"] = (
        gitlab.api_url + f"/projects/{project.id}/ml/mlflow"
    )

    # get train files
    train_folder = experiment_folder / "model"
    train_meta_file = train_folder / "meta.json"
    train_log_file = train_folder / "trainlog.pdf"
    train_model_file = get_checkpoint_to_run_inference(train_folder)
    train_files = [train_meta_file, train_model_file, train_log_file]

    # get evaluation files
    evaluation_file = experiment_folder / "evaluation.json"
    evaluation_meta_file = experiment_folder / "evaluation.meta.json"
    evaluation_log_file = experiment_folder / "evaluation.pdf"
    evaluation_files = [
        evaluation_file,
        evaluation_meta_file,
        evaluation_log_file,
    ]

    files_to_upload = train_files + evaluation_files

    # Fail early, with an explicit exception, when something is missing.  This
    # used to be an ``assert`` buried inside the MLflow run: it would vanish
    # under ``python -O`` and, when it did fire, only after the parameter and
    # metrics had already been logged to the server.
    for f in files_to_upload:
        if not f.exists():
            raise FileNotFoundError(
                f"File `{f}` does not exist - cannot upload!"
            )

    # checks for maximum upload limit (0 disables the check)
    total_size_mb = sum(size_in_mb(f) for f in files_to_upload)
    if upload_limit_mb != 0 and total_size_mb > upload_limit_mb:
        raise RuntimeError(
            f"Total size of upload ({total_size_mb:.2f} MB) exceeds "
            f"permitted maximum ({upload_limit_mb:.2f} MB)."
        )

    # prepare experiment and run names
    with train_meta_file.open("r") as meta_file:
        train_data = json.load(meta_file)

    with evaluation_file.open("r") as meta_file:
        evaluation_data = json.load(meta_file)
    # only the metrics stored under the "test" key are uploaded
    evaluation_data = evaluation_data["test"]

    experiment_name = (
        experiment_name
        or f"{train_data['model-name']}-{train_data['database-name']}"
    )
    run_name = run_name or train_data["datetime"]

    click.secho(
        f"Uploading entry `{run_name}` to experiment `{experiment_name}` "
        f"on GitLab project {project_path} (id: {project.id})...",
        bold=True,
        fg="green",
    )
    exp_meta = mlflow.set_experiment(experiment_name=experiment_name)

    with mlflow.start_run(run_name=run_name):
        click.echo("Uploading package metadata...")
        click.echo(f"  -> `version` ({train_data['package-version']})")
        mlflow.log_param("package version", train_data["package-version"])

        click.echo("Uploading metrics...")
        for k in (
            "threshold",
            "precision",
            "recall",
            "f1_score",
            "average_precision_score",
            "specificity",
            "auc_score",
            "accuracy",
        ):
            click.secho(f"  -> `{k}` ({evaluation_data[k]:.3g})")
            mlflow.log_metric(k, evaluation_data[k])

        click.echo("Uploading artifacts (files)...")
        with tempfile.TemporaryDirectory() as tmpdir_name:
            tmpdir = pathlib.Path(tmpdir_name)
            for f in files_to_upload:
                # NOTE(review): ``sanitize_filename`` presumably stages ``f``
                # under a cleaned-up name inside ``tmpdir`` - confirm against
                # the helper; only its returned path is uploaded.
                clean_path = str(sanitize_filename(tmpdir, f))
                click.secho(f"  -> `{clean_path}` ({size_in_mb(f):.2f} MB)")
                mlflow.log_artifact(clean_path)

    click.secho(
        f"Uploaded {total_size_mb:.2f} MB to server.", bold=True, fg="green"
    )
    click.secho(
        f"Visit {gitlab.url}/{project.path_with_namespace}/-/ml/experiments/{exp_meta.experiment_id}",
        bold=True,
        fg="blue",
    )
......@@ -4,6 +4,7 @@ from mednet.libs.classification.scripts.cli import classification
from mednet.libs.segmentation.scripts.cli import segmentation
from .info import info
from .upload import upload
@click.group(
......@@ -18,3 +19,4 @@ def cli():
cli.add_command(classification)
cli.add_command(segmentation)
cli.add_command(info)
cli.add_command(upload)
......@@ -7,8 +7,7 @@ import pathlib
import click
from clapper.click import ResourceOption, verbosity_option
from clapper.logging import setup
from .click import ConfigCommand
from mednet.libs.common.scripts.click import ConfigCommand
logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
......@@ -106,9 +105,10 @@ def upload(
import tempfile
import mlflow
from ..utils.checkpointer import get_checkpoint_to_run_inference
from ..utils.gitlab import (
from mednet.libs.common.utils.checkpointer import (
get_checkpoint_to_run_inference,
)
from mednet.libs.common.utils.gitlab import (
gitlab_instance_and_token,
sanitize_filename,
size_in_mb,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment