Commit 740c3278, authored by Daniel CARRON

[upload] Move common upload code to common lib

parent 3797df82
1 merge request: !46 Create common library
Pipeline #89234 failed
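
This commit factors the MLflow upload logic that was previously duplicated in the task-specific upload commands into a single shared helper. Three hunks follow: the classification upload command is reduced to a thin wrapper that passes its metric names to the shared helper, a new upload() function (taking the metrics to report as a parameter) is added to mednet.libs.common.scripts.upload, and the segmentation upload command becomes the same kind of wrapper, differing only in its metric list.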
@@ -54,123 +54,26 @@ def upload(
     upload_limit_mb: int,
     **_,  # ignored
 ) -> None:  # numpydoc ignore=PR01
-    """Upload results from an experiment folder to GitLab's MLFlow server."""
-
-    import json
-    import os
-    import tempfile
-
-    import mlflow
-    from mednet.libs.common.utils.checkpointer import (
-        get_checkpoint_to_run_inference,
-    )
-    from mednet.libs.common.utils.gitlab import (
-        gitlab_instance_and_token,
-        sanitize_filename,
-        size_in_mb,
-    )
-
-    logger.info("Retrieving GitLab credentials for access to hosted MLFlow server...")
-    gitlab, token = gitlab_instance_and_token()
-    project = gitlab.projects.get(project_path)
-    os.environ["MLFLOW_TRACKING_TOKEN"] = token
-    os.environ["MLFLOW_TRACKING_URI"] = (
-        gitlab.api_url + f"/projects/{project.id}/ml/mlflow"
-    )
-
-    # get train files
-    train_folder = experiment_folder
-    train_log_file = train_folder / "trainlog.pdf"
-    train_meta_file = train_folder / "train.meta.json"
-    train_model_file = get_checkpoint_to_run_inference(train_folder)
-    train_files = [train_meta_file, train_model_file, train_log_file]
-
-    # get evaluation files
-    evaluation_file = experiment_folder / "evaluation.json"
-    evaluation_meta_file = experiment_folder / "evaluation.meta.json"
-    evaluation_meta_file = experiment_folder / "evaluation.rst"
-    evaluation_log_file = experiment_folder / "evaluation.pdf"
-    evaluation_files = [
-        evaluation_file,
-        evaluation_meta_file,
-        evaluation_log_file,
-    ]
-
-    # checks for maximum upload limit
-    total_size_mb = sum([size_in_mb(f) for f in train_files + evaluation_files])
-    if upload_limit_mb != 0 and total_size_mb > upload_limit_mb:
-        raise RuntimeError(
-            f"Total size of upload ({total_size_mb:.2f} MB) exceeds "
-            f"permitted maximum ({upload_limit_mb:.2f} MB)."
-        )
-
-    with train_meta_file.open("r") as meta_file:
-        train_data = json.load(meta_file)
-
-    with evaluation_file.open("r") as meta_file:
-        evaluation_data = json.load(meta_file)
-    evaluation_data = evaluation_data["test"]
-
-    # get lowest validation epoch
-    best_epoch = str(train_model_file).split(".")[0].split("=")[1]
-
-    experiment_name = (
-        experiment_name or f"{train_data['model-name']}-{train_data['database-name']}"
-    )
-    run_name = run_name or train_data["datetime"]
-
-    click.secho(
-        f"Uploading entry `{run_name}` to experiment `{experiment_name}` "
-        f"on GitLab project {project_path} (id: {project.id})...",
-        bold=True,
-        fg="green",
-    )
-
-    exp_meta = mlflow.set_experiment(experiment_name=experiment_name)
-    with mlflow.start_run(run_name=run_name):
-        click.echo("Uploading package metadata...")
-        click.echo(f" -> `version` ({train_data['package-version']})")
-        mlflow.log_param("package version", train_data["package-version"])
-
-        click.echo("Uploading metrics...")
-        for k in [
-            "epochs",
-            "batch-size",
-        ]:
-            click.secho(f" -> `{k}` ({train_data[k]})")
-            mlflow.log_param(k, train_data[k])
-
-        click.secho(f" -> `#accumulations` ({train_data['accumulate-grad-batches']})")
-        mlflow.log_param("#Accumulations", train_data["accumulate-grad-batches"])
-        click.secho(f" -> `epoch (best)` ({best_epoch})")
-        mlflow.log_param("Epoch (best)", best_epoch)
-
-        for k in [
-            "threshold",
-            "precision",
-            "recall",
-            "f1_score",
-            "average_precision_score",
-            "specificity",
-            "auc_score",
-            "accuracy",
-        ]:
-            click.secho(f" -> `{k}` ({evaluation_data[k]:.3g})")
-            mlflow.log_metric(k, evaluation_data[k])
-
-        click.echo("Uploading artifacts (files)...")
-
-        with tempfile.TemporaryDirectory() as tmpdir_name:
-            tmpdir = pathlib.Path(tmpdir_name)
-            for f in train_files + evaluation_files:
-                assert f.exists(), f"File `{f}` does not exist - cannot upload!"
-                clean_path = str(sanitize_filename(tmpdir, f))
-                click.secho(f" -> `{clean_path}` ({size_in_mb(f):.2f} MB)")
-                mlflow.log_artifact(clean_path)
-
-    click.secho(f"Uploaded {total_size_mb:.2f} MB to server.", bold=True, fg="green")
-    click.secho(
-        f"Visit {gitlab.url}/{project.path_with_namespace}/-/ml/experiments/{exp_meta.experiment_id}",
-        bold=True,
-        fg="blue",
-    )
+    """Upload results from a classification experiment folder to GitLab's MLFlow server."""
+
+    from mednet.libs.common.scripts.upload import upload as upload_
+
+    metrics = [
+        "threshold",
+        "precision",
+        "recall",
+        "f1_score",
+        "average_precision_score",
+        "specificity",
+        "auc_score",
+        "accuracy",
+    ]
+
+    upload_(
+        project_path,
+        experiment_folder,
+        experiment_name,
+        run_name,
+        metrics,
+        upload_limit_mb,
+    )
@@ -81,3 +81,140 @@ def reusable_options(f):
         return f(*args, **kwargs)
 
     return wrapper_reusable_options
+
+
+def upload(
+    project_path: str,
+    experiment_folder: pathlib.Path,
+    experiment_name: str,
+    run_name: str,
+    metrics: list[str],
+    upload_limit_mb: int,
+) -> None:
+    """Upload results from an experiment folder to GitLab's MLFlow server.
+
+    Parameters
+    ----------
+    project_path
+        Path to the project where to upload model entries.
+    experiment_folder
+        Directory in which to upload results from.
+    experiment_name
+        A string indicating the experiment name (e.g. "exp-pasa-mc" or "exp-densenet-mc-ch").
+    run_name
+        A string indicating the run name (e.g. "run-1").
+    metrics
+        List of metrics to upload.
+    upload_limit_mb
+        Maximum upload size in MB (set to 0 for no limit).
+    """
+
+    import json
+    import os
+    import tempfile
+
+    import mlflow
+    from mednet.libs.common.utils.checkpointer import (
+        get_checkpoint_to_run_inference,
+    )
+    from mednet.libs.common.utils.gitlab import (
+        gitlab_instance_and_token,
+        sanitize_filename,
+        size_in_mb,
+    )
+
+    logger.info("Retrieving GitLab credentials for access to hosted MLFlow server...")
+    gitlab, token = gitlab_instance_and_token()
+    project = gitlab.projects.get(project_path)
+    os.environ["MLFLOW_TRACKING_TOKEN"] = token
+    os.environ["MLFLOW_TRACKING_URI"] = (
+        gitlab.api_url + f"/projects/{project.id}/ml/mlflow"
+    )
+
+    # get train files
+    train_folder = experiment_folder
+    train_log_file = train_folder / "trainlog.pdf"
+    train_meta_file = train_folder / "train.meta.json"
+    train_model_file = get_checkpoint_to_run_inference(train_folder)
+    train_files = [train_meta_file, train_model_file, train_log_file]
+
+    # get evaluation files
+    evaluation_file = experiment_folder / "evaluation.json"
+    evaluation_meta_file = experiment_folder / "evaluation.meta.json"
+    evaluation_meta_file = experiment_folder / "evaluation.rst"
+    evaluation_log_file = experiment_folder / "evaluation.pdf"
+    evaluation_files = [
+        evaluation_file,
+        evaluation_meta_file,
+        evaluation_log_file,
+    ]
+
+    # checks for maximum upload limit
+    total_size_mb = sum([size_in_mb(f) for f in train_files + evaluation_files])
+    if upload_limit_mb != 0 and total_size_mb > upload_limit_mb:
+        raise RuntimeError(
+            f"Total size of upload ({total_size_mb:.2f} MB) exceeds "
+            f"permitted maximum ({upload_limit_mb:.2f} MB)."
+        )
+
+    with train_meta_file.open("r") as meta_file:
+        train_data = json.load(meta_file)
+
+    with evaluation_file.open("r") as meta_file:
+        evaluation_data = json.load(meta_file)
+    evaluation_data = evaluation_data["test"]
+
+    # get lowest validation epoch
+    best_epoch = str(train_model_file).split(".")[0].split("=")[1]
+
+    experiment_name = (
+        experiment_name or f"{train_data['model-name']}-{train_data['database-name']}"
+    )
+    run_name = run_name or train_data["datetime"]
+
+    click.secho(
+        f"Uploading entry `{run_name}` to experiment `{experiment_name}` "
+        f"on GitLab project {project_path} (id: {project.id})...",
+        bold=True,
+        fg="green",
+    )
+
+    exp_meta = mlflow.set_experiment(experiment_name=experiment_name)
+    with mlflow.start_run(run_name=run_name):
+        click.echo("Uploading package metadata...")
+        click.echo(f" -> `version` ({train_data['package-version']})")
+        mlflow.log_param("package version", train_data["package-version"])
+
+        click.echo("Uploading metrics...")
+        for k in [
+            "epochs",
+            "batch-size",
+        ]:
+            click.secho(f" -> `{k}` ({train_data[k]})")
+            mlflow.log_param(k, train_data[k])
+
+        click.secho(f" -> `#accumulations` ({train_data['accumulate-grad-batches']})")
+        mlflow.log_param("#Accumulations", train_data["accumulate-grad-batches"])
+        click.secho(f" -> `epoch (best)` ({best_epoch})")
+        mlflow.log_param("Epoch (best)", best_epoch)
+
+        for k in metrics:
+            click.secho(f" -> `{k}` ({evaluation_data[k]:.3g})")
+            mlflow.log_metric(k, evaluation_data[k])
+
+        click.echo("Uploading artifacts (files)...")
+
+        with tempfile.TemporaryDirectory() as tmpdir_name:
+            tmpdir = pathlib.Path(tmpdir_name)
+            for f in train_files + evaluation_files:
+                assert f.exists(), f"File `{f}` does not exist - cannot upload!"
+                clean_path = str(sanitize_filename(tmpdir, f))
+                click.secho(f" -> `{clean_path}` ({size_in_mb(f):.2f} MB)")
+                mlflow.log_artifact(clean_path)
+
+    click.secho(f"Uploaded {total_size_mb:.2f} MB to server.", bold=True, fg="green")
+    click.secho(
+        f"Visit {gitlab.url}/{project.path_with_namespace}/-/ml/experiments/{exp_meta.experiment_id}",
+        bold=True,
+        fg="blue",
+    )
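
For reference, this is how a task-specific command is expected to call the shared helper after this change, mirroring the two wrappers in this commit. The project path and experiment folder below are placeholders, not values taken from the diff.

    from pathlib import Path

    from mednet.libs.common.scripts.upload import upload as upload_

    # Metric names must match keys present in the "test" split of the
    # experiment's evaluation.json file.
    metrics = [
        "threshold",
        "precision",
        "recall",
        "f1_score",
        "average_precision_score",
        "specificity",
        "auc_score",
        "accuracy",
    ]

    upload_(
        "group/project",                  # hypothetical GitLab project path
        Path("results/pasa-montgomery"),  # hypothetical experiment folder
        "",                               # experiment name (derived from train.meta.json if empty)
        "",                               # run name (defaults to the training datetime if empty)
        metrics,
        0,                                # upload size limit in MB (0 = unlimited)
    )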
@@ -54,122 +54,25 @@ def upload(
     upload_limit_mb: int,
     **_,  # ignored
 ) -> None:  # numpydoc ignore=PR01
-    """Upload results from an experiment folder to GitLab's MLFlow server."""
-
-    import json
-    import os
-    import tempfile
-
-    import mlflow
-    from mednet.libs.common.utils.checkpointer import (
-        get_checkpoint_to_run_inference,
-    )
-    from mednet.libs.common.utils.gitlab import (
-        gitlab_instance_and_token,
-        sanitize_filename,
-        size_in_mb,
-    )
-
-    logger.info("Retrieving GitLab credentials for access to hosted MLFlow server...")
-    gitlab, token = gitlab_instance_and_token()
-    project = gitlab.projects.get(project_path)
-    os.environ["MLFLOW_TRACKING_TOKEN"] = token
-    os.environ["MLFLOW_TRACKING_URI"] = (
-        gitlab.api_url + f"/projects/{project.id}/ml/mlflow"
-    )
-
-    # get train files
-    train_folder = experiment_folder
-    train_log_file = train_folder / "trainlog.pdf"
-    train_meta_file = train_folder / "train.meta.json"
-    train_model_file = get_checkpoint_to_run_inference(train_folder)
-    train_files = [train_meta_file, train_model_file, train_log_file]
-
-    # get evaluation files
-    evaluation_file = experiment_folder / "evaluation.json"
-    evaluation_meta_file = experiment_folder / "evaluation.meta.json"
-    evaluation_meta_file = experiment_folder / "evaluation.rst"
-    evaluation_log_file = experiment_folder / "evaluation.pdf"
-    evaluation_files = [
-        evaluation_file,
-        evaluation_meta_file,
-        evaluation_log_file,
-    ]
-
-    # checks for maximum upload limit
-    total_size_mb = sum([size_in_mb(f) for f in train_files + evaluation_files])
-    if upload_limit_mb != 0 and total_size_mb > upload_limit_mb:
-        raise RuntimeError(
-            f"Total size of upload ({total_size_mb:.2f} MB) exceeds "
-            f"permitted maximum ({upload_limit_mb:.2f} MB)."
-        )
-
-    with train_meta_file.open("r") as meta_file:
-        train_data = json.load(meta_file)
-
-    with evaluation_file.open("r") as meta_file:
-        evaluation_data = json.load(meta_file)
-    evaluation_data = evaluation_data["test"]
-
-    # get lowest validation epoch
-    best_epoch = str(train_model_file).split(".")[0].split("=")[1]
-
-    experiment_name = (
-        experiment_name or f"{train_data['model-name']}-{train_data['database-name']}"
-    )
-    run_name = run_name or train_data["datetime"]
-
-    click.secho(
-        f"Uploading entry `{run_name}` to experiment `{experiment_name}` "
-        f"on GitLab project {project_path} (id: {project.id})...",
-        bold=True,
-        fg="green",
-    )
-
-    exp_meta = mlflow.set_experiment(experiment_name=experiment_name)
-    with mlflow.start_run(run_name=run_name):
-        click.echo("Uploading package metadata...")
-        click.echo(f" -> `version` ({train_data['package-version']})")
-        mlflow.log_param("package version", train_data["package-version"])
-
-        click.echo("Uploading metrics...")
-        for k in [
-            "epochs",
-            "batch-size",
-        ]:
-            click.secho(f" -> `{k}` ({train_data[k]})")
-            mlflow.log_param(k, train_data[k])
-
-        click.secho(f" -> `#accumulations` ({train_data['accumulate-grad-batches']})")
-        mlflow.log_param("#Accumulations", train_data["accumulate-grad-batches"])
-        click.secho(f" -> `epoch (best)` ({best_epoch})")
-        mlflow.log_param("Epoch (best)", best_epoch)
-
-        for k in [
-            "precision",
-            "recall",
-            "f1",
-            "average_precision_score",
-            "specificity",
-            "auc_score",
-            "accuracy",
-        ]:
-            click.secho(f" -> `{k}` ({evaluation_data[k]:.3g})")
-            mlflow.log_metric(k, evaluation_data[k])
-
-        click.echo("Uploading artifacts (files)...")
-
-        with tempfile.TemporaryDirectory() as tmpdir_name:
-            tmpdir = pathlib.Path(tmpdir_name)
-            for f in train_files + evaluation_files:
-                assert f.exists(), f"File `{f}` does not exist - cannot upload!"
-                clean_path = str(sanitize_filename(tmpdir, f))
-                click.secho(f" -> `{clean_path}` ({size_in_mb(f):.2f} MB)")
-                mlflow.log_artifact(clean_path)
-
-    click.secho(f"Uploaded {total_size_mb:.2f} MB to server.", bold=True, fg="green")
-    click.secho(
-        f"Visit {gitlab.url}/{project.path_with_namespace}/-/ml/experiments/{exp_meta.experiment_id}",
-        bold=True,
-        fg="blue",
-    )
+    """Upload results from a segmentation experiment folder to GitLab's MLFlow server."""
+
+    from mednet.libs.common.scripts.upload import upload as upload_
+
+    metrics = [
+        "precision",
+        "recall",
+        "f1",
+        "average_precision_score",
+        "specificity",
+        "auc_score",
+        "accuracy",
+    ]
+
+    upload_(
+        project_path,
+        experiment_folder,
+        experiment_name,
+        run_name,
+        metrics,
+        upload_limit_mb,
+    )
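
The helper's only coupling to GitLab is through the two environment variables set at the top of its body: MLFLOW_TRACKING_URI points at the project's MLflow endpoint under the GitLab API, and MLFLOW_TRACKING_TOKEN carries the access token. A stripped-down, self-contained sketch of that interaction follows; the URL, token, and logged values are placeholders.

    import os

    import mlflow

    # GitLab exposes an MLflow-compatible tracking server per project at
    # <api_url>/projects/<id>/ml/mlflow; authentication uses an access token
    # passed through MLFLOW_TRACKING_TOKEN (both values below are placeholders).
    os.environ["MLFLOW_TRACKING_TOKEN"] = "glpat-xxxxxxxxxxxxxxxxxxxx"
    os.environ["MLFLOW_TRACKING_URI"] = (
        "https://gitlab.example.org/api/v4/projects/1234/ml/mlflow"
    )

    exp_meta = mlflow.set_experiment(experiment_name="pasa-montgomery")
    with mlflow.start_run(run_name="2024-05-01T10:00:00"):
        mlflow.log_param("package version", "1.0.0")
        mlflow.log_metric("accuracy", 0.93)
        mlflow.log_artifact("results/evaluation.pdf")  # file must exist locally

    print(f"experiment id: {exp_meta.experiment_id}")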