From d7b67bb670dc1770a658304d98da149b7b29c37a Mon Sep 17 00:00:00 2001
From: Gokhan Ozbulak <gokhan.ozbulak@idiap.ch>
Date: Tue, 23 Apr 2024 22:09:42 +0200
Subject: [PATCH] Add file size limit flag (#60).

---
 src/mednet/scripts/upload.py | 64 +++++++++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 19 deletions(-)

diff --git a/src/mednet/scripts/upload.py b/src/mednet/scripts/upload.py
index c8c7f8da..ec2ba7b2 100644
--- a/src/mednet/scripts/upload.py
+++ b/src/mednet/scripts/upload.py
@@ -89,6 +89,13 @@ def _create_temp_copy(source, target):
    .. code:: sh
 
       mednet upload --experiment-folder=/path/to/results --run-name=run-1
+
+4. Upload an existing experiment result, limiting the size of each uploaded file to 20 MB (set 0 for no limit):
+
+   .. code:: sh
+
+      mednet upload --experiment-folder=/path/to/results --file-limit=20
+
 """,
 )
 @click.option(
@@ -116,11 +123,22 @@ def _create_temp_copy(source, target):
     help='A string indicating the run name (e.g. "run-1")',
     cls=ResourceOption,
 )
+@click.option(
+    "--file-limit",
+    "-l",
+    help="Maximum size in MB allowed for each uploaded file (set 0 for no limit)",
+    show_default=True,
+    required=True,
+    default=10,
+    type=click.IntRange(min=0),
+    cls=ResourceOption,
+)
 @verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
 def upload(
     experiment_folder: pathlib.Path,
     experiment_name: str,
     run_name: str,
+    file_limit: int,
     **_,  # ignored
 ) -> None:  # numpydoc ignore=PR01
     """Upload results from an experiment folder."""
@@ -146,7 +164,8 @@ def upload(
         train_model_file, train_model_temp_file
     )
     with train_meta_file.open("r") as f:
-        meta_data = json.load(f)
+        train_data = json.load(f)
+    train_files = [train_meta_file, train_log_file, train_model_temp_file]
 
     # prepare evaluation files
     evaluation_file = experiment_folder / "evaluation.json"
@@ -154,39 +173,46 @@ def upload(
     evaluation_log_file = experiment_folder / "evaluation.pdf"
     with evaluation_file.open("r") as f:
         evaluation_data = json.load(f)
-    test_data = evaluation_data["test"]
+    evaluation_data = evaluation_data["test"]
+    evaluation_files = [evaluation_file, evaluation_meta_file, evaluation_log_file]
+
+    # check that file sizes are within the configured limit
+    for f in train_files + evaluation_files:
+        file_size = f.stat().st_size / (1024**2)
+        if file_limit != 0 and file_size > file_limit:
+            raise RuntimeError(
+                f"Size of {f} ({file_size:.2f} MB) must be less than or equal to {file_limit} MB."
+            )
 
     # prepare experiment and run names
     experiment_name = (
         experiment_name
         if experiment_name
-        else f'{meta_data["model-name"]}_{meta_data["database-name"]}'
+        else f'{train_data["model-name"]}_{train_data["database-name"]}'
     )
-    run_name = run_name if run_name else meta_data["datetime"]
+    run_name = run_name if run_name else train_data["datetime"]
     logger.info("Setting experiment and run names on the MLFlow server...")
     mlflow.set_experiment(experiment_name=experiment_name)
     with mlflow.start_run(run_name=run_name):
         # upload metrics
         logger.info("Uploading metrics to MLFlow server...")
-        mlflow.log_metric("threshold", test_data["threshold"])
-        mlflow.log_metric("precision", test_data["precision"])
-        mlflow.log_metric("recall", test_data["recall"])
-        mlflow.log_metric("f1_score", test_data["f1_score"])
+        mlflow.log_metric("threshold", evaluation_data["threshold"])
+        mlflow.log_metric("precision", evaluation_data["precision"])
+        mlflow.log_metric("recall", evaluation_data["recall"])
+        mlflow.log_metric("f1_score", evaluation_data["f1_score"])
         mlflow.log_metric(
-            "average_precision_score", test_data["average_precision_score"]
+            "average_precision_score", evaluation_data["average_precision_score"]
         )
-        mlflow.log_metric("specificity", test_data["specificity"])
-        mlflow.log_metric("auc_score", test_data["auc_score"])
-        mlflow.log_metric("accuracy", test_data["accuracy"])
-        mlflow.log_param("version", meta_data["package-version"])
+        mlflow.log_metric("specificity", evaluation_data["specificity"])
+        mlflow.log_metric("auc_score", evaluation_data["auc_score"])
+        mlflow.log_metric("accuracy", evaluation_data["accuracy"])
+        mlflow.log_param("version", train_data["package-version"])
         # upload artifacts
         logger.info("Uploading artifacts to MLFlow server...")
-        mlflow.log_artifact(train_meta_file)
-        mlflow.log_artifact(train_log_file)
-        mlflow.log_artifact(train_model_temp_file)
-        mlflow.log_artifact(evaluation_file)
-        mlflow.log_artifact(evaluation_meta_file)
-        mlflow.log_artifact(evaluation_log_file)
+        for f in train_files:
+            mlflow.log_artifact(f)
+        for f in evaluation_files:
+            mlflow.log_artifact(f)
 
         # delete temporary file as no need it after logging.
         train_model_temp_file.unlink()
-- 
GitLab
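For reference, a minimal standalone sketch of the size check this patch introduces. The helper name check_file_limit, the .ckpt suffix, and the dummy-file setup are hypothetical illustrations, not part of mednet; only the bytes-to-MB conversion and the comparison mirror the patch:

    import pathlib
    import tempfile


    def check_file_limit(files: list[pathlib.Path], file_limit: int) -> None:
        """Raise RuntimeError if any file exceeds file_limit MB (0 disables the check)."""
        for f in files:
            file_size = f.stat().st_size / (1024**2)  # bytes -> MB, as in the patch
            if file_limit != 0 and file_size > file_limit:
                raise RuntimeError(
                    f"Size of {f} ({file_size:.2f} MB) must be less than or "
                    f"equal to {file_limit} MB."
                )


    if __name__ == "__main__":
        # create a ~1 MB dummy file and verify it passes a 20 MB limit
        with tempfile.NamedTemporaryFile(suffix=".ckpt", delete=False) as tmp:
            tmp.write(b"\0" * (1024**2))
        check_file_limit([pathlib.Path(tmp.name)], file_limit=20)  # within the limit
        check_file_limit([pathlib.Path(tmp.name)], file_limit=0)   # 0 disables the check
        pathlib.Path(tmp.name).unlink()

Note that dividing by 1024**2 measures mebibytes; the patch labels the result "MB", and the sketch keeps that terminology.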