Commit 2aa77950 authored by Gokhan OZBULAK, committed by André Anjos

File size limit flag is added (#60).

parent 82d66ea5
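The change below adds a --file-limit option to `mednet upload` and verifies the size of every training and evaluation artifact before anything is sent to the MLFlow server. As a standalone sketch (not the project's code: it uses a hypothetical `demo` command and drops the project-specific ResourceOption class and the required flag), the following shows how a click option built with click.IntRange(min=0) and a default of 10 behaves; negative values are rejected at parse time and 0 means "no limit":

    # Standalone illustration of the --file-limit option's shape
    # (hypothetical "demo" command, not part of mednet).
    import click


    @click.command()
    @click.option(
        "--file-limit",
        "-l",
        help="Limit file size to be uploaded in MB (set 0 for no limit).",
        show_default=True,
        default=10,
        type=click.IntRange(min=0),  # click rejects negative values at parse time
    )
    def demo(file_limit: int) -> None:
        """Report the effective per-file size limit."""
        if file_limit == 0:
            click.echo("No per-file size limit.")
        else:
            click.echo(f"Files larger than {file_limit} MB will be rejected.")


    if __name__ == "__main__":
        demo()

Validating the flag with IntRange keeps the later size check simple, since only non-negative values can ever reach it.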
@@ -89,6 +89,13 @@ def _create_temp_copy(source, target):
    .. code:: sh

       mednet upload --experiment-folder=/path/to/results --run-name=run-1
+
+4. Upload an existing experiment result, limiting each file to 20 MB (set 0 for no limit):
+
+   .. code:: sh
+
+      mednet upload --experiment-folder=/path/to/results --file-limit=20
+
 """,
 )
 @click.option(
@@ -116,11 +123,22 @@ def _create_temp_copy(source, target):
     help='A string indicating the run name (e.g. "run-1")',
     cls=ResourceOption,
 )
+@click.option(
+    "--file-limit",
+    "-l",
+    help="Maximum size (in MB) allowed for each uploaded file (set 0 for no limit).",
+    show_default=True,
+    required=True,
+    default=10,
+    type=click.IntRange(min=0),
+    cls=ResourceOption,
+)
 @verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
 def upload(
     experiment_folder: pathlib.Path,
     experiment_name: str,
     run_name: str,
+    file_limit: int,
     **_,  # ignored
 ) -> None:  # numpydoc ignore=PR01
     """Upload results from an experiment folder."""
@@ -146,7 +164,8 @@ def upload(
         train_model_file, train_model_temp_file
     )
     with train_meta_file.open("r") as f:
-        meta_data = json.load(f)
+        train_data = json.load(f)
+    train_files = [train_meta_file, train_log_file, train_model_temp_file]

     # prepare evaluation files
     evaluation_file = experiment_folder / "evaluation.json"
@@ -154,39 +173,46 @@
     evaluation_log_file = experiment_folder / "evaluation.pdf"
     with evaluation_file.open("r") as f:
         evaluation_data = json.load(f)
-    test_data = evaluation_data["test"]
+    evaluation_data = evaluation_data["test"]
+    evaluation_files = [evaluation_file, evaluation_meta_file, evaluation_log_file]
+
+    # check for file sizes.
+    for f in train_files + evaluation_files:
+        file_size = f.stat().st_size / (1024**2)
+        if file_limit != 0 and file_size > file_limit:
+            raise RuntimeError(
+                f"Size of {f} ({file_size:.2f} MB) must be less than or equal to {file_limit} MB."
+            )

     # prepare experiment and run names
     experiment_name = (
         experiment_name
         if experiment_name
-        else f'{meta_data["model-name"]}_{meta_data["database-name"]}'
+        else f'{train_data["model-name"]}_{train_data["database-name"]}'
     )
-    run_name = run_name if run_name else meta_data["datetime"]
+    run_name = run_name if run_name else train_data["datetime"]
     logger.info("Setting experiment and run names on the MLFlow server...")
     mlflow.set_experiment(experiment_name=experiment_name)
     with mlflow.start_run(run_name=run_name):
         # upload metrics
         logger.info("Uploading metrics to MLFlow server...")
-        mlflow.log_metric("threshold", test_data["threshold"])
-        mlflow.log_metric("precision", test_data["precision"])
-        mlflow.log_metric("recall", test_data["recall"])
-        mlflow.log_metric("f1_score", test_data["f1_score"])
+        mlflow.log_metric("threshold", evaluation_data["threshold"])
+        mlflow.log_metric("precision", evaluation_data["precision"])
+        mlflow.log_metric("recall", evaluation_data["recall"])
+        mlflow.log_metric("f1_score", evaluation_data["f1_score"])
         mlflow.log_metric(
-            "average_precision_score", test_data["average_precision_score"]
+            "average_precision_score", evaluation_data["average_precision_score"]
         )
-        mlflow.log_metric("specificity", test_data["specificity"])
-        mlflow.log_metric("auc_score", test_data["auc_score"])
-        mlflow.log_metric("accuracy", test_data["accuracy"])
-        mlflow.log_param("version", meta_data["package-version"])
+        mlflow.log_metric("specificity", evaluation_data["specificity"])
+        mlflow.log_metric("auc_score", evaluation_data["auc_score"])
+        mlflow.log_metric("accuracy", evaluation_data["accuracy"])
+        mlflow.log_param("version", train_data["package-version"])
         # upload artifacts
         logger.info("Uploading artifacts to MLFlow server...")
-        mlflow.log_artifact(train_meta_file)
-        mlflow.log_artifact(train_log_file)
-        mlflow.log_artifact(train_model_temp_file)
-        mlflow.log_artifact(evaluation_file)
-        mlflow.log_artifact(evaluation_meta_file)
-        mlflow.log_artifact(evaluation_log_file)
+        for f in train_files:
+            mlflow.log_artifact(f)
+        for f in evaluation_files:
+            mlflow.log_artifact(f)

     # delete temporary file as no need it after logging.
     train_model_temp_file.unlink()
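The heart of the commit is the size guard that runs before any MLFlow call. Below is a minimal, self-contained sketch of the same logic, assuming a hypothetical helper name check_file_sizes (the command performs the check inline): st_size is converted from bytes to megabytes and compared against the limit, with 0 disabling the check.

    # Hypothetical helper mirroring the inline check added by this commit.
    import pathlib


    def check_file_sizes(files: list[pathlib.Path], file_limit: int) -> None:
        """Raise RuntimeError if any file exceeds ``file_limit`` MB (0 = no limit)."""
        for f in files:
            file_size = f.stat().st_size / (1024**2)  # bytes -> MB
            if file_limit != 0 and file_size > file_limit:
                raise RuntimeError(
                    f"Size of {f} ({file_size:.2f} MB) must be less than or "
                    f"equal to {file_limit} MB."
                )


    # Example usage: abort before uploading anything above 20 MB.
    # check_file_sizes([pathlib.Path("model.ckpt")], file_limit=20)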
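For readers less familiar with MLFlow, here is a hedged sketch of the logging pattern the command relies on, using illustrative names only: set_experiment selects or creates the experiment, start_run opens a run context, and metrics and artifact files are logged inside it. Collecting the files into train_files and evaluation_files is what lets the commit replace six individual log_artifact calls with two short loops.

    # Illustrative-only sketch of the MLflow calls used by the upload command;
    # the function name and parameters are assumptions, not mednet's API.
    import pathlib

    import mlflow


    def upload_run(
        experiment_name: str,
        run_name: str,
        metrics: dict[str, float],
        artifacts: list[pathlib.Path],
    ) -> None:
        """Log metrics and artifact files to the configured MLFlow tracking server."""
        mlflow.set_experiment(experiment_name=experiment_name)
        with mlflow.start_run(run_name=run_name):
            for key, value in metrics.items():
                mlflow.log_metric(key, value)
            # looping over a file list replaces one log_artifact call per file
            for f in artifacts:
                mlflow.log_artifact(str(f))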