Commit 2aa77950 authored by Gokhan OZBULAK, committed by André Anjos

File size limit flag is added (#60).

parent 82d66ea5
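The change below adds a --file-limit option to `mednet upload` and verifies the size of every training and evaluation artifact before anything is sent to the MLFlow server. As a standalone sketch (not the project's code: it uses a hypothetical `demo` command and drops the project-specific ResourceOption class and the required flag), the following shows how a click option built with click.IntRange(min=0) and a default of 10 behaves; negative values are rejected at parse time and 0 means "no limit":

    # Standalone illustration of the --file-limit option's shape
    # (hypothetical "demo" command, not part of mednet).
    import click


    @click.command()
    @click.option(
        "--file-limit",
        "-l",
        help="Limit file size to be uploaded in MB (set 0 for no limit).",
        show_default=True,
        default=10,
        type=click.IntRange(min=0),  # click rejects negative values at parse time
    )
    def demo(file_limit: int) -> None:
        """Report the effective per-file size limit."""
        if file_limit == 0:
            click.echo("No per-file size limit.")
        else:
            click.echo(f"Files larger than {file_limit} MB will be rejected.")


    if __name__ == "__main__":
        demo()

Validating the flag with IntRange keeps the later size check simple, since only non-negative values can ever reach it.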
@@ -89,6 +89,13 @@ def _create_temp_copy(source, target):
    .. code:: sh

       mednet upload --experiment-folder=/path/to/results --run-name=run-1
+
+4. Upload an existing experiment result, limiting each file to 20 MB (set 0 for no limit):
+
+   .. code:: sh
+
+      mednet upload --experiment-folder=/path/to/results --file-limit=20
+
 """,
 )
 @click.option(
@@ -116,11 +123,22 @@ def _create_temp_copy(source, target):
     help='A string indicating the run name (e.g. "run-1")',
     cls=ResourceOption,
 )
+@click.option(
+    "--file-limit",
+    "-l",
+    help="Maximum size (in MB) allowed for each uploaded file (set 0 for no limit).",
+    show_default=True,
+    required=True,
+    default=10,
+    type=click.IntRange(min=0),
+    cls=ResourceOption,
+)
 @verbosity_option(logger=logger, cls=ResourceOption, expose_value=False)
 def upload(
     experiment_folder: pathlib.Path,
     experiment_name: str,
     run_name: str,
+    file_limit: int,
     **_,  # ignored
 ) -> None:  # numpydoc ignore=PR01
     """Upload results from an experiment folder."""
@@ -146,7 +164,8 @@ def upload(
         train_model_file, train_model_temp_file
     )
     with train_meta_file.open("r") as f:
-        meta_data = json.load(f)
+        train_data = json.load(f)
+    train_files = [train_meta_file, train_log_file, train_model_temp_file]

     # prepare evaluation files
     evaluation_file = experiment_folder / "evaluation.json"
@@ -154,39 +173,46 @@
     evaluation_log_file = experiment_folder / "evaluation.pdf"
     with evaluation_file.open("r") as f:
         evaluation_data = json.load(f)
-    test_data = evaluation_data["test"]
+    evaluation_data = evaluation_data["test"]
+    evaluation_files = [evaluation_file, evaluation_meta_file, evaluation_log_file]
+
+    # check for file sizes.
+    for f in train_files + evaluation_files:
+        file_size = f.stat().st_size / (1024**2)
+        if file_limit != 0 and file_size > file_limit:
+            raise RuntimeError(
+                f"Size of {f} ({file_size:.2f} MB) must be less than or equal to {file_limit} MB."
+            )

     # prepare experiment and run names
     experiment_name = (
         experiment_name
         if experiment_name
-        else f'{meta_data["model-name"]}_{meta_data["database-name"]}'
+        else f'{train_data["model-name"]}_{train_data["database-name"]}'
     )
-    run_name = run_name if run_name else meta_data["datetime"]
+    run_name = run_name if run_name else train_data["datetime"]
     logger.info("Setting experiment and run names on the MLFlow server...")
     mlflow.set_experiment(experiment_name=experiment_name)
     with mlflow.start_run(run_name=run_name):
         # upload metrics
         logger.info("Uploading metrics to MLFlow server...")
-        mlflow.log_metric("threshold", test_data["threshold"])
-        mlflow.log_metric("precision", test_data["precision"])
-        mlflow.log_metric("recall", test_data["recall"])
-        mlflow.log_metric("f1_score", test_data["f1_score"])
+        mlflow.log_metric("threshold", evaluation_data["threshold"])
+        mlflow.log_metric("precision", evaluation_data["precision"])
+        mlflow.log_metric("recall", evaluation_data["recall"])
+        mlflow.log_metric("f1_score", evaluation_data["f1_score"])
         mlflow.log_metric(
-            "average_precision_score", test_data["average_precision_score"]
+            "average_precision_score", evaluation_data["average_precision_score"]
         )
-        mlflow.log_metric("specificity", test_data["specificity"])
-        mlflow.log_metric("auc_score", test_data["auc_score"])
-        mlflow.log_metric("accuracy", test_data["accuracy"])
-        mlflow.log_param("version", meta_data["package-version"])
+        mlflow.log_metric("specificity", evaluation_data["specificity"])
+        mlflow.log_metric("auc_score", evaluation_data["auc_score"])
+        mlflow.log_metric("accuracy", evaluation_data["accuracy"])
+        mlflow.log_param("version", train_data["package-version"])
         # upload artifacts
         logger.info("Uploading artifacts to MLFlow server...")
-        mlflow.log_artifact(train_meta_file)
-        mlflow.log_artifact(train_log_file)
-        mlflow.log_artifact(train_model_temp_file)
-        mlflow.log_artifact(evaluation_file)
-        mlflow.log_artifact(evaluation_meta_file)
-        mlflow.log_artifact(evaluation_log_file)
+        for f in train_files:
+            mlflow.log_artifact(f)
+        for f in evaluation_files:
+            mlflow.log_artifact(f)

     # delete temporary file as no need it after logging.
     train_model_temp_file.unlink()
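The heart of the commit is the size guard that runs before any MLFlow call. Below is a minimal, self-contained sketch of the same logic, assuming a hypothetical helper name check_file_sizes (the command performs the check inline): st_size is converted from bytes to megabytes and compared against the limit, with 0 disabling the check.

    # Hypothetical helper mirroring the inline check added by this commit.
    import pathlib


    def check_file_sizes(files: list[pathlib.Path], file_limit: int) -> None:
        """Raise RuntimeError if any file exceeds ``file_limit`` MB (0 = no limit)."""
        for f in files:
            file_size = f.stat().st_size / (1024**2)  # bytes -> MB
            if file_limit != 0 and file_size > file_limit:
                raise RuntimeError(
                    f"Size of {f} ({file_size:.2f} MB) must be less than or "
                    f"equal to {file_limit} MB."
                )


    # Example usage: abort before uploading anything above 20 MB.
    # check_file_sizes([pathlib.Path("model.ckpt")], file_limit=20)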
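For readers less familiar with MLFlow, here is a hedged sketch of the logging pattern the command relies on, using illustrative names only: set_experiment selects or creates the experiment, start_run opens a run context, and metrics and artifact files are logged inside it. Collecting the files into train_files and evaluation_files is what lets the commit replace six individual log_artifact calls with two short loops.

    # Illustrative-only sketch of the MLflow calls used by the upload command;
    # the function name and parameters are assumptions, not mednet's API.
    import pathlib

    import mlflow


    def upload_run(
        experiment_name: str,
        run_name: str,
        metrics: dict[str, float],
        artifacts: list[pathlib.Path],
    ) -> None:
        """Log metrics and artifact files to the configured MLFlow tracking server."""
        mlflow.set_experiment(experiment_name=experiment_name)
        with mlflow.start_run(run_name=run_name):
            for key, value in metrics.items():
                mlflow.log_metric(key, value)
            # looping over a file list replaces one log_artifact call per file
            for f in artifacts:
                mlflow.log_artifact(str(f))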