diff --git a/src/mednet/libs/classification/engine/evaluator.py b/src/mednet/libs/classification/engine/evaluator.py
index acbf79b3fcdd9db808a6ee28e6ded77c42d887fb..b79f2b14d73aa826d24713613a9feca7b059fd3f 100644
--- a/src/mednet/libs/classification/engine/evaluator.py
+++ b/src/mednet/libs/classification/engine/evaluator.py
@@ -5,7 +5,6 @@
 
 import contextlib
 import itertools
-import json
 import logging
 import typing
 from collections.abc import Iterable, Iterator
@@ -609,31 +608,3 @@ def aggregate_pr(
             )
 
     return fig
-
-
-class NumpyJSONEncoder(json.JSONEncoder):
-    """Extends the standard JSON encoder to support Numpy arrays."""
-
-    def default(self, o: typing.Any) -> typing.Any:
-        """If input object is a ndarray it will be converted into a list.
-
-        Parameters
-        ----------
-        o
-            Input object to be JSON serialized.
-
-        Returns
-        -------
-            A serializable representation of object ``o``.
-        """
-
-        if isinstance(o, numpy.ndarray):
-            try:
-                retval = o.tolist()
-            except TypeError:
-                pass
-            else:
-                return retval
-
-        # Let the base class default method raise the TypeError
-        return super().default(o)
diff --git a/src/mednet/libs/classification/scripts/evaluate.py b/src/mednet/libs/classification/scripts/evaluate.py
index 4e7932e81ad79a4a77ce6862e3dee739a2b03d65..418600ee95954ddb8bd6d57b0013b4b0411021ba 100644
--- a/src/mednet/libs/classification/scripts/evaluate.py
+++ b/src/mednet/libs/classification/scripts/evaluate.py
@@ -116,12 +116,11 @@ def evaluate(
 
     from matplotlib.backends.backend_pdf import PdfPages
     from mednet.libs.common.scripts.utils import (
-        execution_metadata,
+        save_json_metadata,
         save_json_with_backup,
     )
 
     from ..engine.evaluator import (
-        NumpyJSONEncoder,
         aggregate_pr,
         aggregate_roc,
         run_binary,
@@ -129,26 +128,18 @@ def evaluate(
         tabulate_results,
     )
 
-    evaluation_filename = "evaluation.json"
-    evaluation_file = pathlib.Path(output_folder) / evaluation_filename
-
     with predictions.open("r") as f:
         predict_data = json.load(f)
 
     # register metadata
-    json_data: dict[str, typing.Any] = execution_metadata()
-    json_data.update(
-        dict(
-            predictions=str(predictions),
-            output_folder=str(output_folder),
-            threshold=threshold,
-            binning=binning,
-            plot=plot,
-        ),
+    save_json_metadata(
+        output_file=output_folder / "evaluation.meta.json",
+        predictions=str(predictions),
+        output_folder=str(output_folder),
+        threshold=threshold,
+        binning=binning,
+        plot=plot,
     )
-    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
-    evaluation_meta = evaluation_file.with_suffix(".meta.json")
-    save_json_with_backup(evaluation_meta, json_data)
 
     if threshold in predict_data:
         # it is the name of a split
@@ -180,9 +171,9 @@ def evaluate(
         )
 
     # records full result analysis to a JSON file
+    evaluation_file = output_folder / "evaluation.json"
     logger.info(f"Saving evaluation results at `{str(evaluation_file)}`...")
-    with evaluation_file.open("w") as f:
-        json.dump(results, f, indent=2, cls=NumpyJSONEncoder)
+    save_json_with_backup(evaluation_file, results)
 
     # dump evaluation results in RST format to screen and file
     table_data = {}
diff --git a/src/mednet/libs/classification/scripts/experiment.py b/src/mednet/libs/classification/scripts/experiment.py
index 690bc74f5f4a561e35daaa2ba7ec2a8b9a2cd4cf..5ce51f314688f23f08ac17342d1ae6f094fab9a2 100644
--- a/src/mednet/libs/classification/scripts/experiment.py
+++ b/src/mednet/libs/classification/scripts/experiment.py
@@ -59,8 +59,7 @@ def experiment(
 
     .. code::
 
-       └─ <output-folder>/
-          ├── model/  # the generated model will be here
+       └─ <output-folder>/  # the generated model will be here
           ├── predictions.json  # the prediction outputs
           ├── evaluation.json  # the evaluation outputs
     """
@@ -72,11 +71,10 @@ def experiment(
 
     from .train import train
 
-    train_output_folder = output_folder / "model"
     ctx.invoke(
         train,
         model=model,
-        output_folder=train_output_folder,
+        output_folder=output_folder,
         epochs=epochs,
         batch_size=batch_size,
         accumulate_grad_batches=accumulate_grad_batches,
@@ -98,11 +96,11 @@ def experiment(
     logger.info("Started train analysis")
     from mednet.libs.common.scripts.train_analysis import train_analysis
 
-    logdir = train_output_folder / "logs"
+    logdir = output_folder / "logs"
     ctx.invoke(
         train_analysis,
         logdir=logdir,
-        output_folder=train_output_folder,
+        output_folder=output_folder,
     )
 
     logger.info("Ended train analysis")
@@ -118,7 +116,7 @@ def experiment(
         model=model,
         datamodule=datamodule,
         device=device,
-        weight=train_output_folder,
+        weight=output_folder,
         batch_size=batch_size,
         parallel=parallel,
     )
diff --git a/src/mednet/libs/classification/scripts/predict.py b/src/mednet/libs/classification/scripts/predict.py
index 0a0f4451ccd51b0bcf05d9489b0475e08b2378fd..9d2e510364effaaed06ebe5d46ca7b426912f599 100644
--- a/src/mednet/libs/classification/scripts/predict.py
+++ b/src/mednet/libs/classification/scripts/predict.py
@@ -45,16 +45,16 @@ def predict(
 ) -> None:  # numpydoc ignore=PR01
     """Run inference (generates scores) on all input images, using a pre-trained model."""
 
-    import json
-
     from mednet.libs.classification.engine.predictor import run
     from mednet.libs.common.engine.device import DeviceManager
     from mednet.libs.common.scripts.predict import (
         load_checkpoint,
-        save_json_data,
         setup_datamodule,
     )
-    from mednet.libs.common.scripts.utils import save_json_with_backup
+    from mednet.libs.common.scripts.utils import (
+        save_json_metadata,
+        save_json_with_backup,
+    )
 
     predictions_meta_file = output_folder / "predictions.meta.json"
     predictions_meta_file.parent.mkdir(parents=True, exist_ok=True)
@@ -62,13 +62,19 @@ def predict(
     setup_datamodule(datamodule, model, batch_size, parallel)
     model = load_checkpoint(model, weight)
     device_manager = DeviceManager(device)
-    save_json_data(datamodule, model, device_manager, predictions_meta_file)
+    save_json_metadata(
+        output_file=predictions_meta_file,
+        output_folder=output_folder,
+        model=model,
+        datamodule=datamodule,
+        batch_size=batch_size,
+        device=device,
+        weight=weight,
+        parallel=parallel,
+    )
 
     predictions = run(model, datamodule, device_manager)
 
     predictions_file = output_folder / "predictions.json"
     save_json_with_backup(predictions_file, predictions)
-
-    with predictions_file.open("w") as f:
-        json.dump(predictions, f, indent=2)
     logger.info(f"Predictions saved to `{str(predictions_file)}`")
diff --git a/src/mednet/libs/classification/scripts/saliency/completeness.py b/src/mednet/libs/classification/scripts/saliency/completeness.py
index 3ee81288101676fff4714adcfb08d1f41c20c378..7dcef916d77339b327474d5b34037182305c3b75 100644
--- a/src/mednet/libs/classification/scripts/saliency/completeness.py
+++ b/src/mednet/libs/classification/scripts/saliency/completeness.py
@@ -52,7 +52,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
 @click.option(
     "--output-json",
     "-o",
-    help="""Directory in which to store the output .json file containing all
+    help="""File name in which to store the output .json file containing all
     measures.""",
     required=True,
     type=click.Path(
@@ -60,7 +60,7 @@ logger = setup(__name__.split(".")[0], format="%(levelname)s: %(message)s")
         dir_okay=False,
         path_type=pathlib.Path,
     ),
-    default="saliency-interpretability.json",
+    default="saliency-completeness.json",
     cls=ResourceOption,
 )
 @click.option(
@@ -201,10 +201,12 @@ def completeness(
        This application is relatively slow when processing a large DataModule
        with many (positive) samples.
     """
-    import json
-
     from mednet.libs.common.engine.device import DeviceManager
     from mednet.libs.common.scripts.predict import setup_datamodule
+    from mednet.libs.common.scripts.utils import (
+        save_json_metadata,
+        save_json_with_backup,
+    )
     from mednet.libs.common.utils.checkpointer import (
         get_checkpoint_to_run_inference,
     )
@@ -231,6 +233,22 @@ def completeness(
     logger.info(f"Loading checkpoint from `{weight}`...")
     model = type(model).load_from_checkpoint(weight, strict=False)
 
+    # stores all information we can think of, to reproduce this later
+    save_json_metadata(
+        output_file=output_json.with_suffix(".meta.json"),
+        model=model,
+        datamodule=datamodule,
+        output_json=output_json,
+        device=device,
+        cache_samples=cache_samples,
+        weight=weight,
+        parallel=parallel,
+        saliency_map_algorithm=saliency_map_algorithm,
+        target_class=target_class,
+        positive_only=positive_only,
+        percentile=percentile,
+    )
+
     logger.info(
         f"Evaluating RemOve And Debias (ROAD) average scores for "
         f"algorithm `{saliency_map_algorithm}` with percentiles "
@@ -247,7 +265,5 @@ def completeness(
         parallel=parallel,
     )
 
-    output_json.parent.mkdir(parents=True, exist_ok=True)
-    with output_json.open("w") as f:
-        logger.info(f"Saving output file to `{str(output_json)}`...")
-        json.dump(results, f, indent=2)
+    logger.info(f"Saving output file to `{str(output_json)}`...")
+    save_json_with_backup(output_json, results)
diff --git a/src/mednet/libs/classification/scripts/saliency/generate.py b/src/mednet/libs/classification/scripts/saliency/generate.py
index 2e4c75693fd7d224e0d3a42c3c55b9e025d01295..d80bbefb9803659286fd014f678b9ae882eac349 100644
--- a/src/mednet/libs/classification/scripts/saliency/generate.py
+++ b/src/mednet/libs/classification/scripts/saliency/generate.py
@@ -171,36 +171,26 @@ def generate(
 
     from mednet.libs.common.engine.device import DeviceManager
     from mednet.libs.common.scripts.predict import setup_datamodule
-    from mednet.libs.common.scripts.utils import (
-        execution_metadata,
-        save_json_with_backup,
-    )
+    from mednet.libs.common.scripts.utils import save_json_metadata
     from mednet.libs.common.utils.checkpointer import (
         get_checkpoint_to_run_inference,
     )
 
     from ...engine.saliency.generator import run
 
-    # register metadata
-    json_data: dict[str, typing.Any] = execution_metadata()
-    json_data.update(
-        dict(
-            database_name=datamodule.database_name,
-            database_split=datamodule.split_name,
-            model_name=model.name,
-            output_folder=str(output_folder),
-            device=device,
-            cache_samples=cache_samples,
-            weight=str(weight),
-            parallel=parallel,
-            saliency_map_algorithm=saliency_map_algorithm,
-            target_class=target_class,
-            positive_only=positive_only,
-        ),
+    save_json_metadata(
+        output_file=output_folder / "saliency-generation.meta.json",
+        datamodule=datamodule,
+        model=model,
+        output_folder=output_folder,
+        device=device,
+        cache_samples=cache_samples,
+        weight=weight,
+        parallel=parallel,
+        saliency_map_algorithm=saliency_map_algorithm,
+        target_class=target_class,
+        positive_only=positive_only,
     )
-    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
-    saliency_meta = output_folder / "saliency-generation.meta.json"
-    save_json_with_backup(saliency_meta, json_data)
 
     logger.info(f"Output folder: {output_folder}")
     output_folder.mkdir(parents=True, exist_ok=True)
diff --git a/src/mednet/libs/classification/scripts/saliency/interpretability.py b/src/mednet/libs/classification/scripts/saliency/interpretability.py
index 0d0f556b49e6b6dd8f0a4c9d68adbfa9d5003192..d3b57f28bf820df3158d623dae32db6cd62d06b9 100644
--- a/src/mednet/libs/classification/scripts/saliency/interpretability.py
+++ b/src/mednet/libs/classification/scripts/saliency/interpretability.py
@@ -119,7 +119,11 @@ def interpretability(
       proportional energy measure in the sense that it does not need explicit
       thresholding.
     """
-    import json
+
+    from mednet.libs.common.scripts.utils import (
+        save_json_metadata,
+        save_json_with_backup,
+    )
 
     from ...engine.saliency.interpretability import run
 
@@ -128,8 +132,17 @@ def interpretability(
     datamodule.prepare_data()
     datamodule.setup(stage="predict")
 
+    # stores all information we can think of, to reproduce this later
+    save_json_metadata(
+        output_file=output_json.with_suffix(".meta.json"),
+        model=model,
+        datamodule=datamodule,
+        output_json=output_json,
+        input_folder=input_folder,
+        target_label=target_label,
+    )
+
     results = run(input_folder, target_label, datamodule)
 
-    with output_json.open("w") as f:
-        logger.info(f"Saving output file to `{str(output_json)}`...")
-        json.dump(results, f, indent=2)
+    logger.info(f"Saving output file to `{str(output_json)}`...")
+    save_json_with_backup(output_json, results)
diff --git a/src/mednet/libs/classification/scripts/saliency/view.py b/src/mednet/libs/classification/scripts/saliency/view.py
index aed9a81d03b7a2cf7ad5832f2fbf8b2b1c017d59..226af15027e02975dc231fa00530ebafcd0c018c 100644
--- a/src/mednet/libs/classification/scripts/saliency/view.py
+++ b/src/mednet/libs/classification/scripts/saliency/view.py
@@ -3,7 +3,6 @@
 # SPDX-License-Identifier: GPL-3.0-or-later
 
 import pathlib
-import typing
 
 import click
 from clapper.click import ConfigCommand, ResourceOption, verbosity_option
@@ -98,10 +97,7 @@ def view(
     **_,
 ) -> None:  # numpydoc ignore=PR01
     """Generate heatmaps for input CXRs based on existing saliency maps."""
-    from mednet.libs.common.scripts.utils import (
-        execution_metadata,
-        save_json_with_backup,
-    )
+    from mednet.libs.common.scripts.utils import save_json_metadata
 
     from ...engine.saliency.viewer import run
 
@@ -109,21 +105,15 @@ def view(
     output_folder.mkdir(parents=True, exist_ok=True)
 
     # register metadata
-    json_data: dict[str, typing.Any] = execution_metadata()
-    json_data.update(
-        dict(
-            database_name=datamodule.database_name,
-            database_split=datamodule.split_name,
-            model_name=model.name,
-            input_folder=str(input_folder),
-            output_folder=str(output_folder),
-            show_groundtruth=show_groundtruth,
-            threshold=threshold,
-        ),
+    save_json_metadata(
+        output_file=output_folder / "saliency-view.meta.json",
+        datamodule=datamodule,
+        model=model,
+        input_folder=input_folder,
+        output_folder=output_folder,
+        show_groundtruth=show_groundtruth,
+        threshold=threshold,
     )
-    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
-    saliency_meta = output_folder / "saliency-view.meta.json"
-    save_json_with_backup(saliency_meta, json_data)
 
     datamodule.drop_incomplete_batch = False
     # datamodule.cache_samples = cache_samples
diff --git a/src/mednet/libs/classification/scripts/train.py b/src/mednet/libs/classification/scripts/train.py
index 2fc53078e32af07e20d03301c6351ad43aac5731..87d0c23c7a02c2a983e0cd81780002cc25e4d7b5 100644
--- a/src/mednet/libs/classification/scripts/train.py
+++ b/src/mednet/libs/classification/scripts/train.py
@@ -64,9 +64,9 @@ def train(
     from mednet.libs.common.scripts.train import (
         get_checkpoint_file,
         load_checkpoint,
-        save_json_data,
         setup_datamodule,
     )
+    from mednet.libs.common.scripts.utils import save_json_metadata
 
     seed_everything(seed)
 
@@ -96,26 +96,27 @@ def train(
     checkpoint_file = get_checkpoint_file(output_folder)
     load_checkpoint(checkpoint_file, datamodule, model)
 
-    logger.info(f"Training for at most {epochs} epochs.")
-
     # stores all information we can think of, to reproduce this later
-    save_json_data(
-        datamodule,
-        model,
-        output_folder,
-        device_manager,
-        epochs,
-        batch_size,
-        accumulate_grad_batches,
-        drop_incomplete_batch,
-        validation_period,
-        cache_samples,
-        seed,
-        parallel,
-        monitoring_interval,
+    save_json_metadata(
+        output_file=output_folder / "train.meta.json",
+        datamodule=datamodule,
+        model=model,
+        device_manager=device_manager,
+        output_folder=output_folder,
+        epochs=epochs,
+        batch_size=batch_size,
+        accumulate_grad_batches=accumulate_grad_batches,
+        drop_incomplete_batch=drop_incomplete_batch,
+        validation_period=validation_period,
+        cache_samples=cache_samples,
+        seed=seed,
+        parallel=parallel,
+        monitoring_interval=monitoring_interval,
         balance_classes=balance_classes,
     )
 
+    logger.info(f"Training for at most {epochs} epochs.")
+
     run(
         model=model,
         datamodule=datamodule,
diff --git a/src/mednet/libs/classification/scripts/upload.py b/src/mednet/libs/classification/scripts/upload.py
index f4b0114ceced942f0d9eee4ef26464ff46118b10..12ff291d8f12853b7c83fbd05f5f95de5544af5c 100644
--- a/src/mednet/libs/classification/scripts/upload.py
+++ b/src/mednet/libs/classification/scripts/upload.py
@@ -81,13 +81,14 @@ def upload(
     # get train files
     train_folder = experiment_folder / "model"
     train_log_file = train_folder / "trainlog.pdf"
-    train_meta_file = train_folder / "meta.json"
+    train_meta_file = train_folder / "train.meta.json"
     train_model_file = get_checkpoint_to_run_inference(train_folder)
     train_files = [train_meta_file, train_model_file, train_log_file]
 
     # get evaluation files
     evaluation_file = experiment_folder / "evaluation.json"
     evaluation_meta_file = experiment_folder / "evaluation.meta.json"
+    evaluation_rst_file = experiment_folder / "evaluation.rst"
     evaluation_log_file = experiment_folder / "evaluation.pdf"
     evaluation_files = [
         evaluation_file,
diff --git a/src/mednet/libs/common/scripts/predict.py b/src/mednet/libs/common/scripts/predict.py
index 422b9e56585649a124e1d9c01428ed07178b23b7..ac0e2e0968822734e862b1a22e2a7ce6671c209b 100644
--- a/src/mednet/libs/common/scripts/predict.py
+++ b/src/mednet/libs/common/scripts/predict.py
@@ -163,32 +163,3 @@ def load_checkpoint(
 
     logger.info(f"Loading checkpoint from `{weight}`...")
     return type(model).load_from_checkpoint(weight, strict=False)
-
-
-def save_json_data(
-    datamodule: mednet.libs.common.data.datamodule.ConcatDataModule,
-    model: mednet.libs.common.models.model.Model,
-    device_manager,
-    output_file: pathlib.Path,
-) -> None:  # numpydoc ignore=PR01
-    """Save prediction hyperparameters into a .json file."""
-
-    from .utils import (
-        device_properties,
-        execution_metadata,
-        model_summary,
-        save_json_with_backup,
-    )
-
-    json_data: dict[str, typing.Any] = execution_metadata()
-    json_data.update(device_properties(device_manager.device_type))
-    json_data.update(
-        dict(
-            database_name=datamodule.database_name,
-            database_split=datamodule.split_name,
-            model_name=model.name,
-        ),
-    )
-    json_data.update(model_summary(model))
-    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
-    save_json_with_backup(output_file, json_data)
diff --git a/src/mednet/libs/common/scripts/train.py b/src/mednet/libs/common/scripts/train.py
index 10da6c1acaf856b629671228f632f5ae97440a36..52e780505e7087069e1ad80f12052accd63146db 100644
--- a/src/mednet/libs/common/scripts/train.py
+++ b/src/mednet/libs/common/scripts/train.py
@@ -4,7 +4,6 @@
 
 import functools
 import pathlib
-import typing
 
 import click
 from clapper.click import ResourceOption
@@ -299,51 +298,3 @@ def setup_datamodule(
 
     datamodule.prepare_data()
     datamodule.setup(stage="fit")
-
-
-def save_json_data(
-    datamodule,
-    model,
-    output_folder,
-    device_manager,
-    epochs,
-    batch_size,
-    accumulate_grad_batches,
-    drop_incomplete_batch,
-    validation_period,
-    cache_samples,
-    seed,
-    parallel,
-    monitoring_interval,
-    **kwargs,
-) -> None:  # numpydoc ignore=PR01
-    """Save training hyperparameters into a .json file."""
-    from .utils import (
-        device_properties,
-        execution_metadata,
-        model_summary,
-        save_json_with_backup,
-    )
-
-    json_data: dict[str, typing.Any] = execution_metadata()
-    json_data.update(device_properties(device_manager.device_type))
-    json_data.update(
-        dict(
-            database_name=datamodule.database_name,
-            split_name=datamodule.split_name,
-            epochs=epochs,
-            batch_size=batch_size,
-            accumulate_grad_batches=accumulate_grad_batches,
-            drop_incomplete_batch=drop_incomplete_batch,
-            validation_period=validation_period,
-            cache_samples=cache_samples,
-            seed=seed,
-            parallel=parallel,
-            monitoring_interval=monitoring_interval,
-            model_name=model.name,
-        ),
-    )
-    json_data.update(kwargs)
-    json_data.update(model_summary(model))
-    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
-    save_json_with_backup(output_folder / "meta.json", json_data)
diff --git a/src/mednet/libs/common/scripts/utils.py b/src/mednet/libs/common/scripts/utils.py
index 66381e96708b307af3deaebd8c23eb6ccf3d7db8..3986cf0542b7b94bf635fe6526c8205925f11dd2 100644
--- a/src/mednet/libs/common/scripts/utils.py
+++ b/src/mednet/libs/common/scripts/utils.py
@@ -12,6 +12,7 @@ import typing
 
 import lightning.pytorch
 import lightning.pytorch.callbacks
+import numpy
 import torch.nn
 from mednet.libs.common.engine.device import SupportedPytorchDevice
 
@@ -211,6 +212,34 @@ def execution_metadata() -> dict[str, int | float | str | dict[str, str] | list[
     }
 
 
+class NumpyJSONEncoder(json.JSONEncoder):
+    """Extends the standard JSON encoder to support Numpy arrays."""
+
+    def default(self, o: typing.Any) -> typing.Any:
+        """If input object is a ndarray it will be converted into a list.
+
+        Parameters
+        ----------
+        o
+            Input object to be JSON serialized.
+
+        Returns
+        -------
+            A serializable representation of object ``o``.
+        """
+
+        if isinstance(o, numpy.ndarray):
+            try:
+                retval = o.tolist()
+            except TypeError:
+                pass
+            else:
+                return retval
+
+        # Let the base class default method raise the TypeError
+        return super().default(o)
+
+
 def save_json_with_backup(path: pathlib.Path, data: JSONable) -> None:
     """Save a dictionary into a JSON file with path checking and backup.
 
@@ -227,12 +256,44 @@ def save_json_with_backup(path: pathlib.Path, data: JSONable) -> None:
         The data to save on the JSON file.
     """
 
-    logger.info(f"Writing run metadata at `{path}`...")
-
     path.parent.mkdir(parents=True, exist_ok=True)
     if path.exists():
         backup = path.parent / (path.name + "~")
         shutil.copy(path, backup)
 
     with path.open("w") as f:
-        json.dump(data, f, indent=2)
+        json.dump(data, f, indent=2, cls=NumpyJSONEncoder)
+
+
+def save_json_metadata(
+    output_file: pathlib.Path,
+    **kwargs: typing.Any,
+) -> None:  # numpydoc ignore=PR01
+    """Save execution metadata and command-line options into a .json file."""
+
+    from mednet.libs.common.data.datamodule import ConcatDataModule
+    from mednet.libs.common.engine.device import DeviceManager
+    from mednet.libs.common.models.model import Model
+
+
+    json_data: dict[str, typing.Any] = execution_metadata()
+
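+    # expand known object types into serializable metadata; store the rest verbatim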
+    for key, value in kwargs.items():
+        match value:
+            case ConcatDataModule():
+                json_data["database_name"] = value.database_name
+                json_data["database_split"] = value.split_name
+            case Model():
+                json_data["model_name"] = value.name
+                json_data.update(model_summary(value))
+            case pathlib.Path():
+                json_data[key] = str(value)
+            case DeviceManager():
+                json_data.update(device_properties(value.device_type))
+            case _:
+                json_data[key] = value
+
+    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
+    logger.info(f"Writing run metadata at `{output_file}`...")
+    save_json_with_backup(output_file, json_data)
diff --git a/src/mednet/libs/segmentation/scripts/dump_annotations.py b/src/mednet/libs/segmentation/scripts/dump_annotations.py
index d199ccdc7540d0d3112a0b9cd41a372d687dfdf8..5ee47567ab38e79ce386fd0b3ed18c84aeae9a54 100644
--- a/src/mednet/libs/segmentation/scripts/dump_annotations.py
+++ b/src/mednet/libs/segmentation/scripts/dump_annotations.py
@@ -70,11 +70,22 @@ def dump_annotations(
     from mednet.libs.common.scripts.predict import (
         setup_datamodule,
     )
-    from mednet.libs.common.scripts.utils import save_json_with_backup
+    from mednet.libs.common.scripts.utils import (
+        save_json_metadata,
+        save_json_with_backup,
+    )
     from mednet.libs.segmentation.engine.dumper import run
 
     setup_datamodule(datamodule, model, batch_size=1, parallel=-1)
 
+    # stores all information we can think of, to reproduce this later
+    save_json_metadata(
+        output_file=output_folder / "annotations.meta.json",
+        output_folder=output_folder,
+        model=model,
+        datamodule=datamodule,
+    )
+
     json_data = run(datamodule, output_folder)
 
     base_file = output_folder / "annotations.json"
diff --git a/src/mednet/libs/segmentation/scripts/evaluate.py b/src/mednet/libs/segmentation/scripts/evaluate.py
index dc579d225c17a4ee846ddb34b66f75e3dd33aafd..b6ea220dd3bb232f08889a70a017ff40d5fe1434 100644
--- a/src/mednet/libs/segmentation/scripts/evaluate.py
+++ b/src/mednet/libs/segmentation/scripts/evaluate.py
@@ -176,9 +176,8 @@ def evaluate(
     import matplotlib.backends.backend_pdf
     import numpy
     import tabulate
-    from mednet.libs.classification.engine.evaluator import NumpyJSONEncoder
     from mednet.libs.common.scripts.utils import (
-        execution_metadata,
+        save_json_metadata,
         save_json_with_backup,
     )
     from mednet.libs.segmentation.engine.evaluator import (
@@ -191,29 +190,20 @@ def evaluate(
         specificity,
     )
 
-    evaluation_filename = "evaluation.json"
-    evaluation_file = pathlib.Path(output_folder) / evaluation_filename
-
     with predictions.open("r") as f:
         predict_data = json.load(f)
 
     # register metadata
-    json_data: dict[str, typing.Any] = execution_metadata()
-    json_data.update(
-        dict(
-            predictions=str(predictions),
-            output_folder=str(output_folder),
-            threshold=threshold,
-            metric=metric,
-            steps=steps,
-            compare_annotator=str(compare_annotator),
-            plot=plot,
-        ),
+    save_json_metadata(
+        output_file=output_folder / "evaluation.meta.json",
+        predictions=str(predictions),
+        output_folder=str(output_folder),
+        threshold=threshold,
+        metric=metric,
+        steps=steps,
+        compare_annotator=str(compare_annotator),
+        plot=plot,
     )
-    json_data = {k.replace("_", "-"): v for k, v in json_data.items()}
-    evaluation_meta = evaluation_file.with_suffix(".meta.json")
-    logger.info(f"Saving evaluation metadata at `{str(evaluation_meta)}`...")
-    save_json_with_backup(evaluation_meta, json_data)
 
     threshold = validate_threshold(threshold, predict_data)
     threshold_list = numpy.arange(
@@ -290,8 +280,7 @@ def evaluate(
     # records full result analysis to a JSON file
     evaluation_file = output_folder / "evaluation.json"
     logger.info(f"Saving evaluation results at `{str(evaluation_file)}`...")
-    with evaluation_file.open("w") as f:
-        json.dump(eval_json_data, f, indent=2, cls=NumpyJSONEncoder)
+    save_json_with_backup(evaluation_file, eval_json_data)
 
     table_format = "rst"
     table = tabulate.tabulate(
diff --git a/src/mednet/libs/segmentation/scripts/experiment.py b/src/mednet/libs/segmentation/scripts/experiment.py
index 523e09fb448da82d40cec6c60c60a7baeb244ab5..5960e1394e9fc819d9b44e90055d7d1dfd74f0d5 100644
--- a/src/mednet/libs/segmentation/scripts/experiment.py
+++ b/src/mednet/libs/segmentation/scripts/experiment.py
@@ -58,8 +58,7 @@ def experiment(
     running prediction, and evaluating.  It organises the output in a preset way::
 
         \b
-       └─ <output-folder>/
-          ├── model/  # the generated model will be here
+       └─ <output-folder>/  # the generated model will be here
           ├── predictions.json  # the prediction outputs
           └── evaluation.json  # the evaluation outputs
     """
@@ -71,11 +70,10 @@ def experiment(
 
     from .train import train
 
-    train_output_folder = output_folder / "model"
     ctx.invoke(
         train,
         model=model,
-        output_folder=train_output_folder,
+        output_folder=output_folder,
         epochs=epochs,
         batch_size=batch_size,
         accumulate_grad_batches=accumulate_grad_batches,
@@ -96,11 +94,11 @@ def experiment(
     logger.info("Started train analysis")
     from mednet.libs.common.scripts.train_analysis import train_analysis
 
-    logdir = train_output_folder / "logs"
+    logdir = output_folder / "logs"
     ctx.invoke(
         train_analysis,
         logdir=logdir,
-        output_folder=train_output_folder,
+        output_folder=output_folder,
     )
 
     logger.info("Ended train analysis")
@@ -116,7 +114,7 @@ def experiment(
         model=model,
         datamodule=datamodule,
         device=device,
-        weight=train_output_folder,
+        weight=output_folder,
         batch_size=batch_size,
         parallel=parallel,
     )
diff --git a/src/mednet/libs/segmentation/scripts/predict.py b/src/mednet/libs/segmentation/scripts/predict.py
index b15d7dafc74becbe38ff8a0b098d6858fdc62846..b82d70bb5ddabc9a6d67470ebf938bc9b6e803ca 100644
--- a/src/mednet/libs/segmentation/scripts/predict.py
+++ b/src/mednet/libs/segmentation/scripts/predict.py
@@ -51,21 +51,31 @@ def predict(
     from mednet.libs.common.engine.device import DeviceManager
     from mednet.libs.common.scripts.predict import (
         load_checkpoint,
-        save_json_data,
         setup_datamodule,
     )
-    from mednet.libs.common.scripts.utils import save_json_with_backup
+    from mednet.libs.common.scripts.utils import (
+        save_json_metadata,
+        save_json_with_backup,
+    )
     from mednet.libs.segmentation.engine.predictor import run
 
-    predictions_meta_file = output_folder / "predictions.meta.json"
-
     setup_datamodule(datamodule, model, batch_size, parallel)
     model = load_checkpoint(model, weight)
     device_manager = DeviceManager(device)
-    save_json_data(datamodule, model, device_manager, predictions_meta_file)
 
-    json_predictions = run(model, datamodule, device_manager, output_folder)
+    save_json_metadata(
+        output_file=output_folder / "predictions.meta.json",
+        output_folder=output_folder,
+        model=model,
+        datamodule=datamodule,
+        batch_size=batch_size,
+        device=device,
+        weight=weight,
+        parallel=parallel,
+    )
+
+    predictions = run(model, datamodule, device_manager, output_folder)
 
     predictions_file = output_folder / "predictions.json"
-    save_json_with_backup(predictions_file, json_predictions)
+    save_json_with_backup(predictions_file, predictions)
     logger.info(f"Predictions saved to `{str(predictions_file)}`")
diff --git a/src/mednet/libs/segmentation/scripts/train.py b/src/mednet/libs/segmentation/scripts/train.py
index 308d2e300c608b0d11c9b60ca36244a4bad02e2c..1bf5ec00c4712c9c6812638b035e389bd2a8323c 100644
--- a/src/mednet/libs/segmentation/scripts/train.py
+++ b/src/mednet/libs/segmentation/scripts/train.py
@@ -51,9 +51,9 @@ def train(
     from mednet.libs.common.scripts.train import (
         get_checkpoint_file,
         load_checkpoint,
-        save_json_data,
         setup_datamodule,
     )
+    from mednet.libs.common.scripts.utils import save_json_metadata
 
     seed_everything(seed)
 
@@ -76,25 +76,26 @@ def train(
     checkpoint_file = get_checkpoint_file(output_folder)
     load_checkpoint(checkpoint_file, datamodule, model)
 
-    logger.info(f"Training for at most {epochs} epochs.")
-
     # stores all information we can think of, to reproduce this later
-    save_json_data(
-        datamodule,
-        model,
-        output_folder,
-        device_manager,
-        epochs,
-        batch_size,
-        accumulate_grad_batches,
-        drop_incomplete_batch,
-        validation_period,
-        cache_samples,
-        seed,
-        parallel,
-        monitoring_interval,
+    save_json_metadata(
+        output_file=output_folder / "train.meta.json",
+        datamodule=datamodule,
+        model=model,
+        output_folder=output_folder,
+        device_manager=device_manager,
+        epochs=epochs,
+        batch_size=batch_size,
+        accumulate_grad_batches=accumulate_grad_batches,
+        drop_incomplete_batch=drop_incomplete_batch,
+        validation_period=validation_period,
+        cache_samples=cache_samples,
+        seed=seed,
+        parallel=parallel,
+        monitoring_interval=monitoring_interval,
     )
 
+    logger.info(f"Training for at most {epochs} epochs.")
+
     run(
         model=model,
         datamodule=datamodule,
diff --git a/tests/classification/test_cli.py b/tests/classification/test_cli.py
index 54d09503fac4653265aae61d5a57d93357c48ebd..391fa818db6233f419c2d1cdcf29d87fd5f6040d 100644
--- a/tests/classification/test_cli.py
+++ b/tests/classification/test_cli.py
@@ -216,7 +216,7 @@ def test_train_pasa_montgomery(session_tmp_path):
         assert best.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
 
         assert len(list((output_folder / "logs").glob("events.out.tfevents.*"))) == 1
-        assert (output_folder / "meta.json").exists()
+        assert (output_folder / "train.meta.json").exists()
 
         keywords = {
             r"^Writing run metadata at .*$": 1,
@@ -225,7 +225,6 @@ def test_train_pasa_montgomery(session_tmp_path):
             r"^Applying train/valid loss balancing...$": 1,
             r"^Training for at most 1 epochs.$": 1,
             r"^Uninitialised pasa model - computing z-norm factors from train dataloader.$": 1,
-            r"^Writing run metadata at.*$": 1,
             r"^Dataset `train` is already setup. Not re-instantiating it.$": 1,
             r"^Dataset `validation` is already setup. Not re-instantiating it.$": 1,
         }
@@ -462,7 +461,7 @@ def test_train_pasa_montgomery_from_checkpoint(tmp_path):
     best = _get_checkpoint_from_alias(tmp_path, "best")
     assert best.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
 
-    assert (tmp_path / "meta.json").exists()
+    assert (tmp_path / "train.meta.json").exists()
     assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 1
 
     with stdout_logging() as buf:
@@ -484,7 +483,7 @@ def test_train_pasa_montgomery_from_checkpoint(tmp_path):
         assert last.name.endswith("epoch=1" + CHECKPOINT_EXTENSION)
         best = _get_checkpoint_from_alias(tmp_path, "best")
 
-        assert (tmp_path / "meta.json").exists()
+        assert (tmp_path / "train.meta.json").exists()
         assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 2
 
         keywords = {
@@ -529,33 +528,17 @@ def test_experiment(tmp_path):
     )
     _assert_exit_0(result)
 
-    assert (tmp_path / "model" / "meta.json").exists()
-    assert (tmp_path / "model" / f"model-at-epoch={num_epochs-1}.ckpt").exists()
+    assert (tmp_path / "train.meta.json").exists()
+    assert (tmp_path / f"model-at-epoch={num_epochs-1}.ckpt").exists()
     assert (tmp_path / "predictions.json").exists()
     assert (tmp_path / "predictions.meta.json").exists()
 
     # Need to glob because we cannot be sure of the checkpoint with lowest validation loss
     assert (
-        len(
-            list(
-                (tmp_path / "model").glob(
-                    "model-at-lowest-validation-loss-epoch=*.ckpt"
-                )
-            )
-        )
-        == 1
-    )
-    assert (tmp_path / "model" / "trainlog.pdf").exists()
-    assert (
-        len(
-            list(
-                (tmp_path / "model" / "logs").glob(
-                    "events.out.tfevents.*",
-                ),
-            ),
-        )
-        == 1
+        len(list(tmp_path.glob("model-at-lowest-validation-loss-epoch=*.ckpt"))) == 1
     )
+    assert (tmp_path / "trainlog.pdf").exists()
+    assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 1
     assert (tmp_path / "evaluation.json").exists()
     assert (tmp_path / "evaluation.meta.json").exists()
     assert (tmp_path / "evaluation.rst").exists()
diff --git a/tests/segmentation/test_cli.py b/tests/segmentation/test_cli.py
index 5f96456b767a1c6935aae4f462519b4ce3c8f96f..a66daa24b5ae15b2f55d3a315c80f190dc816249 100644
--- a/tests/segmentation/test_cli.py
+++ b/tests/segmentation/test_cli.py
@@ -184,13 +184,13 @@ def test_train_lwnet_drive(session_tmp_path):
         assert best.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
 
         assert len(list((output_folder / "logs").glob("events.out.tfevents.*"))) == 1
-        assert (output_folder / "meta.json").exists()
+        assert (output_folder / "train.meta.json").exists()
 
         keywords = {
+            r"^Writing run metadata at .*$": 1,
             r"^Loading dataset:`train` without caching. Trade-off: CPU RAM usage: less | Disk I/O: more.$": 1,
             r"^Training for at most 1 epochs.$": 1,
             r"^Uninitialised lwnet model - computing z-norm factors from train dataloader.$": 1,
-            r"^Writing run metadata at.*$": 1,
             r"^Dataset `train` is already setup. Not re-instantiating it.$": 3,
         }
         buf.seek(0)
@@ -236,6 +236,7 @@ def test_predict_lwnet_drive(session_tmp_path):
         assert (output_folder / "predictions.json").exists()
 
         keywords = {
+            r"^Writing run metadata at .*$": 1,
             r"^Loading dataset: * without caching. Trade-off: CPU RAM usage: less | Disk I/O: more$": 2,
             r"^Loading checkpoint from .*$": 1,
             r"^Restoring normalizer from checkpoint.$": 1,
@@ -255,6 +256,48 @@ def test_predict_lwnet_drive(session_tmp_path):
             )
 
 
+@pytest.mark.slow
+@pytest.mark.skip_if_rc_var_not_set("datadir.drive")
+def test_dump_annotations_lwnet_drive(session_tmp_path):
+    from mednet.libs.segmentation.scripts.dump_annotations import dump_annotations
+
+    runner = CliRunner()
+
+    with stdout_logging() as buf:
+        output_folder = (
+            session_tmp_path / "segmentation-standalone" / "second-annotator"
+        )
+        result = runner.invoke(
+            dump_annotations,
+            [
+                "lwnet",
+                "drive-2nd",
+                "-vv",
+                f"--output-folder={str(output_folder)}",
+            ],
+        )
+        _assert_exit_0(result)
+
+        assert (output_folder / "annotations.meta.json").exists()
+        assert (output_folder / "annotations.json").exists()
+
+        keywords = {
+            r"^Writing run metadata at .*$": 1,
+            r"^Loading dataset:.*$": 1,
+            r"^Dumping annotations from split.*$": 1,
+        }
+
+        buf.seek(0)
+        logging_output = buf.read()
+
+        for k, v in keywords.items():
+            assert _str_counter(k, logging_output) == v, (
+                f"Count for string '{k}' appeared "
+                f"({_str_counter(k, logging_output)}) "
+                f"instead of the expected {v}:\nOutput:\n{logging_output}"
+            )
+
+
 @pytest.mark.slow
 @pytest.mark.skip_if_rc_var_not_set("datadir.drive")
 def test_evaluate_lwnet_drive(session_tmp_path):
@@ -281,7 +324,7 @@ def test_evaluate_lwnet_drive(session_tmp_path):
         assert (output_folder / "evaluation.pdf").exists()
 
         keywords = {
-            r"^Saving evaluation metadata at .*$": 1,
+            r"^Writing run metadata at .*$": 1,
             r"^Counting true/false positive/negatives at split.*$": 2,
             r"^Evaluating threshold on split .*$": 1,
             r"^Computing performance on split .*...$": 2,
@@ -330,7 +373,7 @@ def test_train_lwnet_drive_from_checkpoint(tmp_path):
     best = _get_checkpoint_from_alias(tmp_path, "best")
     assert best.name.endswith("epoch=0" + CHECKPOINT_EXTENSION)
 
-    assert (tmp_path / "meta.json").exists()
+    assert (tmp_path / "train.meta.json").exists()
     assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 1
 
     with stdout_logging() as buf:
@@ -352,14 +395,14 @@ def test_train_lwnet_drive_from_checkpoint(tmp_path):
         assert last.name.endswith("epoch=1" + CHECKPOINT_EXTENSION)
         best = _get_checkpoint_from_alias(tmp_path, "best")
 
-        assert (tmp_path / "meta.json").exists()
+        assert (tmp_path / "train.meta.json").exists()
         assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 2
 
         keywords = {
+            r"^Writing run metadata at .*$": 1,
             r"^Loading dataset:`train` without caching. Trade-off: CPU RAM usage: less | Disk I/O: more.$": 1,
             r"^Training for at most 2 epochs.$": 1,
             r"^Resuming from epoch 0 \(checkpoint file: .*$": 1,
-            r"^Writing run metadata at.*$": 1,
             r"^Dataset `train` is already setup. Not re-instantiating it.$": 3,
             r"^Restoring normalizer from checkpoint.$": 1,
         }
@@ -394,32 +437,13 @@ def test_experiment(tmp_path):
     )
     _assert_exit_0(result)
 
-    assert (tmp_path / "model" / "meta.json").exists()
-    assert (tmp_path / "model" / f"model-at-epoch={num_epochs-1}.ckpt").exists()
+    assert (tmp_path / "train.meta.json").exists()
+    assert (tmp_path / f"model-at-epoch={num_epochs-1}.ckpt").exists()
 
     # Need to glob because we cannot be sure of the checkpoint with lowest validation loss
-    assert (
-        len(
-            list(
-                (tmp_path / "model").glob(
-                    "model-at-lowest-validation-loss-epoch=*.ckpt",
-                ),
-            ),
-        )
-        == 1
-    )
-    assert (tmp_path / "model" / "trainlog.pdf").exists()
-    assert (
-        len(
-            list(
-                (tmp_path / "model" / "logs").glob(
-                    "events.out.tfevents.*",
-                ),
-            ),
-        )
-        == 1
-    )
-
+    assert len(list(tmp_path.glob("model-at-lowest-validation-loss-epoch=*.ckpt"))) == 1
+    assert (tmp_path / "trainlog.pdf").exists()
+    assert len(list((tmp_path / "logs").glob("events.out.tfevents.*"))) == 1
     assert (tmp_path / "predictions.json").exists()
     assert (tmp_path / "predictions.meta.json").exists()
     assert (tmp_path / "evaluation.json").exists()