Skip to content
Snippets Groups Projects
Commit c36b47dc authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Implemented the checkpoint_dir feature

parent 1c3f542e
No related branches found
No related tags found
1 merge request!268Implemented the checkpoint_dir feature
Pipeline #56671 passed
......@@ -42,6 +42,7 @@ def execute_vanilla_biometrics(
checkpoint,
dask_partition_size,
dask_n_workers,
checkpoint_dir=None,
**kwargs,
):
"""
......@@ -72,26 +73,39 @@ def execute_vanilla_biometrics(
Groups of the dataset that will be requested from the database interface.
output: str
Path where the results and checkpoints will be saved to.
Path where the scores will be saved.
write_metadata_scores: bool
Use the CSVScoreWriter instead of the FourColumnScoreWriter when True.
checkpoint: bool
Whether checkpoint files will be created for every step of the pipelines.
checkpoint_dir: str
If `checkpoint` is set, this path will be used to save the checkpoints.
    If `None`, the value of `output` will be used.
"""
if not os.path.exists(output):
os.makedirs(output, exist_ok=True)
# Setting the `checkpoint_dir`
if checkpoint_dir is None:
checkpoint_dir = output
else:
os.makedirs(checkpoint_dir, exist_ok=True)
# Scores are written on `output`
if write_metadata_scores:
pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
else:
pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "./tmp"))
# Check if it's already checkpointed
    # Checkpoint the pipeline if it is not already checkpointed
if checkpoint and not is_checkpointed(pipeline):
hash_fn = database.hash_fn if hasattr(database, "hash_fn") else None
pipeline = checkpoint_vanilla_biometrics(pipeline, output, hash_fn=hash_fn)
pipeline = checkpoint_vanilla_biometrics(
pipeline, checkpoint_dir, hash_fn=hash_fn
)
# Load the background model samples only if the transformer requires fitting
if all([is_estimator_stateless(step) for step in pipeline.transformer]):
......@@ -162,6 +176,7 @@ def execute_vanilla_biometrics_ztnorm(
checkpoint,
dask_partition_size,
dask_n_workers,
checkpoint_dir=None,
**kwargs,
):
"""
......@@ -209,6 +224,10 @@ def execute_vanilla_biometrics_ztnorm(
consider_genuines: float
If set, will consider genuine scores in the ZT score normalization
    checkpoint_dir: str
        If `checkpoint` is set, this path will be used to save the checkpoints.
        If `None`, the value of `output` will be used.
"""
def _merge_references_ztnorm(biometric_references, probes, zprobes, treferences):
......@@ -225,6 +244,13 @@ def execute_vanilla_biometrics_ztnorm(
if not os.path.exists(output):
os.makedirs(output, exist_ok=True)
# Setting the `checkpoint_dir`
if checkpoint_dir is None:
checkpoint_dir = output
else:
os.makedirs(checkpoint_dir, exist_ok=True)
# Scores are written on `output`
if write_metadata_scores:
pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
else:
......@@ -232,13 +258,13 @@ def execute_vanilla_biometrics_ztnorm(
# Check if it's already checkpointed
if checkpoint and not is_checkpointed(pipeline):
pipeline = checkpoint_vanilla_biometrics(pipeline, output)
pipeline = checkpoint_vanilla_biometrics(pipeline, checkpoint_dir)
# Patching the pipeline in case of ZNorm and checkpointing it
pipeline = ZTNormPipeline(pipeline)
if checkpoint:
pipeline.ztnorm_solver = ZTNormCheckpointWrapper(
pipeline.ztnorm_solver, os.path.join(output, "normed-scores")
pipeline.ztnorm_solver, os.path.join(checkpoint_dir, "normed-scores")
)
background_model_samples = database.background_model_samples()
......
......@@ -96,7 +96,7 @@ It is possible to do it via configuration file
"--output",
show_default=True,
default="results",
help="Name of output directory where output scores will be saved. In case --checkpoint is set, checkpoints will be saved in this directory.",
help="Name of output directory where output scores will be saved.",
cls=ResourceOption,
)
@click.option(
......@@ -113,6 +113,14 @@ It is possible to do it via configuration file
    help="If set, it will checkpoint all steps of the pipeline. Checkpoints will be saved in `--checkpoint-dir` if set, otherwise in `--output`.",
cls=ResourceOption,
)
@click.option(
"-c",
"--checkpoint-dir",
show_default=True,
default=None,
help="Name of output directory where the checkpoints will be saved. In case --checkpoint is set, checkpoints will be saved in this directory.",
cls=ResourceOption,
)
@click.option(
"--dask-partition-size",
"-s",
......@@ -142,6 +150,7 @@ def vanilla_biometrics(
output,
write_metadata_scores,
checkpoint,
checkpoint_dir,
dask_partition_size,
dask_n_workers,
**kwargs,
......@@ -212,6 +221,7 @@ def vanilla_biometrics(
checkpoint,
dask_partition_size,
dask_n_workers,
checkpoint_dir=checkpoint_dir,
**kwargs,
)
......
......@@ -355,11 +355,13 @@ To enable the checkpointing of a Transformer or :any:`bob.bio.base.pipelines.van
This class takes a Transformer as input and returns the same Transformer with the ability to automatically create checkpoint files.
The :py:class:`bob.pipelines.CheckpointWrapper` class is available in the :py:mod:`bob.pipelines`.
The ``--checkpoint`` option is a command-line option that automatically wraps every steps of the pipeline with checkpointing::
The ``--checkpoint`` option is a command-line option that automatically wraps every step of the pipeline with checkpointing.
If set, the ``--checkpoint-dir`` option sets the path where those checkpoints are saved::
$ bob bio pipelines vanilla-biometrics <database> <pipeline> --checkpoint --output <output_dir>
$ bob bio pipelines vanilla-biometrics <database> <pipeline> --checkpoint --output <output_dir> --checkpoint-dir <checkpoint_dir>
When doing so, the output of each Transformer of the pipeline will be saved to the disk in the ``<output_dir>`` folder specified with the ``--output`` option.
When doing so, the output of each Transformer of the pipeline will be saved to the disk in the ``<checkpoint_dir>`` folder specified with the ``--checkpoint-dir`` option.
Output scores will be saved in ``<output_dir>``.
.. WARNING::
......@@ -371,7 +373,7 @@ When doing so, the output of each Transformer of the pipeline will be saved to t
**You** have to take care of removing invalid checkpoints files.
When changing the pipeline or the dataset of an experiment, you should change
the output folder (``--output``) accordingly. Otherwise, the system could try to
the checkpoint folder (``--checkpoint-dir``) accordingly. Otherwise, the system could try to
load a checkpoint of an older experiment, or samples from another dataset.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment