Skip to content
Snippets Groups Projects
Commit c36b47dc authored by Tiago de Freitas Pereira's avatar Tiago de Freitas Pereira
Browse files

Implemented the checkpoint_dir feature

parent 1c3f542e
No related branches found
No related tags found
1 merge request!268Implemented the checkpoint_dir feature
Pipeline #56671 passed
......@@ -42,6 +42,7 @@ def execute_vanilla_biometrics(
checkpoint,
dask_partition_size,
dask_n_workers,
checkpoint_dir=None,
**kwargs,
):
"""
......@@ -72,26 +73,39 @@ def execute_vanilla_biometrics(
Groups of the dataset that will be requested from the database interface.
output: str
Path where the results and checkpoints will be saved to.
Path where the scores will be saved.
write_metadata_scores: bool
Use the CSVScoreWriter instead of the FourColumnScoreWriter when True.
checkpoint: bool
Whether checkpoint files will be created for every step of the pipelines.
checkpoint_dir: str
If `checkpoint` is set, this path will be used to save the checkpoints.
    If `None`, the value of `output` will be used.
"""
if not os.path.exists(output):
os.makedirs(output, exist_ok=True)
# Setting the `checkpoint_dir`
if checkpoint_dir is None:
checkpoint_dir = output
else:
os.makedirs(checkpoint_dir, exist_ok=True)
# Scores are written on `output`
if write_metadata_scores:
pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
else:
pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "./tmp"))
# Check if it's already checkpointed
    # Checkpoint the pipeline if it is not already checkpointed
if checkpoint and not is_checkpointed(pipeline):
hash_fn = database.hash_fn if hasattr(database, "hash_fn") else None
pipeline = checkpoint_vanilla_biometrics(pipeline, output, hash_fn=hash_fn)
pipeline = checkpoint_vanilla_biometrics(
pipeline, checkpoint_dir, hash_fn=hash_fn
)
# Load the background model samples only if the transformer requires fitting
if all([is_estimator_stateless(step) for step in pipeline.transformer]):
......@@ -162,6 +176,7 @@ def execute_vanilla_biometrics_ztnorm(
checkpoint,
dask_partition_size,
dask_n_workers,
checkpoint_dir=None,
**kwargs,
):
"""
......@@ -209,6 +224,10 @@ def execute_vanilla_biometrics_ztnorm(
consider_genuines: float
If set, will consider genuine scores in the ZT score normalization
    checkpoint_dir: str
        If `checkpoint` is set, this path will be used to save the checkpoints.
        If `None`, the value of `output` will be used.
"""
def _merge_references_ztnorm(biometric_references, probes, zprobes, treferences):
......@@ -225,6 +244,13 @@ def execute_vanilla_biometrics_ztnorm(
if not os.path.exists(output):
os.makedirs(output, exist_ok=True)
# Setting the `checkpoint_dir`
if checkpoint_dir is None:
checkpoint_dir = output
else:
os.makedirs(checkpoint_dir, exist_ok=True)
# Scores are written on `output`
if write_metadata_scores:
pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
else:
......@@ -232,13 +258,13 @@ def execute_vanilla_biometrics_ztnorm(
# Check if it's already checkpointed
if checkpoint and not is_checkpointed(pipeline):
pipeline = checkpoint_vanilla_biometrics(pipeline, output)
pipeline = checkpoint_vanilla_biometrics(pipeline, checkpoint_dir)
# Patching the pipeline in case of ZNorm and checkpointing it
pipeline = ZTNormPipeline(pipeline)
if checkpoint:
pipeline.ztnorm_solver = ZTNormCheckpointWrapper(
pipeline.ztnorm_solver, os.path.join(output, "normed-scores")
pipeline.ztnorm_solver, os.path.join(checkpoint_dir, "normed-scores")
)
background_model_samples = database.background_model_samples()
......
......@@ -96,7 +96,7 @@ It is possible to do it via configuration file
"--output",
show_default=True,
default="results",
help="Name of output directory where output scores will be saved. In case --checkpoint is set, checkpoints will be saved in this directory.",
help="Name of output directory where output scores will be saved.",
cls=ResourceOption,
)
@click.option(
......@@ -113,6 +113,14 @@ It is possible to do it via configuration file
    help="If set, it will checkpoint all steps of the pipeline. Checkpoints will be saved in `--checkpoint-dir` if set, otherwise in `--output`.",
cls=ResourceOption,
)
@click.option(
"-c",
"--checkpoint-dir",
show_default=True,
default=None,
help="Name of output directory where the checkpoints will be saved. In case --checkpoint is set, checkpoints will be saved in this directory.",
cls=ResourceOption,
)
@click.option(
"--dask-partition-size",
"-s",
......@@ -142,6 +150,7 @@ def vanilla_biometrics(
output,
write_metadata_scores,
checkpoint,
checkpoint_dir,
dask_partition_size,
dask_n_workers,
**kwargs,
......@@ -212,6 +221,7 @@ def vanilla_biometrics(
checkpoint,
dask_partition_size,
dask_n_workers,
checkpoint_dir=checkpoint_dir,
**kwargs,
)
......
......@@ -355,11 +355,13 @@ To enable the checkpointing of a Transformer or :any:`bob.bio.base.pipelines.van
This class takes a Transformer as input and returns the same Transformer with the ability to automatically create checkpoint files.
The :py:class:`bob.pipelines.CheckpointWrapper` class is available in the :py:mod:`bob.pipelines`.
The ``--checkpoint`` option is a command-line option that automatically wraps every steps of the pipeline with checkpointing::
The ``--checkpoint`` option is a command-line option that automatically wraps every step of the pipeline with checkpointing.
If set, the ``--checkpoint-dir`` option sets the path where those checkpoints are saved::
$ bob bio pipelines vanilla-biometrics <database> <pipeline> --checkpoint --output <output_dir>
$ bob bio pipelines vanilla-biometrics <database> <pipeline> --checkpoint --output <output_dir> --checkpoint-dir <checkpoint_dir>
When doing so, the output of each Transformer of the pipeline will be saved to the disk in the ``<output_dir>`` folder specified with the ``--output`` option.
When doing so, the output of each Transformer of the pipeline will be saved to the disk in the ``<checkpoint_dir>`` folder specified with the ``--checkpoint-dir`` option.
Output scores will be saved in ``<output_dir>``.
.. WARNING::
......@@ -371,7 +373,7 @@ When doing so, the output of each Transformer of the pipeline will be saved to t
**You** have to take care of removing invalid checkpoints files.
When changing the pipeline or the dataset of an experiment, you should change
the output folder (``--output``) accordingly. Otherwise, the system could try to
the checkpoint folder (``--checkpoint-dir``) accordingly. Otherwise, the system could try to
load a checkpoint of an older experiment, or samples from another dataset.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment