From 9b7a8633e4690d9bb39e408b65028db24ad01b6b Mon Sep 17 00:00:00 2001
From: Tiago Freitas Pereira <tiagofrepereira@gmail.com>
Date: Thu, 18 Jun 2020 10:55:59 +0200
Subject: [PATCH] Cleaning up vanilla biometrics

---
 bob/bio/base/script/vanilla_biometrics.py     |  92 +++++++------
 .../base/script/vanilla_biometrics_ztnorm.py  | 127 +++++++++++++-----
 2 files changed, 143 insertions(+), 76 deletions(-)

diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py
index 7af52380..74be6c84 100644
--- a/bob/bio/base/script/vanilla_biometrics.py
+++ b/bob/bio/base/script/vanilla_biometrics.py
@@ -25,7 +25,7 @@ from bob.bio.base.pipelines.vanilla_biometrics import (
     dask_vanilla_biometrics,
     dask_get_partition_size,
     FourColumnsScoreWriter,
-    CSVScoreWriter
+    CSVScoreWriter,
 )
 from dask.delayed import Delayed
 import pkg_resources
@@ -44,31 +44,18 @@ EPILOG = """\b
  -----------------------
 
 
- $ bob pipelines vanilla-biometrics my_experiment.py -vv
+ $ bob pipelines vanilla-biometrics -p my_experiment.py -vv
 
 
  my_experiment.py must contain the following elements:
 
- >>> preprocessor = my_preprocessor() \n
- >>> extractor = my_extractor() \n
- >>> algorithm = my_algorithm() \n
- >>> checkpoints = EXPLAIN CHECKPOINTING \n
+   >>> transformer = ... # A scikit-learn pipeline
+   >>> algorithm   = ... # A `BioAlgorithm`
+   >>> pipeline = VanillaBiometricsPipeline(transformer, algorithm)
+   >>> database = ... # Biometric database connector (a class that implements the methods `background_model_samples`, `references` and `probes`)
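+
+   A minimal sketch of such a file, assuming a distance-based
+   `BioAlgorithm` such as `Distance` is available in
+   `bob.bio.base.pipelines.vanilla_biometrics`, and using the
+   hypothetical classes `MyFeatureExtractor` and `MyDatabase`:
+
+   >>> from sklearn.pipeline import make_pipeline
+   >>> from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometricsPipeline, Distance
+   >>> transformer = make_pipeline(MyFeatureExtractor())  # hypothetical scikit-learn transformer(s)
+   >>> algorithm = Distance()  # assumed distance-based BioAlgorithm
+   >>> pipeline = VanillaBiometricsPipeline(transformer, algorithm)
+   >>> database = MyDatabase()  # hypothetical connector implementing the three methods above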
 
 \b
 
-
-Look at the following example
-
- $ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
-                                    ./bob/pipelines/config/database/mobio_male.py \
-                                    ./bob/pipelines/config/baselines/facecrop_pca.py
-
-\b
-
-
-
-TODO: Work out this help
-
 """
 
 
@@ -122,24 +109,49 @@ def post_process_scores(pipeline, scores, path):
     help="Name of output directory",
 )
 @click.option(
-    "--write-metadata-scores", "-m",
+    "--write-metadata-scores",
+    "-m",
     is_flag=True,
-    help="If set, all the scores will be written with all its metadata",
+    help="If set, all the scores will be written with all its metadata using the `CSVScoreWriter`",
+)
+@click.option(
+    "--checkpoint",
+    "-c",
+    is_flag=True,
+    help="If set, it will checkpoint all steps of the pipeline",
 )
 @verbosity_option(cls=ResourceOption)
-def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_metadata_scores, **kwargs):
+def vanilla_biometrics(
+    pipeline,
+    database,
+    dask_client,
+    groups,
+    output,
+    write_metadata_scores,
+    checkpoint,
+    **kwargs,
+):
     """Runs the simplest biometrics pipeline.
 
-    Such pipeline consists into three sub-pipelines.
-    In all of them, given raw data as input it does the following steps:
+    Such a pipeline consists of two major components.
+    The first component is a scikit-learn `Pipeline`,
+    which defines a sequence of transformations of the
+    input data.
+    The second component is a `BioAlgorithm` that defines
+    the primitives `enroll` and `score`.
+
+    With these two components any biometric experiment can be run.
+    A biometric experiment consists of three sub-pipelines,
+    defined below:
 
     Sub-pipeline 1:\n
     ---------------
 
-    Training background model. Some biometric algorithms demands the training of background model, for instance, PCA/LDA matrix or a Neural networks. This sub-pipeline handles that and it consists of 3 steps:
-
+    Training of the background model.
+    Some biometric algorithms demand the training of a background model, for instance, a PCA/LDA matrix or a neural network.
+
     \b
-    raw_data --> preprocessing >> feature extraction >> train background model --> background_model
+    This pipeline runs: `Pipeline.fit(DATA_FOR_FIT)`
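+
+    \b
+    Roughly equivalent to this sketch (names are illustrative):
+    >>> transformer.fit(background_model_samples)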
 
 
 
@@ -149,26 +161,24 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_me
     ---------------
 
-    Creation of biometric references: This is a standard step in a biometric pipelines.
+    Creation of biometric references: This is a standard step in biometric pipelines.
-    Given a set of samples of one identity, create a biometric reference (a.k.a template) for sub identity. This sub-pipeline handles that in 3 steps and they are the following:
+    Given a set of samples of one identity, create a biometric reference (a.k.a. template) for that identity.
+
 
     \b
     raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference
 
-    Note that this sub-pipeline depends on the previous one
-
+    This pipeline runs: `BioAlgorithm.enroll(Pipeline.transform(DATA_ENROLL))` >> biometric_references
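+
+    \b
+    Roughly equivalent to this sketch (names are illustrative):
+    >>> features = transformer.transform(enroll_samples)
+    >>> biometric_reference = algorithm.enroll(features)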
 
 
     Sub-pipeline 3:\n
     ---------------
 
+    Probing: This is another standard step in biometric pipelines.
+    Given one sample and one biometric reference, it computes a score.
+    Such a score has different meanings depending on the scoring method your biometric algorithm uses.
+    Explaining what scoring means for each biometric algorithm is out of the scope of this help message.
 
-    Probing: This is another standard step in biometric pipelines. Given one sample and one biometric reference, computes a score. Such score has different meanings depending on the scoring method your biometric algorithm uses. It's out of scope to explain in a help message to explain what scoring is for different biometric algorithms.
-
-
-    raw_data --> preprocessing >> feature extraction >> probe(biometric_reference, background_model) --> score
-
-    Note that this sub-pipeline depends on the two previous ones
-
+    This pipeline runs: `BioAlgorithm.score(Pipeline.transform(DATA_SCORE), biometric_references)` >> scores
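+
+    \b
+    Roughly equivalent to this sketch (names and argument order are illustrative):
+    >>> features = transformer.transform(probe_samples)
+    >>> score = algorithm.score(biometric_reference, features)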
 
     """
 
@@ -185,12 +195,12 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_me
     database = vanilla_pipeline.database
     pipeline = vanilla_pipeline.pipeline
     if write_metadata_scores:
-        pipeline.score_writer = CSVScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
     else:
-        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "./tmp"))
 
-    # Check if it's already checkpointed
-    if not isinstance_nested(
+    # Wrap with checkpointing if requested and not already checkpointed
+    if checkpoint and not isinstance_nested(
         pipeline.biometric_algorithm,
         "biometric_algorithm",
         BioAlgorithmCheckpointWrapper,
@@ -206,7 +216,7 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_me
 
         if dask_client is not None and not isinstance_nested(
             pipeline.biometric_algorithm, "biometric_algorithm", BioAlgorithmDaskWrapper
-        ):            
+        ):
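+            # Count all objects to process so a sensible dask
+            # partition size can be derived from the workload.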
             n_objects = (
                 len(background_model_samples) + len(biometric_references) + len(probes)
             )
diff --git a/bob/bio/base/script/vanilla_biometrics_ztnorm.py b/bob/bio/base/script/vanilla_biometrics_ztnorm.py
index 0ef67e3f..57f5bbb9 100644
--- a/bob/bio/base/script/vanilla_biometrics_ztnorm.py
+++ b/bob/bio/base/script/vanilla_biometrics_ztnorm.py
@@ -28,7 +28,7 @@ from bob.bio.base.pipelines.vanilla_biometrics import (
     dask_vanilla_biometrics,
     dask_get_partition_size,
     FourColumnsScoreWriter,
-    CSVScoreWriter
+    CSVScoreWriter,
 )
 from dask.delayed import Delayed
 from bob.bio.base.utils import get_resource_filename
@@ -47,31 +47,19 @@ EPILOG = """\b
  -----------------------
 
 
- $ bob pipelines vanilla-biometrics my_experiment.py -vv
+ $ bob pipelines vanilla-biometrics -p my_experiment.py -vv
 
 
  my_experiment.py must contain the following elements:
 
- >>> preprocessor = my_preprocessor() \n
- >>> extractor = my_extractor() \n
- >>> algorithm = my_algorithm() \n
- >>> checkpoints = EXPLAIN CHECKPOINTING \n
+   >>> transformer = ... # A scikit-learn pipeline
+   >>> algorithm   = ... # A `BioAlgorithm`
+   >>> pipeline = VanillaBiometricsPipeline(transformer, algorithm)
+   >>> database = ... # Biometric database connector (a class that implements the methods `background_model_samples`, `references` and `probes`)
 
 \b
 
 
-Look at the following example
-
- $ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
-                                    ./bob/pipelines/config/database/mobio_male.py \
-                                    ./bob/pipelines/config/baselines/facecrop_pca.py
-
-\b
-
-
-
-TODO: Work out this help
-
 """
 
 
@@ -79,12 +67,11 @@ TODO: Work out this help
     entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG,
 )
 @click.option(
-    "--pipeline", "-p", required=True, help="Vanilla biometrics pipeline",
+    "--pipeline", "-p", required=True, help="An entry point or a configuration file containing a `VanillaBiometricsPipeline`.",
 )
 @click.option(
     "--database",
-    "-d",
-    required=True,
+    "-d",    
     help="Biometric Database connector (class that implements the methods: `background_model_samples`, `references` and `probes`)",
 )
 @click.option(
@@ -115,17 +102,84 @@ TODO: Work out this help
     help="If set, will consider genuine scores in the ZT score normalization",
 )
 @click.option(
-    "--write-metadata-scores", "-m",
+    "--write-metadata-scores",
+    "-m",
     is_flag=True,
     help="If set, all the scores will be written with all its metadata",
 )
-@click.option("--ztnorm-cohort-proportion", default=1., type=float, 
-    help="Sets the percentage of samples used for t-norm and z-norm. Sometimes you don't want to use all the t/z samples for normalization")
+@click.option(
+    "--ztnorm-cohort-proportion",
+    default=1.0,
+    type=float,
+    help="Sets the percentage of samples used for t-norm and z-norm. Sometimes you don't want to use all the t/z samples for normalization",
+)
+@click.option(
+    "--checkpoint",
+    "-c",
+    is_flag=True,
+    help="If set, it will checkpoint all steps of the pipeline",
+)
 @verbosity_option(cls=ResourceOption)
 def vanilla_biometrics_ztnorm(
-    pipeline, database, dask_client, groups, output, consider_genuines, write_metadata_scores, ztnorm_cohort_proportion, **kwargs
+    pipeline,
+    database,
+    dask_client,
+    groups,
+    output,
+    consider_genuines,
+    write_metadata_scores,
+    ztnorm_cohort_proportion,
+    checkpoint,
+    **kwargs,
 ):
-    """Runs the simplest biometrics pipeline under ZTNorm.
+    """Runs the the vanilla-biometrics with ZT-Norm like score normalizations.
+
+    Such a pipeline consists of two major components.
+    The first component is a scikit-learn `Pipeline`,
+    which defines a sequence of transformations of the
+    input data.
+    The second component is a `BioAlgorithm` that defines
+    the primitives `enroll` and `score`.
+
+    With these two components any biometric experiment can be run.
+    A biometric experiment consists of three sub-pipelines,
+    defined below:
+
+    Sub-pipeline 1:\n
+    ---------------
+
+    Training of the background model.
+    Some biometric algorithms demand the training of a background model, for instance, a PCA/LDA matrix or a neural network.
+
+    \b
+    This pipeline runs: `Pipeline.fit(DATA_FOR_FIT)`
+
+
+
+    \b
+
+    Sub-pipeline 2:\n
+    ---------------
+
+    Creation of biometric references: This is a standard step in biometric pipelines.
+    Given a set of samples of one identity, create a biometric reference (a.k.a. template) for that identity.
+
+
+    \b
+    raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference
+
+    This pipeline runs: `BioAlgorithm.enroll(Pipeline.transform(DATA_ENROLL))` >> biometric_references
+
+
+    Sub-pipeline 3:\n
+    ---------------
+
+    Probing: This is another standard step in biometric pipelines.
+    Given one sample and one biometric reference, it computes a score.
+    Such a score has different meanings depending on the scoring method your biometric algorithm uses.
+    Explaining what scoring means for each biometric algorithm is out of the scope of this help message.
+
+    This pipeline runs: `BioAlgorithm.score(Pipeline.transform(DATA_SCORE), biometric_references)` >> scores
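+
+    \b
+    With ZT-Norm enabled, calling the pipeline yields five sets of scores,
+    roughly as in this sketch (names are illustrative):
+    >>> raw, z_normed, t_normed, zt_normed, s_normed = pipeline(
+    ...     background_model_samples, biometric_references,
+    ...     probes, zprobes, treferences,
+    ... )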
 
     """
 
@@ -144,7 +198,6 @@ def vanilla_biometrics_ztnorm(
     if not os.path.exists(output):
         os.makedirs(output, exist_ok=True)
 
-
     # It's necessary to chain load 2 resources together
     pipeline_config = get_resource_filename(pipeline, "bob.bio.pipeline")
     database_config = get_resource_filename(database, "bob.bio.database")
@@ -157,20 +210,18 @@ def vanilla_biometrics_ztnorm(
     pipeline = vanilla_pipeline.pipeline
 
     if write_metadata_scores:
-        pipeline.score_writer = CSVScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
     else:
-        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output,"./tmp"))
-
+        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "./tmp"))
 
-    # Check if it's already checkpointed
-    if not isinstance_nested(
+    # Wrap with checkpointing if requested and not already checkpointed
+    if checkpoint and not isinstance_nested(
         pipeline.biometric_algorithm,
         "biometric_algorithm",
         BioAlgorithmCheckpointWrapper,
     ):
         pipeline = checkpoint_vanilla_biometrics(pipeline, output)
 
-
     # Patching the pipeline in case of ZNorm and checkpointing it
     pipeline = ZTNormPipeline(pipeline)
     pipeline.ztnorm_solver = ZTNormCheckpointWrapper(
@@ -213,8 +264,14 @@ def vanilla_biometrics_ztnorm(
         probes, zprobes = _merge_references_ztnorm(
             biometric_references, probes, zprobes, treferences
         )
-        
-        raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores, s_normed_scores = pipeline(
+
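+        # The ZT-Norm pipeline returns five sets of scores: the raw
+        # scores plus their Z-, T-, ZT- and S-normalized variants.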
+        (
+            raw_scores,
+            z_normed_scores,
+            t_normed_scores,
+            zt_normed_scores,
+            s_normed_scores,
+        ) = pipeline(
             background_model_samples,
             biometric_references,
             probes,
@@ -225,7 +282,7 @@ def vanilla_biometrics_ztnorm(
 
         def _build_filename(score_file_name, suffix):
             return os.path.join(score_file_name, suffix)
-        
+
         # Running RAW_SCORES
         raw_scores = post_process_scores(
             pipeline, raw_scores, _build_filename(score_file_name, "raw_scores")
-- 
GitLab