diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py
index 7af52380c9146629066cec9b2ed6dfeb43168bd8..74be6c840b496751d05facd165b488081dbab4b9 100644
--- a/bob/bio/base/script/vanilla_biometrics.py
+++ b/bob/bio/base/script/vanilla_biometrics.py
@@ -25,7 +25,7 @@ from bob.bio.base.pipelines.vanilla_biometrics import (
     dask_vanilla_biometrics,
     dask_get_partition_size,
     FourColumnsScoreWriter,
-    CSVScoreWriter
+    CSVScoreWriter,
 )
 from dask.delayed import Delayed
 import pkg_resources
@@ -44,31 +44,18 @@ EPILOG = """\b
 -----------------------

-    $ bob pipelines vanilla-biometrics my_experiment.py -vv
+    $ bob pipelines vanilla-biometrics -p my_experiment.py -vv

 my_experiment.py must contain the following elements:

-   >>> preprocessor = my_preprocessor() \n
-   >>> extractor = my_extractor() \n
-   >>> algorithm = my_algorithm() \n
-   >>> checkpoints = EXPLAIN CHECKPOINTING \n
+   >>> transformer = ...  # A scikit-learn pipeline
+   >>> algorithm = ...  # A `BioAlgorithm`
+   >>> pipeline = VanillaBiometricsPipeline(transformer, algorithm)
+   >>> database = ...  # Biometric database connector (a class that implements the methods: `background_model_samples`, `references` and `probes`)

 \b
-
-Look at the following example
-
-    $ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
-    ./bob/pipelines/config/database/mobio_male.py \
-    ./bob/pipelines/config/baselines/facecrop_pca.py
-
-\b
-
-
-
-TODO: Work out this help
-
 """
@@ -122,24 +109,49 @@ def post_process_scores(pipeline, scores, path):
     help="Name of output directory",
 )
 @click.option(
-    "--write-metadata-scores", "-m",
+    "--write-metadata-scores",
+    "-m",
     is_flag=True,
-    help="If set, all the scores will be written with all its metadata",
+    help="If set, all the scores will be written with all their metadata using the `CSVScoreWriter`",
+)
+@click.option(
+    "--checkpoint",
+    "-c",
+    is_flag=True,
+    help="If set, it will checkpoint all steps of the pipeline",
 )
 @verbosity_option(cls=ResourceOption)
-def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_metadata_scores, **kwargs):
+def vanilla_biometrics(
+    pipeline,
+    database,
+    dask_client,
+    groups,
+    output,
+    write_metadata_scores,
+    checkpoint,
+    **kwargs,
+):
     """Runs the simplest biometrics pipeline.

-    Such pipeline consists into three sub-pipelines.
-    In all of them, given raw data as input it does the following steps:
+    Such a pipeline consists of two major components.
+    The first component is a scikit-learn `Pipeline`,
+    where a sequence of transformations of the input data
+    is defined.
+    The second component is a `BioAlgorithm` that defines the primitives
+    `enroll` and `score`.
+
+    With those two components, any biometric experiment can be done.
+    A biometric experiment consists of three sub-pipelines and
+    they are defined below:

     Sub-pipeline 1:\n
     ---------------

-    Training background model. Some biometric algorithms demands the training of background model, for instance, PCA/LDA matrix or a Neural networks. This sub-pipeline handles that and it consists of 3 steps:
-
+    Training background model.
+    Some biometric algorithms demand the training of a background model, for instance, a PCA/LDA matrix or a neural network.
+
     \b
-    raw_data --> preprocessing >> feature extraction >> train background model --> background_model
+    This pipeline runs: `Pipeline.fit(DATA_FOR_FIT)`
@@ -149,26 +161,24 @@
     ---------------

     Creation of biometric references: This is a standard step in a biometric pipelines.
-    Given a set of samples of one identity, create a biometric reference (a.k.a template) for sub identity. This sub-pipeline handles that in 3 steps and they are the following:
+    Given a set of samples of one identity, create a biometric reference (a.k.a. template) for that identity.
+
     \b
     raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference

-    Note that this sub-pipeline depends on the previous one
-
+    This pipeline runs: `BioAlgorithm.enroll(Pipeline.transform(DATA_ENROLL))` >> biometric_references

     Sub-pipeline 3:\n
     ---------------
+    Probing: This is another standard step in biometric pipelines.
+    Given one sample and one biometric reference, this step computes a score.
+    Such score has different meanings depending on the scoring method your biometric algorithm uses.
+    It's out of the scope of this help message to explain what scoring means for different biometric algorithms.

-    Probing: This is another standard step in biometric pipelines. Given one sample and one biometric reference, computes a score. Such score has different meanings depending on the scoring method your biometric algorithm uses. It's out of scope to explain in a help message to explain what scoring is for different biometric algorithms.
-
-
-    raw_data --> preprocessing >> feature extraction >> probe(biometric_reference, background_model) --> score
-
-    Note that this sub-pipeline depends on the two previous ones
-
+    This pipeline runs: `BioAlgorithm.score(Pipeline.transform(DATA_SCORE, biometric_references))` >> scores

     """
@@ -185,12 +195,12 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_me
     database = vanilla_pipeline.database
     pipeline = vanilla_pipeline.pipeline

     if write_metadata_scores:
-        pipeline.score_writer = CSVScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
     else:
-        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "./tmp"))

     # Check if it's already checkpointed
-    if not isinstance_nested(
+    if checkpoint and not isinstance_nested(
         pipeline.biometric_algorithm,
         "biometric_algorithm",
         BioAlgorithmCheckpointWrapper,
@@ -206,7 +216,7 @@
     if dask_client is not None and not isinstance_nested(
         pipeline.biometric_algorithm, "biometric_algorithm", BioAlgorithmDaskWrapper
-    ):    
+    ):
         n_objects = (
             len(background_model_samples) + len(biometric_references) + len(probes)
         )
diff --git a/bob/bio/base/script/vanilla_biometrics_ztnorm.py b/bob/bio/base/script/vanilla_biometrics_ztnorm.py
index 0ef67e3f482b573403e985413bec023151170008..57f5bbb9c46c327ef652707e65855253d8bb7710 100644
--- a/bob/bio/base/script/vanilla_biometrics_ztnorm.py
+++ b/bob/bio/base/script/vanilla_biometrics_ztnorm.py
@@ -28,7 +28,7 @@ from bob.bio.base.pipelines.vanilla_biometrics import (
     dask_vanilla_biometrics,
     dask_get_partition_size,
     FourColumnsScoreWriter,
-    CSVScoreWriter
+    CSVScoreWriter,
 )
 from dask.delayed import Delayed
 from bob.bio.base.utils import get_resource_filename
@@ -47,31 +47,19 @@ EPILOG = """\b
 -----------------------

-    $ bob pipelines vanilla-biometrics my_experiment.py -vv
+    $ bob pipelines vanilla-biometrics -p my_experiment.py -vv

 my_experiment.py must contain the following elements:

-   >>> preprocessor = my_preprocessor() \n
-   >>> extractor = my_extractor() \n
-   >>> algorithm = my_algorithm() \n
-   >>> checkpoints = EXPLAIN CHECKPOINTING \n
+   >>> transformer = ...  # A scikit-learn pipeline
+   >>> algorithm = ...  # A `BioAlgorithm`
+   >>> pipeline = VanillaBiometricsPipeline(transformer, algorithm)
+   >>> database = ...  # Biometric database connector (a class that implements the methods: `background_model_samples`, `references` and `probes`)

 \b
-Look at the following example
-
-    $ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
-    ./bob/pipelines/config/database/mobio_male.py \
-    ./bob/pipelines/config/baselines/facecrop_pca.py
-
-\b
-
-
-
-TODO: Work out this help
-
 """
@@ -79,12 +67,11 @@ TODO: Work out this help
     entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG,
 )
 @click.option(
-    "--pipeline", "-p", required=True, help="Vanilla biometrics pipeline",
+    "--pipeline", "-p", required=True, help="An entry point or a configuration file containing a `VanillaBiometricsPipeline`.",
 )
 @click.option(
     "--database",
-    "-d",
-    required=True,
+    "-d",
     help="Biometric Database connector (class that implements the methods: `background_model_samples`, `references` and `probes`)",
 )
@@ -115,17 +102,84 @@ TODO: Work out this help
     help="If set, will consider genuine scores in the ZT score normalization",
 )
 @click.option(
-    "--write-metadata-scores", "-m",
+    "--write-metadata-scores",
+    "-m",
     is_flag=True,
     help="If set, all the scores will be written with all its metadata",
 )
-@click.option("--ztnorm-cohort-proportion", default=1., type=float,
-              help="Sets the percentage of samples used for t-norm and z-norm. Sometimes you don't want to use all the t/z samples for normalization")
+@click.option(
+    "--ztnorm-cohort-proportion",
+    default=1.0,
+    type=float,
+    help="Sets the proportion of samples used for t-norm and z-norm. Sometimes you don't want to use all the t/z samples for normalization",
+)
+@click.option(
+    "--checkpoint",
+    "-c",
+    is_flag=True,
+    help="If set, it will checkpoint all steps of the pipeline",
+)
 @verbosity_option(cls=ResourceOption)
 def vanilla_biometrics_ztnorm(
-    pipeline, database, dask_client, groups, output, consider_genuines, write_metadata_scores, ztnorm_cohort_proportion, **kwargs
+    pipeline,
+    database,
+    dask_client,
+    groups,
+    output,
+    consider_genuines,
+    write_metadata_scores,
+    ztnorm_cohort_proportion,
+    checkpoint,
+    **kwargs,
 ):
-    """Runs the simplest biometrics pipeline under ZTNorm.
+    """Runs the vanilla-biometrics pipeline with ZT-Norm-like score normalizations.
+
+    Such a pipeline consists of two major components.
+    The first component is a scikit-learn `Pipeline`,
+    where a sequence of transformations of the input data
+    is defined.
+    The second component is a `BioAlgorithm` that defines the primitives
+    `enroll` and `score`.
+
+    With those two components, any biometric experiment can be done.
+    A biometric experiment consists of three sub-pipelines and
+    they are defined below:
+
+    Sub-pipeline 1:\n
+    ---------------
+
+    Training background model.
+    Some biometric algorithms demand the training of a background model, for instance, a PCA/LDA matrix or a neural network.
+
+    \b
+    This pipeline runs: `Pipeline.fit(DATA_FOR_FIT)`
+
+
+
+    \b
+
+    Sub-pipeline 2:\n
+    ---------------
+
+    Creation of biometric references: This is a standard step in biometric pipelines.
+    Given a set of samples of one identity, create a biometric reference (a.k.a. template) for that identity.
+
+
+    \b
+    raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference
+
+    This pipeline runs: `BioAlgorithm.enroll(Pipeline.transform(DATA_ENROLL))` >> biometric_references
+
+
+    Sub-pipeline 3:\n
+    ---------------
+
+    Probing: This is another standard step in biometric pipelines.
+    Given one sample and one biometric reference, this step computes a score.
+    Such score has different meanings depending on the scoring method your biometric algorithm uses.
+    It's out of the scope of this help message to explain what scoring means for different biometric algorithms.
+
+    This pipeline runs: `BioAlgorithm.score(Pipeline.transform(DATA_SCORE, biometric_references))` >> scores

     """
@@ -144,7 +198,6 @@ def vanilla_biometrics_ztnorm(
     if not os.path.exists(output):
         os.makedirs(output, exist_ok=True)

-    # It's necessary to chain load 2 resources together
     pipeline_config = get_resource_filename(pipeline, "bob.bio.pipeline")
     database_config = get_resource_filename(database, "bob.bio.database")
@@ -157,20 +210,18 @@ def vanilla_biometrics_ztnorm(
     pipeline = vanilla_pipeline.pipeline

     if write_metadata_scores:
-        pipeline.score_writer = CSVScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
     else:
-        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output,"./tmp"))
-
+        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "./tmp"))

     # Check if it's already checkpointed
-    if not isinstance_nested(
+    if checkpoint and not isinstance_nested(
         pipeline.biometric_algorithm,
         "biometric_algorithm",
         BioAlgorithmCheckpointWrapper,
     ):
         pipeline = checkpoint_vanilla_biometrics(pipeline, output)

-    # Patching the pipeline in case of ZNorm and checkpointing it
     pipeline = ZTNormPipeline(pipeline)
     pipeline.ztnorm_solver = ZTNormCheckpointWrapper(
@@ -213,8 +264,14 @@
         probes, zprobes = _merge_references_ztnorm(
             biometric_references, probes, zprobes, treferences
         )
-
-    raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores, s_normed_scores = pipeline(
+
+    (
+        raw_scores,
+        z_normed_scores,
+        t_normed_scores,
+        zt_normed_scores,
+        s_normed_scores,
+    ) = pipeline(
         background_model_samples,
         biometric_references,
         probes,
@@ -225,7 +282,7 @@ def vanilla_biometrics_ztnorm(
     def _build_filename(score_file_name, suffix):
         return os.path.join(score_file_name, suffix)
-    
+
     # Running RAW_SCORES
     raw_scores = post_process_scores(
         pipeline, raw_scores, _build_filename(score_file_name, "raw_scores")
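For reviewers who want to exercise the new `-p` interface, below is a minimal sketch of the `my_experiment.py` configuration file the EPILOG describes. It is a sketch, not a definitive config: `VanillaBiometricsPipeline` and `Distance` are assumed to be importable from `bob.bio.base.pipelines.vanilla_biometrics` as in the imports above, the flattening transformer and the `MyDatabase` connector are hypothetical stand-ins, and details a real setup needs (such as wrapping the estimators with `bob.pipelines` so they operate on Sample objects) are omitted.

    # my_experiment.py -- hedged sketch of the config file described in the EPILOG
    import numpy
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import FunctionTransformer
    from bob.bio.base.pipelines.vanilla_biometrics import (
        VanillaBiometricsPipeline,
        Distance,  # assumed export: a simple distance-based BioAlgorithm
    )

    # First component: a scikit-learn pipeline. A trivial flattening step
    # stands in for real preprocessing / feature extraction.
    transformer = make_pipeline(
        FunctionTransformer(lambda samples: [numpy.ravel(s) for s in samples])
    )

    # Second component: a `BioAlgorithm` providing `enroll` and `score`.
    algorithm = Distance()

    pipeline = VanillaBiometricsPipeline(transformer, algorithm)

    # Database connector: any class implementing the three methods below.
    # `MyDatabase` is a placeholder showing only the required interface.
    class MyDatabase:
        def background_model_samples(self):
            return []

        def references(self, group="dev"):
            return []

        def probes(self, group="dev"):
            return []

    database = MyDatabase()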
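Assuming such a configuration file, a command along the following lines exercises the options this diff touches: `-p` for the pipeline (now an explicit option in both scripts), `-m` to select the `CSVScoreWriter`, and the new `-c`/`--checkpoint` flag that now gates the `checkpoint_vanilla_biometrics` wrapping; the flag combination itself is illustrative.

    $ bob pipelines vanilla-biometrics -p my_experiment.py -c -m -vv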