Commit 9b7a8633 authored by Tiago de Freitas Pereira

Cleaning up vanilla biometrics

parent 5c9dd28f
Merge requests: !192 Redoing baselines, !180 [dask] Preparing bob.bio.base for dask pipelines
Pipeline #40540 passed
@@ -25,7 +25,7 @@ from bob.bio.base.pipelines.vanilla_biometrics import (
     dask_vanilla_biometrics,
     dask_get_partition_size,
     FourColumnsScoreWriter,
-    CSVScoreWriter
+    CSVScoreWriter,
 )
 from dask.delayed import Delayed
 import pkg_resources
@@ -44,31 +44,18 @@ EPILOG = """\b
 -----------------------
-$ bob pipelines vanilla-biometrics my_experiment.py -vv
+$ bob pipelines vanilla-biometrics -p my_experiment.py -vv
 my_experiment.py must contain the following elements:
->>> preprocessor = my_preprocessor() \n
->>> extractor = my_extractor() \n
->>> algorithm = my_algorithm() \n
->>> checkpoints = EXPLAIN CHECKPOINTING \n
+>>> transformer = ...  # A scikit-learn pipeline
+>>> algorithm = ...  # A `BioAlgorithm`
+>>> pipeline = VanillaBiometricsPipeline(transformer, algorithm)
+>>> database = ...  # Biometric database connector (a class that implements the methods `background_model_samples`, `references` and `probes`)
 \b
 Look at the following example:
 $ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
 ./bob/pipelines/config/database/mobio_male.py \
 ./bob/pipelines/config/baselines/facecrop_pca.py
 \b
 TODO: Work out this help
 """
@@ -122,24 +109,49 @@ def post_process_scores(pipeline, scores, path):
     help="Name of output directory",
 )
 @click.option(
-    "--write-metadata-scores", "-m",
+    "--write-metadata-scores",
+    "-m",
     is_flag=True,
-    help="If set, all the scores will be written with all its metadata",
+    help="If set, all the scores will be written with all their metadata using the `CSVScoreWriter`",
 )
+@click.option(
+    "--checkpoint",
+    "-c",
+    is_flag=True,
+    help="If set, it will checkpoint all steps of the pipeline",
+)
 @verbosity_option(cls=ResourceOption)
-def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_metadata_scores, **kwargs):
+def vanilla_biometrics(
+    pipeline,
+    database,
+    dask_client,
+    groups,
+    output,
+    write_metadata_scores,
+    checkpoint,
+    **kwargs,
+):
"""Runs the simplest biometrics pipeline.
Such pipeline consists into three sub-pipelines.
In all of them, given raw data as input it does the following steps:
Such pipeline consists into two major components.
The first component consists of a scikit-learn `Pipeline`,
where a sequence of transformations of the input data
is defined.
The second component is a `BioAlgorithm` that defines the primitives
`enroll` and `score`
With those two components any Biometric Experiment can be done.
A Biometric experiment consists of three sub-pipelines and
they are defined below:
Sub-pipeline 1:\n
---------------
Training background model. Some biometric algorithms demands the training of background model, for instance, PCA/LDA matrix or a Neural networks. This sub-pipeline handles that and it consists of 3 steps:
Training background model.
Some biometric algorithms demands the training of background model, for instance, PCA/LDA matrix or a Neural networks.
\b
raw_data --> preprocessing >> feature extraction >> train background model --> background_model
This pipeline runs: `Pipeline.fit(DATA_FOR_FIT)`
@@ -149,26 +161,24 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_metadata_scores, **kwargs):
     ---------------
     Creation of biometric references: This is a standard step in biometric pipelines.
-    Given a set of samples of one identity, create a biometric reference (a.k.a template) for sub identity. This sub-pipeline handles that in 3 steps and they are the following:
+    Given a set of samples of one identity, create a biometric reference (a.k.a. template) for each identity.
     \b
     raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference
-    Note that this sub-pipeline depends on the previous one
+    This pipeline runs: `BioAlgorithm.enroll(Pipeline.transform(DATA_ENROLL))` >> biometric_references
     Sub-pipeline 3:\n
     ---------------
-    Probing: This is another standard step in biometric pipelines.
-    Given one sample and one biometric reference, computes a score.
-    Such score has different meanings depending on the scoring method your biometric algorithm uses.
-    It's out of scope to explain in a help message to explain what scoring is for different biometric algorithms.
+    Probing: This is another standard step in biometric pipelines. Given one sample and one biometric reference, a score is computed. Such a score has a different meaning depending on the scoring method your biometric algorithm uses. It is out of the scope of this help message to explain what scoring means for different biometric algorithms.
-    raw_data --> preprocessing >> feature extraction >> probe(biometric_reference, background_model) --> score
-    Note that this sub-pipeline depends on the two previous ones
+    This pipeline runs: `BioAlgorithm.score(Pipeline.transform(DATA_SCORE, biometric_references))` >> scores
     """
@@ -185,12 +195,12 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_metadata_scores, **kwargs):
     database = vanilla_pipeline.database
     pipeline = vanilla_pipeline.pipeline
     if write_metadata_scores:
-        pipeline.score_writer = CSVScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
     else:
-        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "./tmp"))
     # Check if it's already checkpointed
-    if not isinstance_nested(
+    if checkpoint and not isinstance_nested(
         pipeline.biometric_algorithm,
         "biometric_algorithm",
         BioAlgorithmCheckpointWrapper,
@@ -206,7 +216,7 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_metadata_scores, **kwargs):
     if dask_client is not None and not isinstance_nested(
         pipeline.biometric_algorithm, "biometric_algorithm", BioAlgorithmDaskWrapper
-        ):
+    ):
         n_objects = (
             len(background_model_samples) + len(biometric_references) + len(probes)
         )
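
The dask branch above sizes partitions from `n_objects`; a rough sketch of how those helpers could fit together is shown below. The exact signatures of `dask_get_partition_size` and `dask_vanilla_biometrics` are assumptions here:

    # Wrap the pipeline for distributed execution, choosing a partition
    # size proportional to the total number of samples to process
    partition_size = dask_get_partition_size(dask_client.cluster, n_objects)
    pipeline = dask_vanilla_biometrics(pipeline, partition_size=partition_size)
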
...
@@ -28,7 +28,7 @@ from bob.bio.base.pipelines.vanilla_biometrics import (
     dask_vanilla_biometrics,
     dask_get_partition_size,
     FourColumnsScoreWriter,
-    CSVScoreWriter
+    CSVScoreWriter,
 )
 from dask.delayed import Delayed
 from bob.bio.base.utils import get_resource_filename
@@ -47,31 +47,19 @@ EPILOG = """\b
 -----------------------
-$ bob pipelines vanilla-biometrics my_experiment.py -vv
+$ bob pipelines vanilla-biometrics -p my_experiment.py -vv
 my_experiment.py must contain the following elements:
->>> preprocessor = my_preprocessor() \n
->>> extractor = my_extractor() \n
->>> algorithm = my_algorithm() \n
->>> checkpoints = EXPLAIN CHECKPOINTING \n
+>>> transformer = ...  # A scikit-learn pipeline
+>>> algorithm = ...  # A `BioAlgorithm`
+>>> pipeline = VanillaBiometricsPipeline(transformer, algorithm)
+>>> database = ...  # Biometric database connector (a class that implements the methods `background_model_samples`, `references` and `probes`)
 \b
 Look at the following example:
 $ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
 ./bob/pipelines/config/database/mobio_male.py \
 ./bob/pipelines/config/baselines/facecrop_pca.py
 \b
 TODO: Work out this help
 """
@@ -79,12 +67,11 @@ TODO: Work out this help
     entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG,
 )
 @click.option(
-    "--pipeline", "-p", required=True, help="Vanilla biometrics pipeline",
+    "--pipeline", "-p", required=True, help="An entry point or a configuration file containing a `VanillaBiometricsPipeline`.",
 )
 @click.option(
     "--database",
-    "-d",
-    required=True,
+    "-d",
     help="Biometric database connector (a class that implements the methods `background_model_samples`, `references` and `probes`)",
 )
 @click.option(
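
A minimal connector satisfying the interface described by --database could be sketched as follows; the class name and the `group` argument are illustrative assumptions:

    class MyDatabaseConnector:
        def background_model_samples(self):
            # Samples used to train the background model (sub-pipeline 1)
            return [...]

        def references(self, group="dev"):
            # Sample sets, one per identity to be enrolled (sub-pipeline 2)
            return [...]

        def probes(self, group="dev"):
            # Probe samples to be scored against references (sub-pipeline 3)
            return [...]
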
@@ -115,17 +102,84 @@ TODO: Work out this help
     help="If set, will consider genuine scores in the ZT score normalization",
 )
 @click.option(
-    "--write-metadata-scores", "-m",
+    "--write-metadata-scores",
+    "-m",
     is_flag=True,
     help="If set, all the scores will be written with all their metadata",
 )
-@click.option("--ztnorm-cohort-proportion", default=1., type=float,
-    help="Sets the percentage of samples used for t-norm and z-norm. Sometimes you don't want to use all the t/z samples for normalization")
+@click.option(
+    "--ztnorm-cohort-proportion",
+    default=1.0,
+    type=float,
+    help="Sets the proportion of samples used for t-norm and z-norm. Sometimes you don't want to use all the t/z samples for normalization",
+)
+@click.option(
+    "--checkpoint",
+    "-c",
+    is_flag=True,
+    help="If set, it will checkpoint all steps of the pipeline",
+)
 @verbosity_option(cls=ResourceOption)
 def vanilla_biometrics_ztnorm(
-    pipeline, database, dask_client, groups, output, consider_genuines, write_metadata_scores, ztnorm_cohort_proportion, **kwargs
+    pipeline,
+    database,
+    dask_client,
+    groups,
+    output,
+    consider_genuines,
+    write_metadata_scores,
+    ztnorm_cohort_proportion,
+    checkpoint,
+    **kwargs,
 ):
"""Runs the simplest biometrics pipeline under ZTNorm.
"""Runs the the vanilla-biometrics with ZT-Norm like score normalizations.
Such pipeline consists into two major components.
The first component consists of a scikit-learn `Pipeline`,
where a sequence of transformations of the input data
is defined.
The second component is a `BioAlgorithm` that defines the primitives
`enroll` and `score`
With those two components any Biometric Experiment can be done.
A Biometric experiment consists of three sub-pipelines and
they are defined below:
Sub-pipeline 1:\n
---------------
Training background model.
Some biometric algorithms demands the training of background model, for instance, PCA/LDA matrix or a Neural networks.
\b
This pipeline runs: `Pipeline.fit(DATA_FOR_FIT)`
\b
Sub-pipeline 2:\n
---------------
Creation of biometric references: This is a standard step in a biometric pipelines.
Given a set of samples of one identity, create a biometric reference (a.k.a template) for sub identity.
\b
raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference
This pipeline runs: `BioAlgorithm.enroll(Pipeline.transform(DATA_ENROLL))` >> biometric_references
Sub-pipeline 3:\n
---------------
Probing: This is another standard step in biometric pipelines.
Given one sample and one biometric reference, computes a score.
Such score has different meanings depending on the scoring method your biometric algorithm uses.
It's out of scope to explain in a help message to explain what scoring is for different biometric algorithms.
This pipeline runs: `BioAlgorithm.score(Pipeline.transform(DATA_SCORE, biometric_references))` >> biometric_references
"""
@@ -144,7 +198,6 @@ def vanilla_biometrics_ztnorm(
     if not os.path.exists(output):
         os.makedirs(output, exist_ok=True)
-    # It's necessary to chain load 2 resources together
     pipeline_config = get_resource_filename(pipeline, "bob.bio.pipeline")
     database_config = get_resource_filename(database, "bob.bio.database")
@@ -157,20 +210,18 @@ def vanilla_biometrics_ztnorm(
     pipeline = vanilla_pipeline.pipeline
     if write_metadata_scores:
-        pipeline.score_writer = CSVScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = CSVScoreWriter(os.path.join(output, "./tmp"))
     else:
-        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output,"./tmp"))
+        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "./tmp"))
     # Check if it's already checkpointed
-    if not isinstance_nested(
+    if checkpoint and not isinstance_nested(
         pipeline.biometric_algorithm,
         "biometric_algorithm",
         BioAlgorithmCheckpointWrapper,
     ):
         pipeline = checkpoint_vanilla_biometrics(pipeline, output)
     # Patching the pipeline in case of ZNorm and checkpointing it
     pipeline = ZTNormPipeline(pipeline)
     pipeline.ztnorm_solver = ZTNormCheckpointWrapper(
@@ -213,8 +264,14 @@ def vanilla_biometrics_ztnorm(
         probes, zprobes = _merge_references_ztnorm(
             biometric_references, probes, zprobes, treferences
         )
-    raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores, s_normed_scores = pipeline(
+    (
+        raw_scores,
+        z_normed_scores,
+        t_normed_scores,
+        zt_normed_scores,
+        s_normed_scores,
+    ) = pipeline(
         background_model_samples,
         biometric_references,
         probes,
@@ -225,7 +282,7 @@ def vanilla_biometrics_ztnorm(
     def _build_filename(score_file_name, suffix):
         return os.path.join(score_file_name, suffix)
     # Running RAW_SCORES
     raw_scores = post_process_scores(
         pipeline, raw_scores, _build_filename(score_file_name, "raw_scores")
...