diff --git a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
index 5bf01e09a7669dd47b76d96df35225323b352d8a..059d48a32a8b97ffdcbc2186c059a4565f7e4e41 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/pipelines.py
@@ -13,6 +13,8 @@ import numpy
 from .score_writers import FourColumnsScoreWriter
 
 logger = logging.getLogger(__name__)
+import tempfile
+import os
 
 
 class VanillaBiometricsPipeline(object):
@@ -71,11 +73,14 @@ class VanillaBiometricsPipeline(object):
         self,
         transformer,
         biometric_algorithm,
-        score_writer=FourColumnsScoreWriter("./scores.txt"),
+        score_writer=None,
     ):
         self.transformer = transformer
         self.biometric_algorithm = biometric_algorithm
         self.score_writer = score_writer
+        if self.score_writer is None:
+            tempdir = tempfile.TemporaryDirectory()
+            self.score_writer = FourColumnsScoreWriter(tempdir.name)
 
     def __call__(
         self,
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py
index 08df4bda677ad0667bee327924a97490226dd099..f4c472862a2a9358098b0db18cd189bf565c9187 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/wrappers.py
@@ -240,6 +240,8 @@ def dask_vanilla_biometrics(pipeline, npartitions=None, partition_size=None):
         pipeline.vanilla_biometrics_pipeline = dask_vanilla_biometrics(
             pipeline.vanilla_biometrics_pipeline, npartitions
         )
+        pipeline.biometric_algorithm = pipeline.vanilla_biometrics_pipeline.biometric_algorithm
+        pipeline.transformer = pipeline.vanilla_biometrics_pipeline.transformer
 
         pipeline.ztnorm_solver = ZTNormDaskWrapper(pipeline.ztnorm_solver)
 
diff --git a/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py b/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py
index 38b66e73b65f2758479d3a1be05c693ecf4a3760..cbfaec0935a4bf1daa4791d3f461e858889d2314 100644
--- a/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py
+++ b/bob/bio/base/pipelines/vanilla_biometrics/zt_norm.py
@@ -70,6 +70,9 @@ class ZTNormPipeline(object):
         adaptive_score_descending_sort=True,
     ):
         self.vanilla_biometrics_pipeline = vanilla_biometrics_pipeline
+        self.biometric_algorithm = self.vanilla_biometrics_pipeline.biometric_algorithm
+        self.transformer = self.vanilla_biometrics_pipeline.transformer
+
         self.ztnorm_solver = ZTNorm(
             adaptive_score_fraction, adaptive_score_descending_sort
         )
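Note on the new score_writer=None default above: when no writer is passed, scores now go to a fresh temporary directory instead of silently creating ./scores.txt in the working directory. A minimal sketch of the resulting behavior, assuming hypothetical my_transformer and my_algorithm objects:

    # Sketch: constructing the pipeline without an explicit score writer.
    # my_transformer and my_algorithm are hypothetical stand-ins for a
    # sklearn-style transformer and a biometric algorithm.
    from bob.bio.base.pipelines.vanilla_biometrics import VanillaBiometricsPipeline

    pipeline = VanillaBiometricsPipeline(my_transformer, my_algorithm)
    # The default writer is now a FourColumnsScoreWriter rooted at a
    # tempfile.TemporaryDirectory() path rather than at ./scores.txt:
    print(type(pipeline.score_writer))  # FourColumnsScoreWriter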
diff --git a/bob/bio/base/script/vanilla_biometrics.py b/bob/bio/base/script/vanilla_biometrics.py
index fe4338fbdfa86142aedc0fd509b1bcdbe8f48ef5..7af52380c9146629066cec9b2ed6dfeb43168bd8 100644
--- a/bob/bio/base/script/vanilla_biometrics.py
+++ b/bob/bio/base/script/vanilla_biometrics.py
@@ -24,63 +24,19 @@ from bob.bio.base.pipelines.vanilla_biometrics import (
     checkpoint_vanilla_biometrics,
     dask_vanilla_biometrics,
     dask_get_partition_size,
+    FourColumnsScoreWriter,
+    CSVScoreWriter
 )
 from dask.delayed import Delayed
 import pkg_resources
 from bob.extension.config import load as chain_load
 from bob.pipelines.utils import isinstance_nested
+from bob.bio.base.utils import get_resource_filename
 
 logger = logging.getLogger(__name__)
 
 
-def get_resource_filename(resource_name, group):
-    """
-    Get the file name of a resource.
-
-
-    Parameters
-    ----------
-    resource_name: str
-        Name of the resource to be searched
-
-    group: str
-        Entry point group
-
-    Return
-    ------
-    filename: str
-        The entrypoint file name
-
-    """
-
-    # Check if it's already a path
-    if os.path.exists(resource_name):
-        return resource_name
-
-    # If it's a resource get the path of this resource
-    resources = [r for r in pkg_resources.iter_entry_points(group)]
-
-    # if resource_name not in [r.name for r in resources]:
-    #    raise ValueError(f"Resource not found: `{resource_name}`")
-
-    for r in resources:
-        if r.name == resource_name:
-            resource = r
-            break
-    else:
-        raise ValueError(f"Resource not found: `{resource_name}`")
-
-    # TODO: This get the root path only
-    # I don't know how to get the filename
-    return (
-        pkg_resources.resource_filename(
-            resource.module_name, resource.module_name.split(".")[-1]
-        )
-        + ".py"
-    )
-
-
 EPILOG = """\b
@@ -165,8 +121,13 @@ def post_process_scores(pipeline, scores, path):
     default="results",
     help="Name of output directory",
 )
+@click.option(
+    "--write-metadata-scores", "-m",
+    is_flag=True,
+    help="If set, all the scores will be written with all their metadata",
+)
 @verbosity_option(cls=ResourceOption)
-def vanilla_biometrics(pipeline, database, dask_client, groups, output, **kwargs):
+def vanilla_biometrics(pipeline, database, dask_client, groups, output, write_metadata_scores, **kwargs):
     """Runs the simplest biometrics pipeline.
 
     Such a pipeline consists of three sub-pipelines.
@@ -223,6 +184,10 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, **kwargs
     # Picking the resources
     database = vanilla_pipeline.database
     pipeline = vanilla_pipeline.pipeline
+    if write_metadata_scores:
+        pipeline.score_writer = CSVScoreWriter(os.path.join(output, "tmp"))
+    else:
+        pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output, "tmp"))
 
     # Check if it's already checkpointed
     if not isinstance_nested(
@@ -241,8 +206,7 @@ def vanilla_biometrics(pipeline, database, dask_client, groups, output, **kwargs
 
     if dask_client is not None and not isinstance_nested(
         pipeline.biometric_algorithm, "biometric_algorithm", BioAlgorithmDaskWrapper
-    ):
-
+    ):
         n_objects = (
             len(background_model_samples) + len(biometric_references) + len(probes)
         )
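For reference, the resource resolution and chain loading performed by the command above (and reused by the new command below) boils down to the following sketch; "my_pipeline" and "my_database" are placeholder resource names, not real entry points:

    # Sketch: resolve entry-point names (or plain file paths) to config
    # files, then load them in one chain so the pipeline config can refer
    # to variables defined by the database config.
    from bob.bio.base.utils import get_resource_filename
    from bob.extension.config import load as chain_load

    pipeline_config = get_resource_filename("my_pipeline", "bob.bio.pipeline")
    database_config = get_resource_filename("my_database", "bob.bio.database")
    vanilla_pipeline = chain_load([database_config, pipeline_config])

    database = vanilla_pipeline.database
    pipeline = vanilla_pipeline.pipeline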
diff --git a/bob/bio/base/script/vanilla_biometrics_ztnorm.py b/bob/bio/base/script/vanilla_biometrics_ztnorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ef67e3f482b573403e985413bec023151170008
--- /dev/null
+++ b/bob/bio/base/script/vanilla_biometrics_ztnorm.py
@@ -0,0 +1,269 @@
+#!/usr/bin/env python
+# vim: set fileencoding=utf-8 :
+# Tiago de Freitas Pereira <tiago.pereira@idiap.ch>
+
+
+"""Executes biometric pipeline"""
+
+import click
+
+from bob.extension.scripts.click_helper import (
+    verbosity_option,
+    ResourceOption,
+    ConfigCommand,
+)
+
+import logging
+import os
+import itertools
+import dask.bag
+from bob.bio.base.pipelines.vanilla_biometrics import (
+    VanillaBiometricsPipeline,
+    BioAlgorithmCheckpointWrapper,
+    BioAlgorithmDaskWrapper,
+    ZTNormPipeline,
+    ZTNormDaskWrapper,
+    ZTNormCheckpointWrapper,
+    checkpoint_vanilla_biometrics,
+    dask_vanilla_biometrics,
+    dask_get_partition_size,
+    FourColumnsScoreWriter,
+    CSVScoreWriter
+)
+from dask.delayed import Delayed
+from bob.bio.base.utils import get_resource_filename
+from bob.extension.config import load as chain_load
+from bob.pipelines.utils import isinstance_nested
+from .vanilla_biometrics import compute_scores, post_process_scores
+import copy
+
+logger = logging.getLogger(__name__)
+
+
+EPILOG = """\b
+
+
+ Command line examples\n
+ -----------------------
+
+
+ $ bob pipelines vanilla-biometrics-ztnorm my_experiment.py -vv
+
+
+ my_experiment.py must contain the following elements:
+
+ >>> preprocessor = my_preprocessor() \n
+ >>> extractor = my_extractor() \n
+ >>> algorithm = my_algorithm() \n
+ >>> checkpoints = EXPLAIN CHECKPOINTING \n
+
+\b
+
+
+Look at the following example
+
+ $ bob pipelines vanilla-biometrics-ztnorm ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
+ ./bob/pipelines/config/database/mobio_male.py \
+ ./bob/pipelines/config/baselines/facecrop_pca.py
+
+\b
+
+
+
+TODO: Work out this help
+
+"""
+
+
+@click.command(
+    entry_point_group="bob.pipelines.config", cls=ConfigCommand, epilog=EPILOG,
+)
+@click.option(
+    "--pipeline", "-p", required=True, help="Vanilla biometrics pipeline",
+)
+@click.option(
+    "--database",
+    "-d",
+    required=True,
+    help="Biometric Database connector (class that implements the methods: `background_model_samples`, `references` and `probes`)",
+)
+@click.option(
+    "--dask-client",
+    "-l",
+    required=False,
+    help="Dask client for the execution of the pipeline.",
+)
+@click.option(
+    "--group",
+    "-g",
+    "groups",
+    type=click.Choice(["dev", "eval"]),
+    multiple=True,
+    default=("dev",),
+    help="If given, this value will limit the experiments to a particular protocol group",
+)
+@click.option(
+    "-o",
+    "--output",
+    show_default=True,
+    default="results",
+    help="Name of output directory",
+)
+@click.option(
+    "--consider-genuines",
+    is_flag=True,
+    help="If set, genuine scores will be considered in the ZT score normalization",
+)
+@click.option(
+    "--write-metadata-scores", "-m",
+    is_flag=True,
+    help="If set, all the scores will be written with all their metadata",
+)
+@click.option(
+    "--ztnorm-cohort-proportion",
+    default=1.0,
+    type=float,
+    help="Sets the proportion of samples used for t-norm and z-norm. Sometimes you don't want to use all the t/z samples for normalization",
+)
+@verbosity_option(cls=ResourceOption)
+def vanilla_biometrics_ztnorm(
+    pipeline, database, dask_client, groups, output, consider_genuines, write_metadata_scores, ztnorm_cohort_proportion, **kwargs
+):
+    """Runs the simplest biometrics pipeline under ZTNorm.
+
+    """
+ + """ + + def _merge_references_ztnorm(biometric_references, probes, zprobes, treferences): + treferences_sub = [t.subject for t in treferences] + biometric_references_sub = [t.subject for t in biometric_references] + + for i in range(len(zprobes)): + probes[i].references += treferences_sub + + for i in range(len(zprobes)): + zprobes[i].references = biometric_references_sub + treferences_sub + + return probes, zprobes + + if not os.path.exists(output): + os.makedirs(output, exist_ok=True) + + + # It's necessary to chain load 2 resources together + pipeline_config = get_resource_filename(pipeline, "bob.bio.pipeline") + database_config = get_resource_filename(database, "bob.bio.database") + vanilla_pipeline = chain_load([database_config, pipeline_config]) + if dask_client is not None: + dask_client = chain_load([dask_client]).dask_client + + # Picking the resources + database = vanilla_pipeline.database + pipeline = vanilla_pipeline.pipeline + + if write_metadata_scores: + pipeline.score_writer = CSVScoreWriter(os.path.join(output,"./tmp")) + else: + pipeline.score_writer = FourColumnsScoreWriter(os.path.join(output,"./tmp")) + + + # Check if it's already checkpointed + if not isinstance_nested( + pipeline.biometric_algorithm, + "biometric_algorithm", + BioAlgorithmCheckpointWrapper, + ): + pipeline = checkpoint_vanilla_biometrics(pipeline, output) + + + # Patching the pipeline in case of ZNorm and checkpointing it + pipeline = ZTNormPipeline(pipeline) + pipeline.ztnorm_solver = ZTNormCheckpointWrapper( + pipeline.ztnorm_solver, os.path.join(output, "normed-scores") + ) + + background_model_samples = database.background_model_samples() + zprobes = database.zprobes(proportion=ztnorm_cohort_proportion) + treferences = database.treferences(proportion=ztnorm_cohort_proportion) + for group in groups: + + score_file_name = os.path.join(output, f"scores-{group}") + + biometric_references = database.references(group=group) + probes = database.probes(group=group) + + if dask_client is not None and not isinstance_nested( + pipeline.biometric_algorithm, "biometric_algorithm", BioAlgorithmDaskWrapper + ): + n_objects = ( + len(background_model_samples) + len(biometric_references) + len(probes) + ) + pipeline = dask_vanilla_biometrics( + pipeline, + partition_size=dask_get_partition_size(dask_client.cluster, n_objects), + ) + + logger.info(f"Running vanilla biometrics for group {group}") + allow_scoring_with_all_biometric_references = ( + database.allow_scoring_with_all_biometric_references + if hasattr(database, "allow_scoring_with_all_biometric_references") + else False + ) + + if consider_genuines: + z_probes_cpy = copy.deepcopy(zprobes) + zprobes += copy.deepcopy(treferences) + treferences += z_probes_cpy + + probes, zprobes = _merge_references_ztnorm( + biometric_references, probes, zprobes, treferences + ) + + raw_scores, z_normed_scores, t_normed_scores, zt_normed_scores, s_normed_scores = pipeline( + background_model_samples, + biometric_references, + probes, + zprobes, + treferences, + allow_scoring_with_all_biometric_references=allow_scoring_with_all_biometric_references, + ) + + def _build_filename(score_file_name, suffix): + return os.path.join(score_file_name, suffix) + + # Running RAW_SCORES + raw_scores = post_process_scores( + pipeline, raw_scores, _build_filename(score_file_name, "raw_scores") + ) + + _ = compute_scores(raw_scores, dask_client) + + # Z-SCORES + z_normed_scores = post_process_scores( + pipeline, + z_normed_scores, + _build_filename(score_file_name, "z_normed_scores"), + 
diff --git a/bob/bio/base/utils/resources.py b/bob/bio/base/utils/resources.py
index 7ac2d541b970c925015095636e6a4a200cd9f516..7790290c6d5b381e9fa12741a3969afad3ca8082 100644
--- a/bob/bio/base/utils/resources.py
+++ b/bob/bio/base/utils/resources.py
@@ -252,3 +252,50 @@ def database_directories(strip=['dummy'], replacements = None, package_prefix='b
             pass
 
     return dirs
+
+
+def get_resource_filename(resource_name, group):
+    """
+    Get the file name of a resource.
+
+
+    Parameters
+    ----------
+    resource_name: str
+        Name of the resource to be searched
+
+    group: str
+        Entry point group
+
+    Returns
+    -------
+    filename: str
+        The entrypoint file name
+
+    """
+
+    # Check if it's already a path
+    if os.path.exists(resource_name):
+        return resource_name
+
+    # If it's a resource, get the path of this resource
+    resources = [r for r in pkg_resources.iter_entry_points(group)]
+
+    # if resource_name not in [r.name for r in resources]:
+    #    raise ValueError(f"Resource not found: `{resource_name}`")
+
+    for r in resources:
+        if r.name == resource_name:
+            resource = r
+            break
+    else:
+        raise ValueError(f"Resource not found: `{resource_name}`")
+
+    # TODO: This gets the root path only
+    # I don't know how to get the filename
+    return (
+        pkg_resources.resource_filename(
+            resource.module_name, resource.module_name.split(".")[-1]
+        )
+        + ".py"
+    )
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6c9c9445162c73218a95b91890ef4c0914c69b5f..571022170e4ca6e5fdb0a21eb074ddbf97dfa710 100644
--- a/setup.py
+++ b/setup.py
@@ -143,6 +143,7 @@ setup(
         # run pipelines
         'bob.bio.pipelines.cli':[
             'vanilla-biometrics = bob.bio.base.script.vanilla_biometrics:vanilla_biometrics',
+            'vanilla-biometrics-ztnorm = bob.bio.base.script.vanilla_biometrics_ztnorm:vanilla_biometrics_ztnorm',
        ],
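With the entry point registered above, the new command should be callable in the same style as the existing vanilla-biometrics command. A hedged example, where my_pipeline and my_database are placeholder resource names:

 $ bob pipelines vanilla-biometrics-ztnorm -p my_pipeline -d my_database \
     -g dev -g eval -m --ztnorm-cohort-proportion 0.8 -o results -vv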