Commit 1b803d1d authored by Amir MOHAMMADI

[vanilla-pad] improvements

parent c3cd137a
Pipeline #45573 failed with stage in 10 minutes and 32 seconds
import bob.bio.base.database
from bob.bio.base.database import BioFile
class PadFile(bob.bio.base.database.BioFile):
class PadFile(BioFile):
"""A simple base class that defines basic properties of File object for the use in PAD experiments"""
def __init__(
......
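For context, a dataset-specific file class would subclass the re-exported PadFile. A minimal sketch, assuming PadFile keeps its usual (client_id, path, attack_type, file_id) constructor; MyPadFile is a hypothetical name, not part of this commit:

from bob.pad.base.database import PadFile


class MyPadFile(PadFile):
    """A hypothetical PAD file; attack_type=None marks a bona-fide sample."""

    def __init__(self, client_id, path, attack_type=None, file_id=None):
        super().__init__(
            client_id=client_id,
            path=path,
            attack_type=attack_type,
            file_id=file_id,
        )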
from .abstract_classes import Database
from .legacy import DatabaseConnector
from .implemented import FrameContainersToFrames
@@ -48,3 +48,10 @@ class DatabaseConnector(Database):
    def predict_samples(self, group="dev"):
        objects = self.database.all_files(groups=group, flat=True)
        return [_padfile_to_delayed_sample(k, self.database) for k in objects]

    def __repr__(self) -> str:
        return f"""{self.__class__.__name__}(
            database={self.database},
            annotation_type={self.annotation_type},
            fixed_positions={self.fixed_positions}
        )"""
"""Executes PAD pipeline"""
import logging
import bob.pipelines as mario
import click
import joblib
from bob.extension.scripts.click_helper import ConfigCommand
from bob.extension.scripts.click_helper import ResourceOption
from bob.extension.scripts.click_helper import verbosity_option
logger = logging.getLogger(__name__)
EPILOG = """\b
@click.command(
entry_point_group="bob.pad.config",
cls=ConfigCommand,
epilog="""\b
Command line examples\n
-----------------------
$ bob pad vanilla-pad my_experiment.py -vv
my_experiment.py must contain the following elements:
>>> preprocessor = my_preprocessor() \n
>>> extractor = my_extractor() \n
>>> algorithm = my_algorithm() \n
>>> checkpoints = EXPLAIN CHECKPOINTING \n
\b
Look at the following example
$ bob pipelines vanilla-biometrics ./bob/pipelines/config/distributed/sge_iobig_16cores.py \
./bob/pipelines/config/database/mobio_male.py \
./bob/pipelines/config/baselines/facecrop_pca.py
\b
TODO: Work out this help
"""
@click.command(
entry_point_group="bob.pad.config",
cls=ConfigCommand,
epilog=EPILOG,
""",
)
@click.option(
    "--pipeline",
@@ -81,7 +47,7 @@ TODO: Work out this help
    "groups",
    type=click.Choice(["dev", "eval"]),
    multiple=True,
    default=("dev",),
    default=("dev", "eval"),
    help="If given, this value will limit the experiments belonging to a particular group",
)
@click.option(
@@ -89,7 +55,7 @@ TODO: Work out this help
    "--output",
    show_default=True,
    default="results",
    help="Name of output directory",
    help="Saves scores (and checkpoints) in this folder.",
)
@click.option(
    "--checkpoint",
@@ -99,56 +65,22 @@ TODO: Work out this help
    cls=ResourceOption,
)
@verbosity_option(cls=ResourceOption)
def vanilla_pad(pipeline, database, dask_client, groups, output, checkpoint, **kwargs):
    """Runs the simplest PAD pipeline.

    Such a pipeline consists of three sub-pipelines. In all of them, given raw data as input, it performs the following steps:

    Sub-pipeline 1:\n
    ---------------
    Training the background model. Some biometric algorithms demand the training of a background model, for instance a PCA/LDA matrix or a neural network. This sub-pipeline handles that and consists of 3 steps:
    \b
    raw_data --> preprocessing >> feature extraction >> train background model --> background_model
    \b
    Sub-pipeline 2:\n
    ---------------
    Creation of biometric references: This is a standard step in biometric pipelines.
    Given a set of samples of one identity, create a biometric reference (a.k.a. template) for that identity. This sub-pipeline handles that in 3 steps:
    \b
    raw_data --> preprocessing >> feature extraction >> enroll(background_model) --> biometric_reference
    Note that this sub-pipeline depends on the previous one.
    Sub-pipeline 3:\n
    ---------------
    Probing: This is another standard step in biometric pipelines. Given one sample and one biometric reference, compute a score. Such a score has a different meaning depending on the scoring method your biometric algorithm uses; explaining what scoring means for different biometric algorithms is out of the scope of this help message.
    raw_data --> preprocessing >> feature extraction >> probe(biometric_reference, background_model) --> score
    Note that this sub-pipeline depends on the two previous ones.
    """
@click.pass_context
def vanilla_pad(ctx, pipeline, database, dask_client, groups, output, checkpoint, **kwargs):
    """Runs the simplest PAD pipeline."""
    import gzip
    import logging
    import os
    import sys
    from glob import glob

    import bob.pipelines as mario
    import dask.bag
    from bob.extension.scripts.click_helper import log_parameters

    logger = logging.getLogger(__name__)
    log_parameters(logger)

    os.makedirs(output, exist_ok=True)
@@ -159,33 +91,40 @@ def vanilla_pad(pipeline, database, dask_client, groups, output, checkpoint, **k
    if dask_client is not None:
        pipeline = mario.wrap(["dask"], pipeline)
    else:
        logger.warning("`dask_client` not set. Your pipeline will run locally")

    # create an experiment info file
    with open(os.path.join(output, "Experiment_info.txt"), "wt") as f:
        f.write(f"{sys.argv!r}\n")
        f.write(f"database={database!r}\n")
        f.write("Pipeline steps:\n")
        for i, name, estimator in pipeline._iter():
            f.write(f"Step {i}: {name}\n{estimator!r}\n")

    # train the pipeline
    fit_samples = database.fit_samples()  # [::50]
    pipeline = pipeline.fit(fit_samples)
    fit_samples = database.fit_samples()
    pipeline.fit(fit_samples)
    for group in groups:
        logger.info(f"Running vanilla biometrics for group {group}")
        predict_samples = database.predict_samples(group=group)  # [::50]
        predict_samples = database.predict_samples(group=group)
        result = pipeline.decision_function(predict_samples)

        with open(os.path.join(output, f"scores-{group}"), "w") as f:
        scores_path = os.path.join(output, f"scores-{group}")

        if isinstance(result, dask.bag.core.Bag):
            if dask_client is None:
                logger.warning(
                    "`dask_client` not set. Your pipeline will run locally"
                )

            # write each partition into a zipped txt file
            result = result.map(pad_predicted_sample_to_score_line)
            prefix, postfix = f"{output}/scores/scores-{group}-", ".txt.gz"
            pattern = f"{prefix}*{postfix}"
            os.makedirs(os.path.dirname(prefix), exist_ok=True)
            logger.info("Writing bag results into files ...")
            result.to_textfiles(pattern, last_endline=True, scheduler=dask_client)

            with open(scores_path, "w") as f:
                # concatenate scores into one score file
                for path in sorted(
                    glob(pattern),
@@ -193,8 +132,11 @@ def vanilla_pad(pipeline, database, dask_client, groups, output, checkpoint, **k
                ):
                    with gzip.open(path, "rt") as f2:
                        f.write(f2.read())
                    # delete intermediate score files
                    os.remove(path)
        else:
            with open(scores_path, "w") as f:
                for sample in result:
                    f.write(pad_predicted_sample_to_score_line(sample, endl="\n"))
......
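The helper pad_predicted_sample_to_score_line is referenced above but not shown in this diff. An illustrative stand-in, assuming each predicted sample carries a key, an attack_type (None for bona-fide samples), and its scalar score in data; this is a sketch, not the actual implementation:

def pad_predicted_sample_to_score_line(sample, endl=""):
    # attack_type is conventionally None for bona-fide samples in PAD databases
    attack_type = getattr(sample, "attack_type", None) or "bonafide"
    # endl="" suits the dask to_textfiles path above; the local path passes endl="\n"
    return f"{sample.key} {attack_type} {sample.data}{endl}"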
from . import database
from . import database_sql
from . import preprocessor
from . import extractor
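The epilog above says my_experiment.py must define the pipeline elements. A minimal configuration sketch, assuming the --database and --pipeline ResourceOptions resolve names defined in the config file; the estimators, the DatabaseConnector import path, and my_legacy_database are hypothetical placeholders, not part of this commit:

# my_experiment.py -- hypothetical vanilla-pad configuration sketch
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

import bob.pipelines as mario
from bob.pad.base.pipelines.vanilla_pad import DatabaseConnector  # path assumed

my_legacy_database = ...  # placeholder: any legacy PAD database instance
database = DatabaseConnector(my_legacy_database)

# wrap the sklearn estimator so it accepts bob.pipelines Sample objects;
# the final step must expose decision_function, which the script calls
pipeline = Pipeline(
    [
        ("classifier", mario.wrap(["sample"], SVC())),
    ]
)

Running `$ bob pad vanilla-pad my_experiment.py -vv` would then fit this pipeline on database.fit_samples() and, with the new defaults, write scores-dev and scores-eval under --output.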